/home/andy/git/oilshell/oil/mycpp/gc_mylib.h
Line | Count | Source (jump to first uncovered line) |
1 | | // gc_mylib.h - corresponds to mycpp/mylib.py |
2 | | |
3 | | #ifndef MYCPP_GC_MYLIB_H |
4 | | #define MYCPP_GC_MYLIB_H |
5 | | |
6 | | #include <limits.h> // CHAR_BIT |
7 | | |
8 | | #include "mycpp/gc_alloc.h" // gHeap |
9 | | #include "mycpp/gc_dict.h" // for dict_erase() |
10 | | #include "mycpp/gc_tuple.h" |
11 | | |
12 | | template <class K, class V> |
13 | | class Dict; |
14 | | |
15 | | // https://stackoverflow.com/questions/3919995/determining-sprintf-buffer-size-whats-the-standard/11092994#11092994 |
16 | | // Notes: |
17 | | // - Python 2.7's intobject.c has an erroneous +6 |
18 | | // - This is 13, but len('-2147483648') is 11, which means we only need 12? |
19 | | // - This formula is valid for octal(), because 2^(3 bits) = 8 |
20 | | |
21 | | const int kIntBufSize = CHAR_BIT * sizeof(int) / 3 + 3; |
22 | | |
23 | | namespace mylib { |
24 | | |
25 | | void InitCppOnly(); |
26 | | |
27 | | // Wrappers around our C++ APIs |
28 | | |
29 | 14 | inline void MaybeCollect() { |
30 | 14 | gHeap.MaybeCollect(); |
31 | 14 | } |
32 | | |
33 | | void print_stderr(BigStr* s); |
34 | | |
35 | | // const int kStdout = 1; |
36 | | // const int kStderr = 2; |
37 | | |
38 | | // void writeln(BigStr* s, int fd = kStdout); |
39 | | |
40 | | Tuple2<BigStr*, BigStr*> split_once(BigStr* s, BigStr* delim); |
41 | | |
42 | | template <typename K, typename V> |
43 | 280 | void dict_erase(Dict<K, V>* haystack, K needle) { |
44 | 280 | DCHECK(haystack->obj_header().heap_tag != HeapTag::Global); |
45 | | |
46 | 0 | int pos = haystack->hash_and_probe(needle); |
47 | 280 | if (pos == kTooSmall) { |
48 | 0 | return; |
49 | 0 | } |
50 | 280 | DCHECK(pos >= 0); |
51 | 0 | int kv_index = haystack->index_->items_[pos]; |
52 | 280 | if (kv_index < 0) { |
53 | 2 | return; |
54 | 2 | } |
55 | | |
56 | 278 | int last_kv_index = haystack->len_ - 1; |
57 | 278 | DCHECK(kv_index <= last_kv_index); |
58 | | |
59 | | // Swap the target entry with the most recently inserted one before removing |
60 | | // it. This has two benefits. |
61 | | // (1) It keeps the entry arrays compact. All valid entries occupy a |
62 | | // contiguous region in memory. |
63 | | // (2) It prevents holes in the entry arrays. This makes iterating over |
64 | | // entries (e.g. in keys() or DictIter()) trivial and doesn't require |
65 | | // any extra validity state (like a bitset of unusable slots). This is |
66 | |      //     important because keys and values won't always be pointers, so we |
67 | | // can't rely on NULL checks for validity. We also can't wrap the slab |
68 | | // entry types in some other type without modifying the garbage |
69 | | // collector to trace through unmanaged types (or paying the extra |
70 | | // allocations for the outer type). |
71 | 278 | if (kv_index != last_kv_index) { |
72 | 142 | K last_key = haystack->keys_->items_[last_kv_index]; |
73 | 142 | V last_val = haystack->values_->items_[last_kv_index]; |
74 | 142 | int last_pos = haystack->hash_and_probe(last_key); |
75 | 142 | DCHECK(last_pos != kNotFound); |
76 | 0 | haystack->keys_->items_[kv_index] = last_key; |
77 | 142 | haystack->values_->items_[kv_index] = last_val; |
78 | 142 | haystack->index_->items_[last_pos] = kv_index; |
79 | 142 | } |
80 | | |
81 | | // Zero out for GC. These could be nullptr or 0 |
82 | 0 | haystack->keys_->items_[last_kv_index] = 0; |
83 | 278 | haystack->values_->items_[last_kv_index] = 0; |
84 | 278 | haystack->index_->items_[pos] = kDeletedEntry; |
85 | 278 | haystack->len_--; |
86 | 278 | DCHECK(haystack->len_ < haystack->capacity_); |
87 | 278 | } _ZN5mylib10dict_eraseIP6BigStriEEvP4DictIT_T0_ES4_ Line | Count | Source | 43 | 2 | void dict_erase(Dict<K, V>* haystack, K needle) { | 44 | 2 | DCHECK(haystack->obj_header().heap_tag != HeapTag::Global); | 45 | | | 46 | 0 | int pos = haystack->hash_and_probe(needle); | 47 | 2 | if (pos == kTooSmall) { | 48 | 0 | return; | 49 | 0 | } | 50 | 2 | DCHECK(pos >= 0); | 51 | 0 | int kv_index = haystack->index_->items_[pos]; | 52 | 2 | if (kv_index < 0) { | 53 | 0 | return; | 54 | 0 | } | 55 | | | 56 | 2 | int last_kv_index = haystack->len_ - 1; | 57 | 2 | DCHECK(kv_index <= last_kv_index); | 58 | | | 59 | | // Swap the target entry with the most recently inserted one before removing | 60 | | // it. This has two benefits. | 61 | | // (1) It keeps the entry arrays compact. All valid entries occupy a | 62 | | // contiguous region in memory. | 63 | | // (2) It prevents holes in the entry arrays. This makes iterating over | 64 | | // entries (e.g. in keys() or DictIter()) trivial and doesn't require | 65 | | // any extra validity state (like a bitset of unusable slots). This is | 66 | | // important because keys and values wont't always be pointers, so we | 67 | | // can't rely on NULL checks for validity. We also can't wrap the slab | 68 | | // entry types in some other type without modifying the garbage | 69 | | // collector to trace through unmanaged types (or paying the extra | 70 | | // allocations for the outer type). | 71 | 2 | if (kv_index != last_kv_index) { | 72 | 0 | K last_key = haystack->keys_->items_[last_kv_index]; | 73 | 0 | V last_val = haystack->values_->items_[last_kv_index]; | 74 | 0 | int last_pos = haystack->hash_and_probe(last_key); | 75 | 0 | DCHECK(last_pos != kNotFound); | 76 | 0 | haystack->keys_->items_[kv_index] = last_key; | 77 | 0 | haystack->values_->items_[kv_index] = last_val; | 78 | 0 | haystack->index_->items_[last_pos] = kv_index; | 79 | 0 | } | 80 | | | 81 | | // Zero out for GC. 
These could be nullptr or 0 | 82 | 0 | haystack->keys_->items_[last_kv_index] = 0; | 83 | 2 | haystack->values_->items_[last_kv_index] = 0; | 84 | 2 | haystack->index_->items_[pos] = kDeletedEntry; | 85 | 2 | haystack->len_--; | 86 | 2 | DCHECK(haystack->len_ < haystack->capacity_); | 87 | 2 | } |
_ZN5mylib10dict_eraseIP6BigStrS2_EEvP4DictIT_T0_ES4_ Line | Count | Source | 43 | 2 | void dict_erase(Dict<K, V>* haystack, K needle) { | 44 | 2 | DCHECK(haystack->obj_header().heap_tag != HeapTag::Global); | 45 | | | 46 | 0 | int pos = haystack->hash_and_probe(needle); | 47 | 2 | if (pos == kTooSmall) { | 48 | 0 | return; | 49 | 0 | } | 50 | 2 | DCHECK(pos >= 0); | 51 | 0 | int kv_index = haystack->index_->items_[pos]; | 52 | 2 | if (kv_index < 0) { | 53 | 0 | return; | 54 | 0 | } | 55 | | | 56 | 2 | int last_kv_index = haystack->len_ - 1; | 57 | 2 | DCHECK(kv_index <= last_kv_index); | 58 | | | 59 | | // Swap the target entry with the most recently inserted one before removing | 60 | | // it. This has two benefits. | 61 | | // (1) It keeps the entry arrays compact. All valid entries occupy a | 62 | | // contiguous region in memory. | 63 | | // (2) It prevents holes in the entry arrays. This makes iterating over | 64 | | // entries (e.g. in keys() or DictIter()) trivial and doesn't require | 65 | | // any extra validity state (like a bitset of unusable slots). This is | 66 | | // important because keys and values wont't always be pointers, so we | 67 | | // can't rely on NULL checks for validity. We also can't wrap the slab | 68 | | // entry types in some other type without modifying the garbage | 69 | | // collector to trace through unmanaged types (or paying the extra | 70 | | // allocations for the outer type). | 71 | 2 | if (kv_index != last_kv_index) { | 72 | 0 | K last_key = haystack->keys_->items_[last_kv_index]; | 73 | 0 | V last_val = haystack->values_->items_[last_kv_index]; | 74 | 0 | int last_pos = haystack->hash_and_probe(last_key); | 75 | 0 | DCHECK(last_pos != kNotFound); | 76 | 0 | haystack->keys_->items_[kv_index] = last_key; | 77 | 0 | haystack->values_->items_[kv_index] = last_val; | 78 | 0 | haystack->index_->items_[last_pos] = kv_index; | 79 | 0 | } | 80 | | | 81 | | // Zero out for GC. 
These could be nullptr or 0 | 82 | 0 | haystack->keys_->items_[last_kv_index] = 0; | 83 | 2 | haystack->values_->items_[last_kv_index] = 0; | 84 | 2 | haystack->index_->items_[pos] = kDeletedEntry; | 85 | 2 | haystack->len_--; | 86 | 2 | DCHECK(haystack->len_ < haystack->capacity_); | 87 | 2 | } |
_ZN5mylib10dict_eraseIiiEEvP4DictIT_T0_ES2_ Line | Count | Source | 43 | 274 | void dict_erase(Dict<K, V>* haystack, K needle) { | 44 | 274 | DCHECK(haystack->obj_header().heap_tag != HeapTag::Global); | 45 | | | 46 | 0 | int pos = haystack->hash_and_probe(needle); | 47 | 274 | if (pos == kTooSmall) { | 48 | 0 | return; | 49 | 0 | } | 50 | 274 | DCHECK(pos >= 0); | 51 | 0 | int kv_index = haystack->index_->items_[pos]; | 52 | 274 | if (kv_index < 0) { | 53 | 0 | return; | 54 | 0 | } | 55 | | | 56 | 274 | int last_kv_index = haystack->len_ - 1; | 57 | 274 | DCHECK(kv_index <= last_kv_index); | 58 | | | 59 | | // Swap the target entry with the most recently inserted one before removing | 60 | | // it. This has two benefits. | 61 | | // (1) It keeps the entry arrays compact. All valid entries occupy a | 62 | | // contiguous region in memory. | 63 | | // (2) It prevents holes in the entry arrays. This makes iterating over | 64 | | // entries (e.g. in keys() or DictIter()) trivial and doesn't require | 65 | | // any extra validity state (like a bitset of unusable slots). This is | 66 | | // important because keys and values wont't always be pointers, so we | 67 | | // can't rely on NULL checks for validity. We also can't wrap the slab | 68 | | // entry types in some other type without modifying the garbage | 69 | | // collector to trace through unmanaged types (or paying the extra | 70 | | // allocations for the outer type). | 71 | 274 | if (kv_index != last_kv_index) { | 72 | 142 | K last_key = haystack->keys_->items_[last_kv_index]; | 73 | 142 | V last_val = haystack->values_->items_[last_kv_index]; | 74 | 142 | int last_pos = haystack->hash_and_probe(last_key); | 75 | 142 | DCHECK(last_pos != kNotFound); | 76 | 0 | haystack->keys_->items_[kv_index] = last_key; | 77 | 142 | haystack->values_->items_[kv_index] = last_val; | 78 | 142 | haystack->index_->items_[last_pos] = kv_index; | 79 | 142 | } | 80 | | | 81 | | // Zero out for GC. 
These could be nullptr or 0 | 82 | 0 | haystack->keys_->items_[last_kv_index] = 0; | 83 | 274 | haystack->values_->items_[last_kv_index] = 0; | 84 | 274 | haystack->index_->items_[pos] = kDeletedEntry; | 85 | 274 | haystack->len_--; | 86 | 274 | DCHECK(haystack->len_ < haystack->capacity_); | 87 | 274 | } |
_ZN5mylib10dict_eraseIiP6BigStrEEvP4DictIT_T0_ES4_ Line | Count | Source | 43 | 2 | void dict_erase(Dict<K, V>* haystack, K needle) { | 44 | 2 | DCHECK(haystack->obj_header().heap_tag != HeapTag::Global); | 45 | | | 46 | 0 | int pos = haystack->hash_and_probe(needle); | 47 | 2 | if (pos == kTooSmall) { | 48 | 0 | return; | 49 | 0 | } | 50 | 2 | DCHECK(pos >= 0); | 51 | 0 | int kv_index = haystack->index_->items_[pos]; | 52 | 2 | if (kv_index < 0) { | 53 | 2 | return; | 54 | 2 | } | 55 | | | 56 | 0 | int last_kv_index = haystack->len_ - 1; | 57 | 0 | DCHECK(kv_index <= last_kv_index); | 58 | | | 59 | | // Swap the target entry with the most recently inserted one before removing | 60 | | // it. This has two benefits. | 61 | | // (1) It keeps the entry arrays compact. All valid entries occupy a | 62 | | // contiguous region in memory. | 63 | | // (2) It prevents holes in the entry arrays. This makes iterating over | 64 | | // entries (e.g. in keys() or DictIter()) trivial and doesn't require | 65 | | // any extra validity state (like a bitset of unusable slots). This is | 66 | | // important because keys and values wont't always be pointers, so we | 67 | | // can't rely on NULL checks for validity. We also can't wrap the slab | 68 | | // entry types in some other type without modifying the garbage | 69 | | // collector to trace through unmanaged types (or paying the extra | 70 | | // allocations for the outer type). | 71 | 0 | if (kv_index != last_kv_index) { | 72 | 0 | K last_key = haystack->keys_->items_[last_kv_index]; | 73 | 0 | V last_val = haystack->values_->items_[last_kv_index]; | 74 | 0 | int last_pos = haystack->hash_and_probe(last_key); | 75 | 0 | DCHECK(last_pos != kNotFound); | 76 | 0 | haystack->keys_->items_[kv_index] = last_key; | 77 | 0 | haystack->values_->items_[kv_index] = last_val; | 78 | 0 | haystack->index_->items_[last_pos] = kv_index; | 79 | 0 | } | 80 | | | 81 | | // Zero out for GC. 
These could be nullptr or 0 | 82 | 0 | haystack->keys_->items_[last_kv_index] = 0; | 83 | 0 | haystack->values_->items_[last_kv_index] = 0; | 84 | 0 | haystack->index_->items_[pos] = kDeletedEntry; | 85 | 0 | haystack->len_--; | 86 | 0 | DCHECK(haystack->len_ < haystack->capacity_); | 87 | 0 | } |
|
88 | | |
89 | | // NOTE: Can use OverAllocatedStr for all of these, rather than copying |
90 | | |
91 | 4 | inline BigStr* hex_lower(int i) { |
92 | 4 | char buf[kIntBufSize]; |
93 | 4 | int len = snprintf(buf, kIntBufSize, "%x", i); |
94 | 4 | return ::StrFromC(buf, len); |
95 | 4 | } |
96 | | |
97 | 4 | inline BigStr* hex_upper(int i) { |
98 | 4 | char buf[kIntBufSize]; |
99 | 4 | int len = snprintf(buf, kIntBufSize, "%X", i); |
100 | 4 | return ::StrFromC(buf, len); |
101 | 4 | } |
102 | | |
103 | 4 | inline BigStr* octal(int i) { |
104 | 4 | char buf[kIntBufSize]; |
105 | 4 | int len = snprintf(buf, kIntBufSize, "%o", i); |
106 | 4 | return ::StrFromC(buf, len); |
107 | 4 | } |
108 | | |
109 | | class LineReader { |
110 | | public: |
111 | | // Abstract type with no fields: unknown size |
112 | 14 | LineReader() { |
113 | 14 | } |
114 | | virtual BigStr* readline() = 0; |
115 | | virtual bool isatty() = 0; |
116 | | virtual void close() = 0; |
117 | | |
118 | 0 | static constexpr ObjHeader obj_header() { |
119 | 0 | return ObjHeader::ClassFixed(field_mask(), sizeof(LineReader)); |
120 | 0 | } |
121 | | |
122 | 14 | static constexpr uint32_t field_mask() { |
123 | 14 | return kZeroMask; |
124 | 14 | } |
125 | | }; |
126 | | |
127 | | class BufLineReader : public LineReader { |
128 | | public: |
129 | 6 | explicit BufLineReader(BigStr* s) : LineReader(), s_(s), pos_(0) { |
130 | 6 | } |
131 | | virtual BigStr* readline(); |
132 | 2 | virtual bool isatty() { |
133 | 2 | return false; |
134 | 2 | } |
135 | 2 | virtual void close() { |
136 | 2 | } |
137 | | |
138 | | BigStr* s_; |
139 | | int pos_; |
140 | | |
141 | 6 | static constexpr ObjHeader obj_header() { |
142 | 6 | return ObjHeader::ClassFixed(field_mask(), sizeof(LineReader)); |
143 | 6 | } |
144 | | |
145 | 6 | static constexpr uint32_t field_mask() { |
146 | 6 | return LineReader::field_mask() | maskbit(offsetof(BufLineReader, s_)); |
147 | 6 | } |
148 | | |
149 | | DISALLOW_COPY_AND_ASSIGN(BufLineReader) |
150 | | }; |
151 | | |
152 | | // Wrap a FILE* |
153 | | class CFileLineReader : public LineReader { |
154 | | public: |
155 | 8 | explicit CFileLineReader(FILE* f) : LineReader(), f_(f) { |
156 | 8 | } |
157 | | virtual BigStr* readline(); |
158 | | virtual bool isatty(); |
159 | 2 | void close() { |
160 | 2 | fclose(f_); |
161 | 2 | } |
162 | | |
163 | 8 | static constexpr ObjHeader obj_header() { |
164 | 8 | return ObjHeader::ClassFixed(field_mask(), sizeof(LineReader)); |
165 | 8 | } |
166 | | |
167 | 8 | static constexpr uint32_t field_mask() { |
168 | | // not mutating field_mask because FILE* isn't a GC object |
169 | 8 | return LineReader::field_mask(); |
170 | 8 | } |
171 | | |
172 | | private: |
173 | | FILE* f_; |
174 | | |
175 | | DISALLOW_COPY_AND_ASSIGN(CFileLineReader) |
176 | | }; |
177 | | |
178 | | extern LineReader* gStdin; |
179 | | |
180 | 2 | inline LineReader* Stdin() { |
181 | 2 | if (gStdin == nullptr) { |
182 | 2 | gStdin = Alloc<CFileLineReader>(stdin); |
183 | 2 | } |
184 | 2 | return gStdin; |
185 | 2 | } |
186 | | |
187 | | LineReader* open(BigStr* path); |
188 | | |
189 | | class Writer { |
190 | | public: |
191 | 86 | Writer() { |
192 | 86 | } |
193 | | virtual void write(BigStr* s) = 0; |
194 | | virtual void flush() = 0; |
195 | | virtual bool isatty() = 0; |
196 | | |
197 | 9 | static constexpr ObjHeader obj_header() { |
198 | 9 | return ObjHeader::ClassFixed(field_mask(), sizeof(Writer)); |
199 | 9 | } |
200 | | |
201 | 86 | static constexpr uint32_t field_mask() { |
202 | 86 | return kZeroMask; |
203 | 86 | } |
204 | | }; |
205 | | |
206 | | class MutableStr; |
207 | | |
208 | | class BufWriter : public Writer { |
209 | | public: |
210 | 77 | BufWriter() : Writer(), str_(nullptr), len_(0) { |
211 | 77 | } |
212 | | void write(BigStr* s) override; |
213 | 2 | void flush() override { |
214 | 2 | } |
215 | 2 | bool isatty() override { |
216 | 2 | return false; |
217 | 2 | } |
218 | | // For cStringIO API |
219 | | BigStr* getvalue(); |
220 | | |
221 | 77 | static constexpr ObjHeader obj_header() { |
222 | 77 | return ObjHeader::ClassFixed(field_mask(), sizeof(BufWriter)); |
223 | 77 | } |
224 | | |
225 | 77 | static constexpr unsigned field_mask() { |
226 | |     // maskbit_v() because BufWriter has virtual methods |
227 | 77 | return Writer::field_mask() | maskbit(offsetof(BufWriter, str_)); |
228 | 77 | } |
229 | | |
230 | | private: |
231 | | void EnsureCapacity(int n); |
232 | | |
233 | | void Extend(BigStr* s); |
234 | | char* data(); |
235 | | char* end(); |
236 | | int capacity(); |
237 | | |
238 | | MutableStr* str_; |
239 | | int len_; |
240 | | bool is_valid_ = true; // It becomes invalid after getvalue() is called |
241 | | }; |
242 | | |
243 | | // Wrap a FILE* |
244 | | class CFileWriter : public Writer { |
245 | | public: |
246 | 9 | explicit CFileWriter(FILE* f) : Writer(), f_(f) { |
247 | | // not mutating field_mask because FILE* is not a managed pointer |
248 | 9 | } |
249 | | void write(BigStr* s) override; |
250 | | void flush() override; |
251 | | bool isatty() override; |
252 | | |
253 | | private: |
254 | | FILE* f_; |
255 | | |
256 | | DISALLOW_COPY_AND_ASSIGN(CFileWriter) |
257 | | }; |
258 | | |
259 | | extern Writer* gStdout; |
260 | | |
261 | 13 | inline Writer* Stdout() { |
262 | 13 | if (gStdout == nullptr) { |
263 | 5 | gStdout = Alloc<CFileWriter>(stdout); |
264 | 5 | gHeap.RootGlobalVar(gStdout); |
265 | 5 | } |
266 | 13 | return gStdout; |
267 | 13 | } |
268 | | |
269 | | extern Writer* gStderr; |
270 | | |
271 | 2 | inline Writer* Stderr() { |
272 | 2 | if (gStderr == nullptr) { |
273 | 2 | gStderr = Alloc<CFileWriter>(stderr); |
274 | 2 | gHeap.RootGlobalVar(gStderr); |
275 | 2 | } |
276 | 2 | return gStderr; |
277 | 2 | } |
278 | | |
279 | | class UniqueObjects { |
280 | | // Can't be expressed in typed Python because we don't have uint64_t for |
281 | | // addresses |
282 | | |
283 | | public: |
284 | 0 | UniqueObjects() { |
285 | 0 | } |
286 | 0 | void Add(void* obj) { |
287 | 0 | } |
288 | 0 | int Get(void* obj) { |
289 | 0 | return -1; |
290 | 0 | } |
291 | | |
292 | 0 | static constexpr ObjHeader obj_header() { |
293 | 0 | return ObjHeader::ClassFixed(field_mask(), sizeof(UniqueObjects)); |
294 | 0 | } |
295 | | |
296 | | // SPECIAL CASE? We should never have a unique reference to an object? So |
297 | | // don't bother tracing |
298 | 0 | static constexpr uint32_t field_mask() { |
299 | 0 | return kZeroMask; |
300 | 0 | } |
301 | | |
302 | | private: |
303 | | // address -> small integer ID |
304 | | Dict<void*, int> addresses_; |
305 | | }; |
306 | | |
307 | | } // namespace mylib |
308 | | |
309 | | #endif // MYCPP_GC_MYLIB_H |