cpp

Coverage Report

Created: 2024-08-25 11:48

/home/andy/git/oilshell/oil/mycpp/gc_mylib.h
Line
Count
Source (jump to first uncovered line)
1
// gc_mylib.h - corresponds to mycpp/mylib.py
2
3
#ifndef MYCPP_GC_MYLIB_H
4
#define MYCPP_GC_MYLIB_H
5
6
#include <limits.h>  // CHAR_BIT
7
8
#include "mycpp/gc_alloc.h"  // gHeap
9
#include "mycpp/gc_dict.h"   // for dict_erase()
10
#include "mycpp/gc_mops.h"
11
#include "mycpp/gc_tuple.h"
12
13
template <class K, class V>
14
class Dict;
15
16
// https://stackoverflow.com/questions/3919995/determining-sprintf-buffer-size-whats-the-standard/11092994#11092994
17
// Notes:
18
// - Python 2.7's intobject.c has an erroneous +6
19
// - This is 13, but len('-2147483648') is 11, which means we only need 12?
20
// - This formula is valid for octal(), because 2^(3 bits) = 8
21
22
const int kIntBufSize = CHAR_BIT * sizeof(int) / 3 + 3;
23
24
namespace mylib {
25
26
void InitCppOnly();
27
28
// Wrappers around our C++ APIs
29
30
20
inline void MaybeCollect() {
31
20
  gHeap.MaybeCollect();
32
20
}
33
34
void print_stderr(BigStr* s);
35
36
286
inline int ByteAt(BigStr* s, int i) {
37
286
  DCHECK(0 <= i);
38
286
  DCHECK(i <= len(s));
39
40
0
  return static_cast<unsigned char>(s->data_[i]);
41
286
}
42
43
1.02k
inline int ByteEquals(int byte, BigStr* ch) {
44
1.02k
  DCHECK(0 <= byte);
45
1.02k
  DCHECK(byte < 256);
46
47
1.02k
  DCHECK(len(ch) == 1);
48
49
0
  return byte == static_cast<unsigned char>(ch->data_[0]);
50
1.02k
}
51
52
256
inline int ByteInSet(int byte, BigStr* byte_set) {
53
256
  DCHECK(0 <= byte);
54
256
  DCHECK(byte < 256);
55
56
0
  int n = len(byte_set);
57
1.77k
  for (int i = 0; i < n; ++i) {
58
1.52k
    int b = static_cast<unsigned char>(byte_set->data_[i]);
59
1.52k
    if (byte == b) {
60
6
      return true;
61
6
    }
62
1.52k
  }
63
250
  return false;
64
256
}
65
66
BigStr* JoinBytes(List<int>* byte_list);
67
68
void BigIntSort(List<mops::BigInt>* keys);
69
70
// const int kStdout = 1;
71
// const int kStderr = 2;
72
73
// void writeln(BigStr* s, int fd = kStdout);
74
75
Tuple2<BigStr*, BigStr*> split_once(BigStr* s, BigStr* delim);
76
77
template <typename K, typename V>
78
280
void dict_erase(Dict<K, V>* haystack, K needle) {
79
280
  DCHECK(haystack->obj_header().heap_tag != HeapTag::Global);
80
81
0
  int pos = haystack->hash_and_probe(needle);
82
280
  if (pos == kTooSmall) {
83
0
    return;
84
0
  }
85
280
  DCHECK(pos >= 0);
86
0
  int kv_index = haystack->index_->items_[pos];
87
280
  if (kv_index < 0) {
88
2
    return;
89
2
  }
90
91
278
  int last_kv_index = haystack->len_ - 1;
92
278
  DCHECK(kv_index <= last_kv_index);
93
94
  // Swap the target entry with the most recently inserted one before removing
95
  // it. This has two benefits.
96
  //   (1) It keeps the entry arrays compact. All valid entries occupy a
97
  //       contiguous region in memory.
98
  //   (2) It prevents holes in the entry arrays. This makes iterating over
99
  //       entries (e.g. in keys() or DictIter()) trivial and doesn't require
100
  //       any extra validity state (like a bitset of unusable slots). This is
101
  //       important because keys and values won't always be pointers, so we
102
  //       can't rely on NULL checks for validity. We also can't wrap the slab
103
  //       entry types in some other type without modifying the garbage
104
  //       collector to trace through unmanaged types (or paying the extra
105
  //       allocations for the outer type).
106
278
  if (kv_index != last_kv_index) {
107
142
    K last_key = haystack->keys_->items_[last_kv_index];
108
142
    V last_val = haystack->values_->items_[last_kv_index];
109
142
    int last_pos = haystack->hash_and_probe(last_key);
110
142
    DCHECK(last_pos != kNotFound);
111
0
    haystack->keys_->items_[kv_index] = last_key;
112
142
    haystack->values_->items_[kv_index] = last_val;
113
142
    haystack->index_->items_[last_pos] = kv_index;
114
142
  }
115
116
  // Zero out for GC.  These could be nullptr or 0
117
0
  haystack->keys_->items_[last_kv_index] = 0;
118
278
  haystack->values_->items_[last_kv_index] = 0;
119
278
  haystack->index_->items_[pos] = kDeletedEntry;
120
278
  haystack->len_--;
121
278
  DCHECK(haystack->len_ < haystack->capacity_);
122
278
}
_ZN5mylib10dict_eraseIP6BigStriEEvP4DictIT_T0_ES4_
Line
Count
Source
78
2
void dict_erase(Dict<K, V>* haystack, K needle) {
79
2
  DCHECK(haystack->obj_header().heap_tag != HeapTag::Global);
80
81
0
  int pos = haystack->hash_and_probe(needle);
82
2
  if (pos == kTooSmall) {
83
0
    return;
84
0
  }
85
2
  DCHECK(pos >= 0);
86
0
  int kv_index = haystack->index_->items_[pos];
87
2
  if (kv_index < 0) {
88
0
    return;
89
0
  }
90
91
2
  int last_kv_index = haystack->len_ - 1;
92
2
  DCHECK(kv_index <= last_kv_index);
93
94
  // Swap the target entry with the most recently inserted one before removing
95
  // it. This has two benefits.
96
  //   (1) It keeps the entry arrays compact. All valid entries occupy a
97
  //       contiguous region in memory.
98
  //   (2) It prevents holes in the entry arrays. This makes iterating over
99
  //       entries (e.g. in keys() or DictIter()) trivial and doesn't require
100
  //       any extra validity state (like a bitset of unusable slots). This is
101
  //       important because keys and values won't always be pointers, so we
102
  //       can't rely on NULL checks for validity. We also can't wrap the slab
103
  //       entry types in some other type without modifying the garbage
104
  //       collector to trace through unmanaged types (or paying the extra
105
  //       allocations for the outer type).
106
2
  if (kv_index != last_kv_index) {
107
0
    K last_key = haystack->keys_->items_[last_kv_index];
108
0
    V last_val = haystack->values_->items_[last_kv_index];
109
0
    int last_pos = haystack->hash_and_probe(last_key);
110
0
    DCHECK(last_pos != kNotFound);
111
0
    haystack->keys_->items_[kv_index] = last_key;
112
0
    haystack->values_->items_[kv_index] = last_val;
113
0
    haystack->index_->items_[last_pos] = kv_index;
114
0
  }
115
116
  // Zero out for GC.  These could be nullptr or 0
117
0
  haystack->keys_->items_[last_kv_index] = 0;
118
2
  haystack->values_->items_[last_kv_index] = 0;
119
2
  haystack->index_->items_[pos] = kDeletedEntry;
120
2
  haystack->len_--;
121
2
  DCHECK(haystack->len_ < haystack->capacity_);
122
2
}
_ZN5mylib10dict_eraseIP6BigStrS2_EEvP4DictIT_T0_ES4_
Line
Count
Source
78
2
void dict_erase(Dict<K, V>* haystack, K needle) {
79
2
  DCHECK(haystack->obj_header().heap_tag != HeapTag::Global);
80
81
0
  int pos = haystack->hash_and_probe(needle);
82
2
  if (pos == kTooSmall) {
83
0
    return;
84
0
  }
85
2
  DCHECK(pos >= 0);
86
0
  int kv_index = haystack->index_->items_[pos];
87
2
  if (kv_index < 0) {
88
0
    return;
89
0
  }
90
91
2
  int last_kv_index = haystack->len_ - 1;
92
2
  DCHECK(kv_index <= last_kv_index);
93
94
  // Swap the target entry with the most recently inserted one before removing
95
  // it. This has two benefits.
96
  //   (1) It keeps the entry arrays compact. All valid entries occupy a
97
  //       contiguous region in memory.
98
  //   (2) It prevents holes in the entry arrays. This makes iterating over
99
  //       entries (e.g. in keys() or DictIter()) trivial and doesn't require
100
  //       any extra validity state (like a bitset of unusable slots). This is
101
  //       important because keys and values won't always be pointers, so we
102
  //       can't rely on NULL checks for validity. We also can't wrap the slab
103
  //       entry types in some other type without modifying the garbage
104
  //       collector to trace through unmanaged types (or paying the extra
105
  //       allocations for the outer type).
106
2
  if (kv_index != last_kv_index) {
107
0
    K last_key = haystack->keys_->items_[last_kv_index];
108
0
    V last_val = haystack->values_->items_[last_kv_index];
109
0
    int last_pos = haystack->hash_and_probe(last_key);
110
0
    DCHECK(last_pos != kNotFound);
111
0
    haystack->keys_->items_[kv_index] = last_key;
112
0
    haystack->values_->items_[kv_index] = last_val;
113
0
    haystack->index_->items_[last_pos] = kv_index;
114
0
  }
115
116
  // Zero out for GC.  These could be nullptr or 0
117
0
  haystack->keys_->items_[last_kv_index] = 0;
118
2
  haystack->values_->items_[last_kv_index] = 0;
119
2
  haystack->index_->items_[pos] = kDeletedEntry;
120
2
  haystack->len_--;
121
2
  DCHECK(haystack->len_ < haystack->capacity_);
122
2
}
_ZN5mylib10dict_eraseIiiEEvP4DictIT_T0_ES2_
Line
Count
Source
78
274
void dict_erase(Dict<K, V>* haystack, K needle) {
79
274
  DCHECK(haystack->obj_header().heap_tag != HeapTag::Global);
80
81
0
  int pos = haystack->hash_and_probe(needle);
82
274
  if (pos == kTooSmall) {
83
0
    return;
84
0
  }
85
274
  DCHECK(pos >= 0);
86
0
  int kv_index = haystack->index_->items_[pos];
87
274
  if (kv_index < 0) {
88
0
    return;
89
0
  }
90
91
274
  int last_kv_index = haystack->len_ - 1;
92
274
  DCHECK(kv_index <= last_kv_index);
93
94
  // Swap the target entry with the most recently inserted one before removing
95
  // it. This has two benefits.
96
  //   (1) It keeps the entry arrays compact. All valid entries occupy a
97
  //       contiguous region in memory.
98
  //   (2) It prevents holes in the entry arrays. This makes iterating over
99
  //       entries (e.g. in keys() or DictIter()) trivial and doesn't require
100
  //       any extra validity state (like a bitset of unusable slots). This is
101
  //       important because keys and values won't always be pointers, so we
102
  //       can't rely on NULL checks for validity. We also can't wrap the slab
103
  //       entry types in some other type without modifying the garbage
104
  //       collector to trace through unmanaged types (or paying the extra
105
  //       allocations for the outer type).
106
274
  if (kv_index != last_kv_index) {
107
142
    K last_key = haystack->keys_->items_[last_kv_index];
108
142
    V last_val = haystack->values_->items_[last_kv_index];
109
142
    int last_pos = haystack->hash_and_probe(last_key);
110
142
    DCHECK(last_pos != kNotFound);
111
0
    haystack->keys_->items_[kv_index] = last_key;
112
142
    haystack->values_->items_[kv_index] = last_val;
113
142
    haystack->index_->items_[last_pos] = kv_index;
114
142
  }
115
116
  // Zero out for GC.  These could be nullptr or 0
117
0
  haystack->keys_->items_[last_kv_index] = 0;
118
274
  haystack->values_->items_[last_kv_index] = 0;
119
274
  haystack->index_->items_[pos] = kDeletedEntry;
120
274
  haystack->len_--;
121
274
  DCHECK(haystack->len_ < haystack->capacity_);
122
274
}
_ZN5mylib10dict_eraseIiP6BigStrEEvP4DictIT_T0_ES4_
Line
Count
Source
78
2
void dict_erase(Dict<K, V>* haystack, K needle) {
79
2
  DCHECK(haystack->obj_header().heap_tag != HeapTag::Global);
80
81
0
  int pos = haystack->hash_and_probe(needle);
82
2
  if (pos == kTooSmall) {
83
0
    return;
84
0
  }
85
2
  DCHECK(pos >= 0);
86
0
  int kv_index = haystack->index_->items_[pos];
87
2
  if (kv_index < 0) {
88
2
    return;
89
2
  }
90
91
0
  int last_kv_index = haystack->len_ - 1;
92
0
  DCHECK(kv_index <= last_kv_index);
93
94
  // Swap the target entry with the most recently inserted one before removing
95
  // it. This has two benefits.
96
  //   (1) It keeps the entry arrays compact. All valid entries occupy a
97
  //       contiguous region in memory.
98
  //   (2) It prevents holes in the entry arrays. This makes iterating over
99
  //       entries (e.g. in keys() or DictIter()) trivial and doesn't require
100
  //       any extra validity state (like a bitset of unusable slots). This is
101
  //       important because keys and values won't always be pointers, so we
102
  //       can't rely on NULL checks for validity. We also can't wrap the slab
103
  //       entry types in some other type without modifying the garbage
104
  //       collector to trace through unmanaged types (or paying the extra
105
  //       allocations for the outer type).
106
0
  if (kv_index != last_kv_index) {
107
0
    K last_key = haystack->keys_->items_[last_kv_index];
108
0
    V last_val = haystack->values_->items_[last_kv_index];
109
0
    int last_pos = haystack->hash_and_probe(last_key);
110
0
    DCHECK(last_pos != kNotFound);
111
0
    haystack->keys_->items_[kv_index] = last_key;
112
0
    haystack->values_->items_[kv_index] = last_val;
113
0
    haystack->index_->items_[last_pos] = kv_index;
114
0
  }
115
116
  // Zero out for GC.  These could be nullptr or 0
117
0
  haystack->keys_->items_[last_kv_index] = 0;
118
0
  haystack->values_->items_[last_kv_index] = 0;
119
0
  haystack->index_->items_[pos] = kDeletedEntry;
120
0
  haystack->len_--;
121
0
  DCHECK(haystack->len_ < haystack->capacity_);
122
0
}
123
124
4
inline BigStr* hex_lower(int i) {
125
  // Note: Could also use OverAllocatedStr, but most strings are small?
126
4
  char buf[kIntBufSize];
127
4
  int len = snprintf(buf, kIntBufSize, "%x", i);
128
4
  return ::StrFromC(buf, len);
129
4
}
130
131
// Abstract type: Union of LineReader and Writer
132
class File {
133
 public:
134
146
  File() {
135
146
  }
136
  // Writer
137
  virtual void write(BigStr* s) = 0;
138
  virtual void flush() = 0;
139
140
  // Reader
141
  virtual BigStr* readline() = 0;
142
143
  // Both
144
  virtual bool isatty() = 0;
145
  virtual void close() = 0;
146
147
0
  static constexpr ObjHeader obj_header() {
148
0
    return ObjHeader::ClassFixed(field_mask(), sizeof(File));
149
0
  }
150
151
17
  static constexpr uint32_t field_mask() {
152
17
    return kZeroMask;
153
17
  }
154
};
155
156
// Wrap a FILE* for read and write
157
class CFile : public File {
158
 public:
159
17
  explicit CFile(FILE* f) : File(), f_(f) {
160
17
  }
161
  // Writer
162
  void write(BigStr* s) override;
163
  void flush() override;
164
165
  // Reader
166
  BigStr* readline() override;
167
168
  // Both
169
  bool isatty() override;
170
  void close() override;
171
172
17
  static constexpr ObjHeader obj_header() {
173
17
    return ObjHeader::ClassFixed(field_mask(), sizeof(CFile));
174
17
  }
175
176
17
  static constexpr uint32_t field_mask() {
177
    // not mutating field_mask because FILE* isn't a GC object
178
17
    return File::field_mask();
179
17
  }
180
181
 private:
182
  FILE* f_;
183
184
  DISALLOW_COPY_AND_ASSIGN(CFile)
185
};
186
187
// Abstract File we can only read from.
188
// TODO: can we get rid of DCHECK() and reinterpret_cast?
189
class LineReader : public File {
190
 public:
191
6
  LineReader() : File() {
192
6
  }
193
0
  void write(BigStr* s) override {
194
0
    CHECK(false);  // should not happen
195
0
  }
196
0
  void flush() override {
197
0
    CHECK(false);  // should not happen
198
0
  }
199
200
0
  static constexpr ObjHeader obj_header() {
201
0
    return ObjHeader::ClassFixed(field_mask(), sizeof(LineReader));
202
0
  }
203
204
6
  static constexpr uint32_t field_mask() {
205
6
    return kZeroMask;
206
6
  }
207
};
208
209
class BufLineReader : public LineReader {
210
 public:
211
6
  explicit BufLineReader(BigStr* s) : LineReader(), s_(s), pos_(0) {
212
6
  }
213
  virtual BigStr* readline();
214
2
  virtual bool isatty() {
215
2
    return false;
216
2
  }
217
2
  virtual void close() {
218
2
  }
219
220
  BigStr* s_;
221
  int pos_;
222
223
6
  static constexpr ObjHeader obj_header() {
224
6
    return ObjHeader::ClassFixed(field_mask(), sizeof(LineReader));
225
6
  }
226
227
6
  static constexpr uint32_t field_mask() {
228
6
    return LineReader::field_mask() | maskbit(offsetof(BufLineReader, s_));
229
6
  }
230
231
  DISALLOW_COPY_AND_ASSIGN(BufLineReader)
232
};
233
234
extern LineReader* gStdin;
235
236
2
inline LineReader* Stdin() {
237
2
  if (gStdin == nullptr) {
238
2
    gStdin = reinterpret_cast<LineReader*>(Alloc<CFile>(stdin));
239
2
  }
240
2
  return gStdin;
241
2
}
242
243
LineReader* open(BigStr* path);
244
245
// Abstract File we can only write to.
246
// TODO: can we get rid of DCHECK() and reinterpret_cast?
247
class Writer : public File {
248
 public:
249
123
  Writer() : File() {
250
123
  }
251
0
  BigStr* readline() override {
252
0
    CHECK(false);  // should not happen
253
0
  }
254
255
0
  static constexpr ObjHeader obj_header() {
256
0
    return ObjHeader::ClassFixed(field_mask(), sizeof(Writer));
257
0
  }
258
259
123
  static constexpr uint32_t field_mask() {
260
123
    return kZeroMask;
261
123
  }
262
};
263
264
class MutableStr;
265
266
class BufWriter : public Writer {
267
 public:
268
123
  BufWriter() : Writer(), str_(nullptr), len_(0) {
269
123
  }
270
  void write(BigStr* s) override;
271
  void write_spaces(int n);
272
2
  void clear() {  // Reuse this instance
273
2
    str_ = nullptr;
274
2
    len_ = 0;
275
2
    is_valid_ = true;
276
2
  }
277
0
  void close() override {
278
0
  }
279
2
  void flush() override {
280
2
  }
281
2
  bool isatty() override {
282
2
    return false;
283
2
  }
284
  BigStr* getvalue();  // part of cStringIO API
285
286
  //
287
  // Low Level API for C++ usage only
288
  //
289
290
  // Convenient API that avoids BigStr*
291
  void WriteConst(const char* c_string);
292
293
  // Potentially resizes the buffer.
294
  void EnsureMoreSpace(int n);
295
  // After EnsureMoreSpace(42), you can write 42 more bytes safely.
296
  //
297
  // Note that if you call EnsureMoreSpace(42), write 5 byte, and then
298
  // EnsureMoreSpace(42) again, the amount of additional space reserved is 47.
299
300
  // (Similar to vector::reserve(n), but it takes an integer to ADD to the
301
  // capacity.)
302
303
  uint8_t* LengthPointer();    // start + length
304
  uint8_t* CapacityPointer();  // start + capacity
305
  void SetLengthFrom(uint8_t* length_ptr);
306
307
79
  int Length() {
308
79
    return len_;
309
79
  }
310
311
  // Rewind to earlier position, future writes start there
312
  void Truncate(int length);
313
314
123
  static constexpr ObjHeader obj_header() {
315
123
    return ObjHeader::ClassFixed(field_mask(), sizeof(BufWriter));
316
123
  }
317
318
123
  static constexpr unsigned field_mask() {
319
    // maskbit_v() because BufWriter has virtual methods
320
123
    return Writer::field_mask() | maskbit(offsetof(BufWriter, str_));
321
123
  }
322
323
 private:
324
  void WriteRaw(char* s, int n);
325
326
  MutableStr* str_;  // getvalue() turns this directly into Str*, no copying
327
  int len_;          // how many bytes have been written so far
328
  bool is_valid_ = true;  // It becomes invalid after getvalue() is called
329
};
330
331
extern Writer* gStdout;
332
333
13
inline Writer* Stdout() {
334
13
  if (gStdout == nullptr) {
335
5
    gStdout = reinterpret_cast<Writer*>(Alloc<CFile>(stdout));
336
5
    gHeap.RootGlobalVar(gStdout);
337
5
  }
338
13
  return gStdout;
339
13
}
340
341
extern Writer* gStderr;
342
343
2
inline Writer* Stderr() {
344
2
  if (gStderr == nullptr) {
345
2
    gStderr = reinterpret_cast<Writer*>(Alloc<CFile>(stderr));
346
2
    gHeap.RootGlobalVar(gStderr);
347
2
  }
348
2
  return gStderr;
349
2
}
350
351
class UniqueObjects {
352
  // Can't be expressed in typed Python because we don't have uint64_t for
353
  // addresses
354
355
 public:
356
0
  UniqueObjects() {
357
0
  }
358
0
  void Add(void* obj) {
359
0
  }
360
0
  int Get(void* obj) {
361
0
    return -1;
362
0
  }
363
364
0
  static constexpr ObjHeader obj_header() {
365
0
    return ObjHeader::ClassFixed(field_mask(), sizeof(UniqueObjects));
366
0
  }
367
368
  // SPECIAL CASE? We should never have a unique reference to an object?  So
369
  // don't bother tracing
370
0
  static constexpr uint32_t field_mask() {
371
0
    return kZeroMask;
372
0
  }
373
374
 private:
375
  // address -> small integer ID
376
  Dict<void*, int> addresses_;
377
};
378
379
}  // namespace mylib
380
381
#endif  // MYCPP_GC_MYLIB_H