cpp

Coverage Report

Created: 2023-11-29 23:45

/home/andy/git/oilshell/oil/mycpp/gc_builtins.cc
Line
Count
Source (jump to first uncovered line)
1
#include <ctype.h>  // isspace()
2
#include <errno.h>  // errno
3
#include <float.h>  // DBL_MIN, DBL_MAX
4
#include <math.h>   // INFINITY
5
#include <stdio.h>  // required for readline/readline.h (man readline)
6
7
#include "_build/detected-cpp-config.h"
8
#include "mycpp/runtime.h"
9
#ifdef HAVE_READLINE
10
  #include "cpp/frontend_pyreadline.h"
11
#endif
12
13
// Translation of Python's print().
14
134
void print(BigStr* s) {
15
134
  fputs(s->data_, stdout);  // print until first NUL
16
134
  fputc('\n', stdout);
17
134
}
18
19
23
BigStr* str(int i) {
20
23
  BigStr* s = OverAllocatedStr(kIntBufSize);
21
23
  int length = snprintf(s->data(), kIntBufSize, "%d", i);
22
23
  s->MaybeShrink(length);
23
23
  return s;
24
23
}
25
26
// TODO:
27
// - This could use a fancy exact algorithm, not libc
28
// - Does libc depend on locale?
29
4
BigStr* str(double d) {
30
4
  char buf[64];  // overestimate, but we use snprintf() to be safe
31
32
  // Problem:
33
  // %f prints 3.0000000 and 3.500000
34
  // %g prints 3 and 3.5
35
  //
36
  // We want literal syntax to indicate float, so add '.'
37
38
4
  int n = sizeof(buf) - 2;  // in case we add '.0'
39
40
  // %.9g digits for string that can be converted back to the same FLOAT
41
  // (not double)
42
  // https://stackoverflow.com/a/21162120
43
  // https://en.cppreference.com/w/cpp/types/numeric_limits/max_digits10
44
4
  int length = snprintf(buf, n, "%.9g", d);
45
46
  // %a is a hexfloat form, could use that somewhere
47
  // int length = snprintf(buf, n, "%a", d);
48
49
4
  if (strchr(buf, 'i')) {  // inf or -inf
50
0
    return StrFromC(buf);
51
0
  }
52
53
4
  if (!strchr(buf, '.')) {  // 12345 -> 12345.0
54
2
    buf[length] = '.';
55
2
    buf[length + 1] = '0';
56
2
    buf[length + 2] = '\0';
57
2
  }
58
59
4
  return StrFromC(buf);
60
4
}
61
62
// Do we need this API?  Or is mylib.InternedStr(BigStr* s, int start, int end)
63
// better for getting values out of Token.line without allocating?
64
//
65
// e.g. mylib.InternedStr(tok.line, tok.start, tok.start+1)
66
//
67
// Also for SmallStr, we don't care about interning.  Only for HeapStr.
68
69
2
BigStr* intern(BigStr* s) {
70
  // TODO: put in table gHeap.interned_
71
2
  return s;
72
2
}
73
74
// Print quoted string.  TODO: use C-style strings (YSTR)
75
56
BigStr* repr(BigStr* s) {
76
  // Worst case: \0 becomes 4 bytes as '\\x00', and then two quote bytes.
77
56
  int n = len(s);
78
56
  int upper_bound = n * 4 + 2;
79
80
56
  BigStr* result = OverAllocatedStr(upper_bound);
81
82
  // Single quote by default.
83
56
  char quote = '\'';
84
56
  if (memchr(s->data_, '\'', n) && !memchr(s->data_, '"', n)) {
85
10
    quote = '"';
86
10
  }
87
56
  char* p = result->data_;
88
89
  // From PyString_Repr()
90
56
  *p++ = quote;
91
474
  for (int i = 0; i < n; ++i) {
92
418
    char c = s->data_[i];
93
418
    if (c == quote || c == '\\') {
94
0
      *p++ = '\\';
95
0
      *p++ = c;
96
418
    } else if (c == '\t') {
97
7
      *p++ = '\\';
98
7
      *p++ = 't';
99
411
    } else if (c == '\n') {
100
14
      *p++ = '\\';
101
14
      *p++ = 'n';
102
397
    } else if (c == '\r') {
103
7
      *p++ = '\\';
104
7
      *p++ = 'r';
105
390
    } else if (isprint(c)) {
106
372
      *p++ = c;
107
372
    } else {  // Unprintable is \xff
108
18
      sprintf(p, "\\x%02x", c & 0xff);
109
18
      p += 4;
110
18
    }
111
418
  }
112
56
  *p++ = quote;
113
56
  *p = '\0';
114
115
56
  int length = p - result->data_;
116
56
  result->MaybeShrink(length);
117
56
  return result;
118
56
}
119
120
// Helper for str_to_int() that doesn't use exceptions.
121
84
bool StringToInteger(const char* s, int length, int base, int* result) {
122
84
  if (length == 0) {
123
2
    return false;  // empty string isn't a valid integer
124
2
  }
125
126
  // Empirically this is 4 4 8 on 32-bit and 4 8 8 on 64-bit
127
  // We want the bigger numbers
128
#if 0
129
  log("sizeof(int) = %d", sizeof(int));
130
  log("sizeof(long) = %ld", sizeof(long));
131
  log("sizeof(long long) = %ld", sizeof(long long));
132
  log("");
133
  log("LONG_MAX = %ld", LONG_MAX);
134
  log("LLONG_MAX = %lld", LLONG_MAX);
135
#endif
136
137
82
  char* pos;  // mutated by strtol
138
139
82
  long v = strtol(s, &pos, base);
140
141
  // The problem with long long is that mycpp deals with C++ int
142
  // long long v = strtoll(s, &pos, base);
143
144
  // log("v = %ld", v);
145
146
82
  switch (v) {
147
2
  case LONG_MIN:
148
2
    return false;  // underflow
149
2
  case LONG_MAX:
150
2
    return false;  // overflow
151
82
  }
152
153
78
  const char* end = s + length;
154
78
  if (pos == end) {
155
69
    *result = v;
156
69
    return true;  // strtol() consumed ALL characters.
157
69
  }
158
159
13
  while (pos < end) {
160
11
    if (!isspace(*pos)) {
161
7
      return false;  // Trailing non-space
162
7
    }
163
4
    pos++;
164
4
  }
165
166
2
  *result = v;
167
2
  return true;  // Trailing space is OK
168
9
}
169
170
8
int to_int(BigStr* s, int base) {
171
8
  int i;
172
8
  if (StringToInteger(s->data_, len(s), base, &i)) {
173
8
    return i;
174
8
  } else {
175
0
    throw Alloc<ValueError>();
176
0
  }
177
8
}
178
179
31
int to_int(BigStr* s) {
180
31
  int i;
181
31
  if (StringToInteger(s->data_, len(s), 10, &i)) {
182
28
    return i;
183
28
  } else {
184
3
    throw Alloc<ValueError>();
185
3
  }
186
31
}
187
188
832
BigStr* chr(int i) {
189
  // NOTE: i should be less than 256, in which we could return an object from
190
  // GLOBAL_STR() pool, like StrIter
191
832
  auto result = NewStr(1);
192
832
  result->data_[0] = i;
193
832
  return result;
194
832
}
195
196
836
int ord(BigStr* s) {
197
836
  assert(len(s) == 1);
198
  // signed to unsigned conversion, so we don't get values like -127
199
0
  uint8_t c = static_cast<uint8_t>(s->data_[0]);
200
836
  return c;
201
836
}
202
203
4
bool to_bool(BigStr* s) {
204
4
  return len(s) != 0;
205
4
}
206
207
8
double to_float(int i) {
208
8
  return static_cast<double>(i);
209
8
}
210
211
26
double to_float(BigStr* s) {
212
26
  char* begin = s->data_;
213
26
  char* end = begin + len(s);
214
215
26
  errno = 0;
216
26
  double result = strtod(begin, &end);
217
218
26
  if (errno == ERANGE) {  // error: overflow or underflow
219
8
    if (result >= HUGE_VAL) {
220
2
      return INFINITY;
221
6
    } else if (result <= -HUGE_VAL) {
222
2
      return -INFINITY;
223
4
    } else if (-DBL_MIN <= result && result <= DBL_MIN) {
224
4
      return 0.0;
225
4
    } else {
226
0
      FAIL("Invalid value after ERANGE");
227
0
    }
228
8
  }
229
18
  if (end == begin) {  // error: not a floating point number
230
4
    throw Alloc<ValueError>();
231
4
  }
232
233
14
  return result;
234
18
}
235
236
// e.g. ('a' in 'abc')
237
84
bool str_contains(BigStr* haystack, BigStr* needle) {
238
  // Common case
239
84
  if (len(needle) == 1) {
240
72
    return memchr(haystack->data_, needle->data_[0], len(haystack));
241
72
  }
242
243
12
  if (len(needle) > len(haystack)) {
244
2
    return false;
245
2
  }
246
247
  // General case. TODO: We could use a smarter substring algorithm.
248
249
10
  const char* end = haystack->data_ + len(haystack);
250
10
  const char* last_possible = end - len(needle);
251
10
  const char* p = haystack->data_;
252
253
22
  while (p <= last_possible) {
254
20
    if (memcmp(p, needle->data_, len(needle)) == 0) {
255
8
      return true;
256
8
    }
257
12
    p++;
258
12
  }
259
2
  return false;
260
10
}
261
262
92
BigStr* str_repeat(BigStr* s, int times) {
263
  // Python allows -1 too, and Oil used that
264
92
  if (times <= 0) {
265
20
    return kEmptyString;
266
20
  }
267
72
  int len_ = len(s);
268
72
  int new_len = len_ * times;
269
72
  BigStr* result = NewStr(new_len);
270
271
72
  char* dest = result->data_;
272
945
  for (int i = 0; i < times; i++) {
273
873
    memcpy(dest, s->data_, len_);
274
873
    dest += len_;
275
873
  }
276
72
  return result;
277
92
}
278
279
// for os_path.join()
280
// NOTE(Jesse): Perfect candidate for BoundedBuffer
281
22
BigStr* str_concat3(BigStr* a, BigStr* b, BigStr* c) {
282
22
  int a_len = len(a);
283
22
  int b_len = len(b);
284
22
  int c_len = len(c);
285
286
22
  int new_len = a_len + b_len + c_len;
287
22
  BigStr* result = NewStr(new_len);
288
22
  char* pos = result->data_;
289
290
22
  memcpy(pos, a->data_, a_len);
291
22
  pos += a_len;
292
293
22
  memcpy(pos, b->data_, b_len);
294
22
  pos += b_len;
295
296
22
  memcpy(pos, c->data_, c_len);
297
298
22
  assert(pos + c_len == result->data_ + new_len);
299
300
0
  return result;
301
22
}
302
303
71
BigStr* str_concat(BigStr* a, BigStr* b) {
304
71
  int a_len = len(a);
305
71
  int b_len = len(b);
306
71
  int new_len = a_len + b_len;
307
71
  BigStr* result = NewStr(new_len);
308
71
  char* buf = result->data_;
309
310
71
  memcpy(buf, a->data_, a_len);
311
71
  memcpy(buf + a_len, b->data_, b_len);
312
313
71
  return result;
314
71
}
315
316
//
317
// Comparators
318
//
319
320
2.41k
bool str_equals(BigStr* left, BigStr* right) {
321
  // Fast path for identical strings.  String deduplication during GC could
322
  // make this more likely.  String interning could guarantee it, allowing us
323
  // to remove memcmp().
324
2.41k
  if (left == right) {
325
171
    return true;
326
171
  }
327
328
2.24k
  if (left == nullptr || right == nullptr) {
329
0
    return false;
330
0
  }
331
332
  // obj_len equal implies string lengths are equal
333
334
2.24k
  if (left->len_ == right->len_) {
335
    // assert(len(left) == len(right));
336
531
    return memcmp(left->data_, right->data_, left->len_) == 0;
337
531
  }
338
339
1.70k
  return false;
340
2.24k
}
341
342
10
bool maybe_str_equals(BigStr* left, BigStr* right) {
343
10
  if (left && right) {
344
4
    return str_equals(left, right);
345
4
  }
346
347
6
  if (!left && !right) {
348
2
    return true;  // None == None
349
2
  }
350
351
4
  return false;  // one is None and one is a BigStr*
352
6
}
353
354
// TODO(Jesse): Make an inline version of this
355
1.83k
bool are_equal(BigStr* left, BigStr* right) {
356
1.83k
  return str_equals(left, right);
357
1.83k
}
358
359
// TODO(Jesse): Make an inline version of this
360
40
bool are_equal(int left, int right) {
361
40
  return left == right;
362
40
}
363
364
// TODO(Jesse): Make an inline version of this
365
32.4k
bool keys_equal(int left, int right) {
366
32.4k
  return left == right;
367
32.4k
}
368
369
// TODO(Jesse): Make an inline version of this
370
1.74k
bool keys_equal(BigStr* left, BigStr* right) {
371
1.74k
  return are_equal(left, right);
372
1.74k
}
373
374
8
bool are_equal(Tuple2<BigStr*, int>* t1, Tuple2<BigStr*, int>* t2) {
375
8
  bool result = are_equal(t1->at0(), t2->at0());
376
8
  result = result && (t1->at1() == t2->at1());
377
8
  return result;
378
8
}
379
380
4
bool are_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
381
4
  return t1->at0() == t2->at0() && t1->at1() == t2->at1();
382
4
}
383
384
4
bool keys_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
385
4
  return are_equal(t1, t2);
386
4
}
387
388
4
bool keys_equal(Tuple2<BigStr*, int>* t1, Tuple2<BigStr*, int>* t2) {
389
4
  return are_equal(t1, t2);
390
4
}
391
392
252
bool str_equals0(const char* c_string, BigStr* s) {
393
252
  int n = strlen(c_string);
394
252
  if (len(s) == n) {
395
152
    return memcmp(s->data_, c_string, n) == 0;
396
152
  } else {
397
100
    return false;
398
100
  }
399
252
}
400
401
4
int hash(BigStr* s) {
402
4
  return s->hash(fnv1);
403
4
}
404
405
8
int max(int a, int b) {
406
8
  return std::max(a, b);
407
8
}
408
409
2
int max(List<int>* elems) {
410
2
  int n = len(elems);
411
2
  if (n < 1) {
412
0
    throw Alloc<ValueError>();
413
0
  }
414
415
2
  int ret = elems->at(0);
416
10
  for (int i = 0; i < n; ++i) {
417
8
    int cand = elems->at(i);
418
8
    if (cand > ret) {
419
2
      ret = cand;
420
2
    }
421
8
  }
422
423
2
  return ret;
424
2
}