cpp

Coverage Report

Created: 2024-08-25 11:48

/home/andy/git/oilshell/oil/cpp/data_lang.cc
Line
Count
Source (jump to first uncovered line)
1
// data_lang.cc
2
3
#include "cpp/data_lang.h"
4
5
#include "data_lang/j8.h"
6
#include "data_lang/utf8.h"
7
8
// TODO: remove duplication
9
79
#define LOSSY_JSON (1 << 3)
10
11
namespace {
12
13
12
void WriteBString(BigStr* s, mylib::BufWriter* buf, int capacity) {
14
12
  uint8_t* in = reinterpret_cast<uint8_t*>(s->data_);
15
12
  uint8_t* in_end = reinterpret_cast<uint8_t*>(s->data_ + len(s));
16
17
12
  buf->WriteConst("b'");
18
19
  // Set up pointers after writing opening quote
20
12
  uint8_t* out = buf->LengthPointer();  // mutated
21
12
  uint8_t* out_end = buf->CapacityPointer();
22
23
20
  while (true) {
24
20
    J8EncodeChunk(&in, in_end, &out, out_end, true);  // Fill as much as we can
25
20
    buf->SetLengthFrom(out);
26
27
20
    if (in >= in_end) {
28
12
      break;
29
12
    }
30
31
    // Same growth policy as below
32
8
    capacity = capacity * 3 / 2;
33
    // printf("[2] new capacity %d\n", capacity);
34
8
    buf->EnsureMoreSpace(capacity);
35
36
    // Recompute pointers
37
8
    out = buf->LengthPointer();
38
8
    out_end = buf->CapacityPointer();
39
8
  }
40
41
12
  buf->WriteConst("'");
42
12
}
43
44
0
void WriteBashDollarString(BigStr* s, mylib::BufWriter* buf, int capacity) {
45
0
  uint8_t* in = reinterpret_cast<uint8_t*>(s->data_);
46
0
  uint8_t* in_end = reinterpret_cast<uint8_t*>(s->data_ + len(s));
47
48
0
  buf->WriteConst("$'");
49
50
  // Set up pointers after writing opening quote
51
0
  uint8_t* out = buf->LengthPointer();  // mutated
52
0
  uint8_t* out_end = buf->CapacityPointer();
53
54
0
  while (true) {
55
0
    BashDollarEncodeChunk(&in, in_end, &out,
56
0
                          out_end);  // Fill as much as we can
57
0
    buf->SetLengthFrom(out);
58
59
0
    if (in >= in_end) {
60
0
      break;
61
0
    }
62
63
    // Same growth policy as below
64
0
    capacity = capacity * 3 / 2;
65
    // printf("[2] new capacity %d\n", capacity);
66
0
    buf->EnsureMoreSpace(capacity);
67
68
    // Recompute pointers
69
0
    out = buf->LengthPointer();
70
0
    out_end = buf->CapacityPointer();
71
0
  }
72
73
0
  buf->WriteConst("'");
74
0
}
75
76
// Style is COPIED from pyj8::WriteString()
77
// Functionality is like j8_libc.c ShellEncodeString, that is:
78
//
79
// call BourneShellEncodeChunk()
80
// then either
81
//   WriteBString()
82
//   WriteBashDollarString()
83
84
0
void ShellEncodeString(BigStr* s, int ysh_fallback, mylib::BufWriter* buf) {
85
0
  uint8_t* in = reinterpret_cast<uint8_t*>(s->data_);
86
0
  uint8_t* in_end = reinterpret_cast<uint8_t*>(s->data_ + len(s));
87
88
  // Growth policy: Start at a fixed size max(N + 3 + 2, J8_MIN_CAPACITY)
89
0
  int capacity = len(s) + 3 + 2;     // 3 for quotes, 2 potential \" \n
90
0
  if (capacity < J8_MIN_CAPACITY) {  // account for J8_MAX_BYTES_PER_INPUT_BYTE
91
0
    capacity = J8_MIN_CAPACITY;
92
0
  }
93
  // printf("[1] capacity %d\n", capacity);
94
95
0
  buf->EnsureMoreSpace(capacity);
96
97
0
  int begin = buf->Length();  // maybe Truncate to this position
98
0
  buf->WriteConst("'");
99
100
  // Set up pointers after writing opening quote
101
0
  uint8_t* out = buf->LengthPointer();  // mutated
102
0
  uint8_t* out_end = buf->CapacityPointer();
103
104
0
  while (true) {
105
    // Fill in as much as we can
106
0
    int cannot_encode = BourneShellEncodeChunk(&in, in_end, &out, out_end);
107
0
    if (cannot_encode) {
108
0
      buf->Truncate(begin);
109
0
      if (ysh_fallback) {
110
0
        WriteBString(s, buf, capacity);  // fall back to b''
111
0
      } else {
112
0
        WriteBashDollarString(s, buf, capacity);  // fall back to $''
113
0
      }
114
0
      return;
115
0
    }
116
0
    buf->SetLengthFrom(out);
117
118
    // printf("[1] len %d\n", out_buf->len);
119
120
0
    if (in >= in_end) {
121
0
      break;
122
0
    }
123
124
    // Growth policy: every time through the loop, increase 1.5x
125
    //
126
    // The worst blowup is 6x, and 1.5 ** 5 > 6, so it will take 5 reallocs.
127
    // This seems like a reasonable tradeoff between over-allocating and too
128
    // many realloc().
129
0
    capacity = capacity * 3 / 2;
130
    // printf("[1] new capacity %d\n", capacity);
131
0
    buf->EnsureMoreSpace(capacity);
132
133
    // Recompute pointers
134
0
    out = buf->LengthPointer();  // mutated
135
0
    out_end = buf->CapacityPointer();
136
    // printf("[1] out %p out_end %p\n", out, out_end);
137
0
  }
138
139
0
  buf->WriteConst("'");
140
0
}
141
142
}  // namespace
143
144
namespace fastfunc {
145
146
82
bool CanOmitQuotes(BigStr* s) {
147
82
  return ::CanOmitQuotes(reinterpret_cast<unsigned char*>(s->data_), len(s));
148
82
}
149
150
35
BigStr* J8EncodeString(BigStr* s, int j8_fallback) {
151
35
  auto buf = Alloc<mylib::BufWriter>();
152
35
  int options = j8_fallback ? 0 : LOSSY_JSON;
153
35
  pyj8::WriteString(s, options, buf);
154
35
  return buf->getvalue();
155
35
}
156
157
0
BigStr* ShellEncodeString(BigStr* s, int ysh_fallback) {
158
0
  auto buf = Alloc<mylib::BufWriter>();
159
0
  ::ShellEncodeString(s, ysh_fallback, buf);
160
0
  return buf->getvalue();
161
0
}
162
163
8
Tuple2<int, int> Utf8DecodeOne(BigStr* s, int start) {
164
  // Bounds check for safety
165
8
  DCHECK(0 <= start && start < len(s));
166
167
0
  const unsigned char* string = reinterpret_cast<unsigned char*>(s->data());
168
169
8
  Utf8Result_t decode_result;
170
8
  utf8_decode(string + start, &decode_result);
171
8
  int32_t codepoint_or_error;
172
8
  if (decode_result.error) {
173
5
    codepoint_or_error = -decode_result.error;
174
5
  } else {
175
3
    codepoint_or_error = decode_result.codepoint;
176
3
  }
177
178
8
  return Tuple2<int, int>(codepoint_or_error, decode_result.bytes_read);
179
8
}
180
181
}  // namespace fastfunc
182
183
namespace pyj8 {
184
185
5
bool PartIsUtf8(BigStr* s, int start, int end) {
186
5
  Utf8Result result;
187
188
9
  for (int i = start; i < end;) {
189
6
    utf8_decode(reinterpret_cast<unsigned char*>(s->data_ + i), &result);
190
6
    if (result.error) {
191
2
      return false;
192
2
    }
193
194
4
    i += result.bytes_read;
195
4
  }
196
197
3
  return true;
198
5
}
199
200
79
void WriteString(BigStr* s, int options, mylib::BufWriter* buf) {
201
79
  bool j8_fallback = !(options & LOSSY_JSON);
202
203
79
  uint8_t* in = reinterpret_cast<uint8_t*>(s->data_);
204
79
  uint8_t* in_end = reinterpret_cast<uint8_t*>(s->data_ + len(s));
205
206
  // Growth policy: Start at a fixed size max(N + 3 + 2, J8_MIN_CAPACITY)
207
79
  int capacity = len(s) + 3 + 2;     // 3 for quotes, 2 potential \" \n
208
79
  if (capacity < J8_MIN_CAPACITY) {  // account for J8_MAX_BYTES_PER_INPUT_BYTE
209
55
    capacity = J8_MIN_CAPACITY;
210
55
  }
211
  // printf("[1] capacity %d\n", capacity);
212
213
79
  buf->EnsureMoreSpace(capacity);
214
215
79
  int begin = buf->Length();  // maybe Truncate to this position
216
79
  buf->WriteConst("\"");
217
218
  // Set up pointers after writing opening quote
219
79
  uint8_t* out = buf->LengthPointer();  // mutated
220
79
  uint8_t* out_end = buf->CapacityPointer();
221
222
111
  while (true) {
223
    // Fill in as much as we can
224
111
    int invalid_utf8 = J8EncodeChunk(&in, in_end, &out, out_end, false);
225
111
    if (invalid_utf8 && j8_fallback) {
226
12
      buf->Truncate(begin);
227
12
      WriteBString(s, buf, capacity);  // fall back to b''
228
12
      return;
229
12
    }
230
99
    buf->SetLengthFrom(out);
231
232
    // printf("[1] len %d\n", out_buf->len);
233
234
99
    if (in >= in_end) {
235
67
      break;
236
67
    }
237
238
    // Growth policy: every time through the loop, increase 1.5x
239
    //
240
    // The worst blowup is 6x, and 1.5 ** 5 > 6, so it will take 5 reallocs.
241
    // This seems like a reasonable tradeoff between over-allocating and too
242
    // many realloc().
243
32
    capacity = capacity * 3 / 2;
244
    // printf("[1] new capacity %d\n", capacity);
245
32
    buf->EnsureMoreSpace(capacity);
246
247
    // Recompute pointers
248
32
    out = buf->LengthPointer();  // mutated
249
32
    out_end = buf->CapacityPointer();
250
    // printf("[1] out %p out_end %p\n", out, out_end);
251
32
  }
252
253
67
  buf->WriteConst("\"");
254
67
}
255
256
}  // namespace pyj8
257
258
namespace j8 {
259
260
2
int HeapValueId(value_asdl::value_t* val) {
261
2
#ifndef OPTIMIZED
262
  // ASDL generates headers with HeapTag::Scanned, but HeapTag::FixedSize would
263
  // also be valid.
264
2
  ObjHeader* h = ObjHeader::FromObject(val);
265
2
  DCHECK(h->heap_tag == HeapTag::Scanned || h->heap_tag == HeapTag::FixedSize);
266
0
#endif
267
268
0
  return ObjectId(val);
269
2
}
270
271
}  // namespace j8