/home/andy/git/oilshell/oil/cpp/data_lang.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // data_lang.cc |
2 | | |
3 | | #include "cpp/data_lang.h" |
4 | | |
5 | | #include "data_lang/j8.h" |
6 | | #include "data_lang/utf8.h" |
7 | | |
8 | | // TODO: remove duplication |
9 | 79 | #define LOSSY_JSON (1 << 3) |
10 | | |
11 | | namespace { |
12 | | |
13 | 12 | void WriteBString(BigStr* s, mylib::BufWriter* buf, int capacity) { |
14 | 12 | uint8_t* in = reinterpret_cast<uint8_t*>(s->data_); |
15 | 12 | uint8_t* in_end = reinterpret_cast<uint8_t*>(s->data_ + len(s)); |
16 | | |
17 | 12 | buf->WriteConst("b'"); |
18 | | |
19 | | // Set up pointers after writing opening quote |
20 | 12 | uint8_t* out = buf->LengthPointer(); // mutated |
21 | 12 | uint8_t* out_end = buf->CapacityPointer(); |
22 | | |
23 | 20 | while (true) { |
24 | 20 | J8EncodeChunk(&in, in_end, &out, out_end, true); // Fill as much as we can |
25 | 20 | buf->SetLengthFrom(out); |
26 | | |
27 | 20 | if (in >= in_end) { |
28 | 12 | break; |
29 | 12 | } |
30 | | |
31 | | // Same growth policy as below |
32 | 8 | capacity = capacity * 3 / 2; |
33 | | // printf("[2] new capacity %d\n", capacity); |
34 | 8 | buf->EnsureMoreSpace(capacity); |
35 | | |
36 | | // Recompute pointers |
37 | 8 | out = buf->LengthPointer(); |
38 | 8 | out_end = buf->CapacityPointer(); |
39 | 8 | } |
40 | | |
41 | 12 | buf->WriteConst("'"); |
42 | 12 | } |
43 | | |
44 | 0 | void WriteBashDollarString(BigStr* s, mylib::BufWriter* buf, int capacity) { |
45 | 0 | uint8_t* in = reinterpret_cast<uint8_t*>(s->data_); |
46 | 0 | uint8_t* in_end = reinterpret_cast<uint8_t*>(s->data_ + len(s)); |
47 | |
|
48 | 0 | buf->WriteConst("$'"); |
49 | | |
50 | | // Set up pointers after writing opening quote |
51 | 0 | uint8_t* out = buf->LengthPointer(); // mutated |
52 | 0 | uint8_t* out_end = buf->CapacityPointer(); |
53 | |
|
54 | 0 | while (true) { |
55 | 0 | BashDollarEncodeChunk(&in, in_end, &out, |
56 | 0 | out_end); // Fill as much as we can |
57 | 0 | buf->SetLengthFrom(out); |
58 | |
|
59 | 0 | if (in >= in_end) { |
60 | 0 | break; |
61 | 0 | } |
62 | | |
63 | | // Same growth policy as below |
64 | 0 | capacity = capacity * 3 / 2; |
65 | | // printf("[2] new capacity %d\n", capacity); |
66 | 0 | buf->EnsureMoreSpace(capacity); |
67 | | |
68 | | // Recompute pointers |
69 | 0 | out = buf->LengthPointer(); |
70 | 0 | out_end = buf->CapacityPointer(); |
71 | 0 | } |
72 | |
|
73 | 0 | buf->WriteConst("'"); |
74 | 0 | } |
75 | | |
76 | | // Style is COPIED from pyj8::WriteString() |
77 | | // Functionality is like j8_libc.c ShellEncodeString, that is: |
78 | | // |
79 | | // call BourneShellEncodeChunk() |
80 | | // then either |
81 | | // WriteBString() |
82 | | // WriteBashDollarString() |
83 | | |
84 | 0 | void ShellEncodeString(BigStr* s, int ysh_fallback, mylib::BufWriter* buf) { |
85 | 0 | uint8_t* in = reinterpret_cast<uint8_t*>(s->data_); |
86 | 0 | uint8_t* in_end = reinterpret_cast<uint8_t*>(s->data_ + len(s)); |
87 | | |
88 | | // Growth policy: Start at a fixed size max(N + 3 + 2, J8_MIN_CAPACITY) |
89 | 0 | int capacity = len(s) + 3 + 2; // 3 for quotes, 2 potential \" \n |
90 | 0 | if (capacity < J8_MIN_CAPACITY) { // account for J8_MAX_BYTES_PER_INPUT_BYTE |
91 | 0 | capacity = J8_MIN_CAPACITY; |
92 | 0 | } |
93 | | // printf("[1] capacity %d\n", capacity); |
94 | |
|
95 | 0 | buf->EnsureMoreSpace(capacity); |
96 | |
|
97 | 0 | int begin = buf->Length(); // maybe Truncate to this position |
98 | 0 | buf->WriteConst("'"); |
99 | | |
100 | | // Set up pointers after writing opening quote |
101 | 0 | uint8_t* out = buf->LengthPointer(); // mutated |
102 | 0 | uint8_t* out_end = buf->CapacityPointer(); |
103 | |
|
104 | 0 | while (true) { |
105 | | // Fill in as much as we can |
106 | 0 | int cannot_encode = BourneShellEncodeChunk(&in, in_end, &out, out_end); |
107 | 0 | if (cannot_encode) { |
108 | 0 | buf->Truncate(begin); |
109 | 0 | if (ysh_fallback) { |
110 | 0 | WriteBString(s, buf, capacity); // fall back to b'' |
111 | 0 | } else { |
112 | 0 | WriteBashDollarString(s, buf, capacity); // fall back to $'' |
113 | 0 | } |
114 | 0 | return; |
115 | 0 | } |
116 | 0 | buf->SetLengthFrom(out); |
117 | | |
118 | | // printf("[1] len %d\n", out_buf->len); |
119 | |
|
120 | 0 | if (in >= in_end) { |
121 | 0 | break; |
122 | 0 | } |
123 | | |
124 | | // Growth policy: every time through the loop, increase 1.5x |
125 | | // |
126 | | // The worst blowup is 6x, and 1.5 ** 5 > 6, so it will take 5 reallocs. |
127 | | // This seems like a reasonable tradeoff between over-allocating and too |
128 | | // many realloc(). |
129 | 0 | capacity = capacity * 3 / 2; |
130 | | // printf("[1] new capacity %d\n", capacity); |
131 | 0 | buf->EnsureMoreSpace(capacity); |
132 | | |
133 | | // Recompute pointers |
134 | 0 | out = buf->LengthPointer(); // mutated |
135 | 0 | out_end = buf->CapacityPointer(); |
136 | | // printf("[1] out %p out_end %p\n", out, out_end); |
137 | 0 | } |
138 | | |
139 | 0 | buf->WriteConst("'"); |
140 | 0 | } |
141 | | |
142 | | } // namespace |
143 | | |
144 | | namespace fastfunc { |
145 | | |
146 | 82 | bool CanOmitQuotes(BigStr* s) { |
147 | 82 | return ::CanOmitQuotes(reinterpret_cast<unsigned char*>(s->data_), len(s)); |
148 | 82 | } |
149 | | |
150 | 35 | BigStr* J8EncodeString(BigStr* s, int j8_fallback) { |
151 | 35 | auto buf = Alloc<mylib::BufWriter>(); |
152 | 35 | int options = j8_fallback ? 0 : LOSSY_JSON; |
153 | 35 | pyj8::WriteString(s, options, buf); |
154 | 35 | return buf->getvalue(); |
155 | 35 | } |
156 | | |
157 | 0 | BigStr* ShellEncodeString(BigStr* s, int ysh_fallback) { |
158 | 0 | auto buf = Alloc<mylib::BufWriter>(); |
159 | 0 | ::ShellEncodeString(s, ysh_fallback, buf); |
160 | 0 | return buf->getvalue(); |
161 | 0 | } |
162 | | |
163 | 8 | Tuple2<int, int> Utf8DecodeOne(BigStr* s, int start) { |
164 | | // Bounds check for safety |
165 | 8 | DCHECK(0 <= start && start < len(s)); |
166 | | |
167 | 0 | const unsigned char* string = reinterpret_cast<unsigned char*>(s->data()); |
168 | | |
169 | 8 | Utf8Result_t decode_result; |
170 | 8 | utf8_decode(string + start, &decode_result); |
171 | 8 | int32_t codepoint_or_error; |
172 | 8 | if (decode_result.error) { |
173 | 5 | codepoint_or_error = -decode_result.error; |
174 | 5 | } else { |
175 | 3 | codepoint_or_error = decode_result.codepoint; |
176 | 3 | } |
177 | | |
178 | 8 | return Tuple2<int, int>(codepoint_or_error, decode_result.bytes_read); |
179 | 8 | } |
180 | | |
181 | | } // namespace fastfunc |
182 | | |
183 | | namespace pyj8 { |
184 | | |
185 | 5 | bool PartIsUtf8(BigStr* s, int start, int end) { |
186 | 5 | Utf8Result result; |
187 | | |
188 | 9 | for (int i = start; i < end;) { |
189 | 6 | utf8_decode(reinterpret_cast<unsigned char*>(s->data_ + i), &result); |
190 | 6 | if (result.error) { |
191 | 2 | return false; |
192 | 2 | } |
193 | | |
194 | 4 | i += result.bytes_read; |
195 | 4 | } |
196 | | |
197 | 3 | return true; |
198 | 5 | } |
199 | | |
200 | 79 | void WriteString(BigStr* s, int options, mylib::BufWriter* buf) { |
201 | 79 | bool j8_fallback = !(options & LOSSY_JSON); |
202 | | |
203 | 79 | uint8_t* in = reinterpret_cast<uint8_t*>(s->data_); |
204 | 79 | uint8_t* in_end = reinterpret_cast<uint8_t*>(s->data_ + len(s)); |
205 | | |
206 | | // Growth policy: Start at a fixed size max(N + 3 + 2, J8_MIN_CAPACITY) |
207 | 79 | int capacity = len(s) + 3 + 2; // 3 for quotes, 2 potential \" \n |
208 | 79 | if (capacity < J8_MIN_CAPACITY) { // account for J8_MAX_BYTES_PER_INPUT_BYTE |
209 | 55 | capacity = J8_MIN_CAPACITY; |
210 | 55 | } |
211 | | // printf("[1] capacity %d\n", capacity); |
212 | | |
213 | 79 | buf->EnsureMoreSpace(capacity); |
214 | | |
215 | 79 | int begin = buf->Length(); // maybe Truncate to this position |
216 | 79 | buf->WriteConst("\""); |
217 | | |
218 | | // Set up pointers after writing opening quote |
219 | 79 | uint8_t* out = buf->LengthPointer(); // mutated |
220 | 79 | uint8_t* out_end = buf->CapacityPointer(); |
221 | | |
222 | 111 | while (true) { |
223 | | // Fill in as much as we can |
224 | 111 | int invalid_utf8 = J8EncodeChunk(&in, in_end, &out, out_end, false); |
225 | 111 | if (invalid_utf8 && j8_fallback) { |
226 | 12 | buf->Truncate(begin); |
227 | 12 | WriteBString(s, buf, capacity); // fall back to b'' |
228 | 12 | return; |
229 | 12 | } |
230 | 99 | buf->SetLengthFrom(out); |
231 | | |
232 | | // printf("[1] len %d\n", out_buf->len); |
233 | | |
234 | 99 | if (in >= in_end) { |
235 | 67 | break; |
236 | 67 | } |
237 | | |
238 | | // Growth policy: every time through the loop, increase 1.5x |
239 | | // |
240 | | // The worst blowup is 6x, and 1.5 ** 5 > 6, so it will take 5 reallocs. |
241 | | // This seems like a reasonable tradeoff between over-allocating and too |
242 | | // many realloc(). |
243 | 32 | capacity = capacity * 3 / 2; |
244 | | // printf("[1] new capacity %d\n", capacity); |
245 | 32 | buf->EnsureMoreSpace(capacity); |
246 | | |
247 | | // Recompute pointers |
248 | 32 | out = buf->LengthPointer(); // mutated |
249 | 32 | out_end = buf->CapacityPointer(); |
250 | | // printf("[1] out %p out_end %p\n", out, out_end); |
251 | 32 | } |
252 | | |
253 | 67 | buf->WriteConst("\""); |
254 | 67 | } |
255 | | |
256 | | } // namespace pyj8 |
257 | | |
258 | | namespace j8 { |
259 | | |
260 | 2 | int HeapValueId(value_asdl::value_t* val) { |
261 | 2 | #ifndef OPTIMIZED |
262 | | // ASDL generates headers with HeapTag::Scanned, but HeapTag::FixedSize would |
263 | | // also be valid. |
264 | 2 | ObjHeader* h = ObjHeader::FromObject(val); |
265 | 2 | DCHECK(h->heap_tag == HeapTag::Scanned || h->heap_tag == HeapTag::FixedSize); |
266 | 0 | #endif |
267 | | |
268 | 0 | return ObjectId(val); |
269 | 2 | } |
270 | | |
271 | | } // namespace j8 |