cpp

Coverage Report

Created: 2023-11-29 23:45

/home/andy/git/oilshell/oil/mycpp/gc_str.h
Line
Count
Source (jump to first uncovered line)
1
#ifndef MYCPP_GC_STR_H
2
#define MYCPP_GC_STR_H
3
4
#include "mycpp/common.h"  // DISALLOW_COPY_AND_ASSIGN
5
#include "mycpp/gc_obj.h"  // GC_OBJ
6
#include "mycpp/hash.h"    // HashFunc
7
8
template <typename T>
9
class List;
10
11
class BigStr {  // Layout: len_, the hash_/is_hashed_ bitfields, then the character bytes in data_
12
 public:
13
  // Don't call this directly.  Call NewStr() instead, which calls this.
14
5.39k
  BigStr() {  // empty body: no member is initialized here
15
5.39k
  }
16
17
530
  char* data() {
18
530
    return data_;  // decays to a pointer to the first byte of the buffer
19
530
  }
20
21
  // Call this after writing into buffer created by OverAllocatedStr()
22
  void MaybeShrink(int str_len);  // defined inline below: sets len_ and writes the NUL terminator
23
24
  BigStr* at(int i);
25
26
  int find(BigStr* needle, int pos = 0);
27
  int rfind(BigStr* needle);
28
29
  BigStr* slice(int begin);
30
  BigStr* slice(int begin, int end);
31
  BigStr* slice(int begin, int end, int step);
32
33
  BigStr* strip();
34
  // Used for CommandSub in osh/cmd_exec.py
35
  BigStr* rstrip(BigStr* chars);
36
  BigStr* rstrip();
37
38
  BigStr* lstrip(BigStr* chars);
39
  BigStr* lstrip();
40
41
  BigStr* ljust(int width, BigStr* fillchar);
42
  BigStr* rjust(int width, BigStr* fillchar);
43
44
  bool startswith(BigStr* s);
45
  bool endswith(BigStr* s);
46
47
  BigStr* replace(BigStr* old, BigStr* new_str);
48
  BigStr* join(List<BigStr*>* items);
49
50
  List<BigStr*>* split(BigStr* sep);
51
  List<BigStr*>* split(BigStr* sep, int max_split);
52
  List<BigStr*>* splitlines(bool keep);
53
54
  bool isdigit();
55
  bool isalpha();
56
  bool isupper();
57
58
  BigStr* upper();
59
  BigStr* lower();
60
61
  // for raw_input() to look like GNU readline
62
  void RemoveNewlineHack();
63
64
  // Other options for fast comparison / hashing / string interning:
65
  // - unique_id_: an index into intern table.  I don't think this works unless
66
  //   you want to deal with rehashing all strings when the set grows.
67
  //   - although note that the JVM has -XX:StringTableSize=FIXED, which means
68
  //     it can degrade into linked list performance
69
  // - Hashed strings become GLOBAL_STR().  Never deallocated.
70
  // - Hashed strings become part of the "large object space", which might be
71
  //   managed by mark and sweep.  This requires linked list overhead.
72
  //   (doubly-linked?)
73
  // - Intern strings at GARBAGE COLLECTION TIME, with
74
  //   LayoutForwarded::new_location_?  Is this possible?  Does it introduce
75
  //   too much coupling between strings, hash tables, and GC?
76
77
5.39k
  static constexpr ObjHeader obj_header() {
78
5.39k
    return ObjHeader::BigStr();
79
5.39k
  }
80
81
  unsigned hash(HashFunc h);  // NOTE(review): presumably fills hash_/is_hashed_ -- confirm in the definition
82
83
  int len_;  // byte count, not including the NUL that MaybeShrink() writes at data_[len_]
84
  unsigned hash_ : 31;  // 31-bit bitfield
85
  unsigned is_hashed_ : 1;  // 1-bit bitfield
86
  char data_[1];  // flexible array: declared with one element, but indexed up to data_[len_] (see MaybeShrink)
87
88
 private:
89
  int _strip_left_pos();
90
  int _strip_right_pos();
91
92
  DISALLOW_COPY_AND_ASSIGN(BigStr)
93
};
94
95
constexpr int kStrHeaderSize = offsetof(BigStr, data_);  // byte offset of data_ within BigStr, i.e. the size of what precedes the string bytes
96
97
// Note: for SmallStr, we might copy into the VALUE
98
156
inline void BigStr::MaybeShrink(int str_len) {  // records the final length; nothing is reallocated or copied here
99
156
  len_ = str_len;  // str_len is trusted: it is not checked against the buffer size
100
156
  data_[len_] = '\0';  // NUL terminate
101
156
}
102
103
13.4k
inline int len(const BigStr* s) {  // returns the stored len_ field; nothing is scanned
104
13.4k
  return s->len_;  // s is dereferenced without a null check
105
13.4k
}
106
107
BigStr* StrFormat(const char* fmt, ...);
108
BigStr* StrFormat(BigStr* fmt, ...);
109
110
// NOTE: This iterates over bytes.
111
class StrIter {  // forward cursor: index i_ starts at 0 and is compared against the length cached in len_
112
 public:
113
126
  explicit StrIter(BigStr* s) : s_(s), i_(0), len_(len(s)) {  // len(s) is read once, here
114
    // Cheney only: s_ could be moved during iteration.
115
    // gHeap.PushRoot(reinterpret_cast<RawObject**>(&s_));
116
126
  }
117
126
  ~StrIter() {
118
    // gHeap.PopRoot();
119
126
  }
120
158
  void Next() {
121
158
    i_++;  // advance by one position; no bounds check
122
158
  }
123
284
  bool Done() {
124
284
    return i_ >= len_;  // true once the index reaches the cached length
125
284
  }
126
  BigStr* Value();  // similar to at()
127
128
 private:
129
  BigStr* s_;
130
  int i_;
131
  int len_;
132
133
  DISALLOW_COPY_AND_ASSIGN(StrIter)
134
};
135
136
bool maybe_str_equals(BigStr* left, BigStr* right);
137
138
extern BigStr* kEmptyString;
139
140
// GlobalStr notes:
141
// - sizeof("foo") == 4, for the NUL terminator.
142
// - gc_heap_test.cc has a static_assert that GlobalStr matches BigStr.  We
143
//   don't put it here because it triggers -Winvalid-offsetof.
144
145
template <int N>
146
class GlobalStr {
147
  // A template type with the same layout as BigStr with length N-1 (which needs
148
  // a buffer of size N).  For initializing global constant instances.
149
 public:
150
  int len_;  // members are declared in the same order and with the same types as BigStr's
151
  unsigned hash_ : 31;
152
  unsigned is_hashed_ : 1;
153
  const char data_[N];  // N bytes: N-1 characters plus the NUL terminator
154
155
  DISALLOW_COPY_AND_ASSIGN(GlobalStr)
156
};
157
158
union Str {  // raw_bytes_ and big_ share the same storage; the members below only use big_
159
 public:
160
  // Instead of this at the start of every function:
161
  //   Str* s = nullptr;
162
  // It will now be:
163
  //   Str s(nullptr);
164
  //
165
  //   StackRoot _root(&s);
166
12
  explicit Str(BigStr* big) : big_(big) {  // explicit: a BigStr* never converts to Str implicitly
167
12
  }
168
169
10
  char* data() {
170
10
    return big_->data();  // forwards to BigStr::data(); big_ is dereferenced without a null check
171
10
  }
172
173
10
  Str at(int i) {
174
10
    return Str(big_->at(i));  // wraps the BigStr* returned by BigStr::at()
175
10
  }
176
177
0
  Str upper() {
178
0
    return Str(big_->upper());  // wraps the BigStr* returned by BigStr::upper()
179
0
  }
180
181
  uint64_t raw_bytes_;  // not read by any member of this union
182
  BigStr* big_;
183
  // TODO: add SmallStr, see mycpp/small_str_test.cc
184
};
185
186
12
inline int len(const Str s) {  // Str overload: delegates to len(const BigStr*)
187
12
  return len(s.big_);  // reads the big_ member of the union
188
12
}
189
190
// This macro is a workaround for the fact that it's impossible to have a
191
// constexpr initializer for char[N].  The "String Literals as Non-Type
192
// Template Parameters" feature of C++ 20 would have done it, but it's not
193
// there.
194
//
195
// https://old.reddit.com/r/cpp_questions/comments/j0khh6/how_to_constexpr_initialize_class_member_thats/
196
// https://stackoverflow.com/questions/10422487/how-can-i-initialize-char-arrays-in-a-constructor
197
//
198
// TODO: Can we hash values at compile time so they can be in the intern table?
199
200
#define GLOBAL_STR(name, val)                                                \
201
  GcGlobal<GlobalStr<sizeof(val)>> _##name = {                               \
202
      ObjHeader::Global(TypeTag::BigStr),                                    \
203
      {.len_ = sizeof(val) - 1, .hash_ = 0, .is_hashed_ = 0, .data_ = val}}; \
204
  BigStr* name = reinterpret_cast<BigStr*>(&_##name.obj);  // 'name' points at the storage '_name'; len_ = sizeof(val) - 1 leaves out the NUL
205
206
// New style for SmallStr compatibility
207
#define GLOBAL_STR2(name, val)                                               \
208
  GcGlobal<GlobalStr<sizeof(val)>> _##name = {                               \
209
      ObjHeader::Global(TypeTag::BigStr),                                    \
210
      {.len_ = sizeof(val) - 1, .hash_ = 0, .is_hashed_ = 0, .data_ = val}}; \
211
  Str name(reinterpret_cast<BigStr*>(&_##name.obj));  // same storage definition as GLOBAL_STR, but 'name' is a Str wrapping the pointer
212
213
#endif  // MYCPP_GC_STR_H