The Pedigree Project  0.1
String.cc
1 /*
2  * Copyright (c) 2008-2014, Pedigree Developers
3  *
4  * Please see the CONTRIB file in the root of the source tree for a full
5  * list of contributors.
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include "pedigree/kernel/utilities/String.h"
21 #include "pedigree/kernel/Log.h"
22 #include "pedigree/kernel/utilities/StringView.h"
23 #include "pedigree/kernel/utilities/assert.h"
24 #include "pedigree/kernel/utilities/utility.h"
25 #include <stdarg.h>
26 
28  : m_Data(0), m_ConstData(nullptr), m_Length(0), m_Size(StaticSize),
29  m_HeapData(true), m_Hash(0)
30 {
31  m_Static[0] = '\0';
32 }
33 
34 String::String(const char *s) : String()
35 {
36  assign(s);
37 }
38 
39 String::String(const char *s, size_t length) : String()
40 {
41  assign(s, length);
42 }
43 
44 #if !STRING_DISABLE_COPY_CONSTRUCTION
45 String::String(const String &x) : String()
46 {
47  assign(x);
48 }
49 #endif
50 
52 {
53  move(pedigree_std::move(x));
54 }
55 
56 String::~String()
57 {
58  free();
59 }
60 
61 void String::move(String &&other)
62 {
63  // take ownership of the object
64  m_Data = pedigree_std::move(other.m_Data);
65  m_ConstData = pedigree_std::move(other.m_ConstData);
66  m_Length = pedigree_std::move(other.m_Length);
67  m_Size = pedigree_std::move(other.m_Size);
68  m_HeapData = pedigree_std::move(other.m_HeapData);
69  m_Hash = pedigree_std::move(other.m_Hash);
70  if (m_Size == StaticSize)
71  {
72  MemoryCopy(m_Static, other.m_Static, m_Length + 1);
73  }
74 
75  // free other string but don't destroy the heap pointer if we had one
76  // as it is now owned by this new instance
77  other.m_Data = 0;
78  other.free();
79 }
80 
81 String &String::operator=(String &&x)
82 {
83  move(pedigree_std::move(x));
84  return *this;
85 }
86 
87 #if !STRING_DISABLE_COPY_CONSTRUCTION
88 String &String::operator=(const String &x)
89 {
90  assign(x);
91  return *this;
92 }
93 
94 String &String::operator=(const char *s)
95 {
96  assign(s);
97  return *this;
98 }
99 #endif
100 
101 String &String::operator+=(const String &x)
102 {
103  // Switch from const to dynamic string.
104  if (!m_HeapData)
105  {
106  assign(m_ConstData, m_Length);
107  }
108 
109  size_t newLength = x.length() + m_Length;
110 
111  char *dst = m_Static;
112 
113  // Do we need to transfer static into dynamic for this?
114  if (newLength >= StaticSize)
115  {
116  reserve(newLength + 1);
117  if (m_Length < StaticSize)
118  MemoryCopy(m_Data, m_Static, m_Length);
119  dst = m_Data;
120  }
121 
122  const char *src = x.m_Static;
123  if (x.length() > StaticSize)
124  src = x.m_Data;
125 
126  // Copy!
127  MemoryCopy(&dst[m_Length], src, x.length() + 1);
128  m_Length += x.length();
129  m_Hash = 0; // hash is no longer valid
130 #if STRING_DISABLE_JIT_HASHING
131  computeHash();
132 #endif
133  return *this;
134 }
135 
136 String &String::operator+=(const char *s)
137 {
138  // Switch from const to dynamic string.
139  if (!m_HeapData)
140  {
141  assign(m_ConstData, m_Length);
142  }
143 
144  size_t slen = StringLength(s);
145  size_t newLength = slen + m_Length;
146  if (newLength < StaticSize)
147  {
148  // By the nature of the two lengths combined being below the static
149  // size, we can be assured that we can use the static buffer in
150  // both strings.
151  MemoryCopy(&m_Static[m_Length], s, slen + 1);
152  }
153  else
154  {
155  reserve(slen + m_Length + 1);
156  if (m_Length < StaticSize)
157  MemoryCopy(m_Data, m_Static, m_Length);
158  MemoryCopy(&m_Data[m_Length], s, slen + 1);
159  }
160 
161  m_Length += slen;
162  m_Hash = 0;
163 #if STRING_DISABLE_JIT_HASHING
164  computeHash();
165 #endif
166  return *this;
167 }
168 
169 bool String::operator==(const String &s) const
170 {
171  if (m_Length != s.m_Length)
172  {
173  return false;
174  }
175  else if (m_Hash && (m_Hash != s.hash()))
176  {
177  // precomputed hash didn't match, don't bother
178  return false;
179  }
180 
181  const char *buf = extract();
182  const char *other_buf = s.extract();
183 
184  // Neither of these can be null because of the above conditions.
185  return !StringMatchN(buf, other_buf, m_Length + 1);
186 }
187 
188 bool String::operator==(const StringView &s) const
189 {
190  // use StringView::operator==(const String &)
191  return s == *this;
192 }
193 
194 bool String::operator==(const char *s) const
195 {
196  const char *buf = extract();
197 
198  if ((!m_Length) && (s == 0))
199  {
200  return true;
201  }
202  else if (s == 0)
203  {
204  // m_Length > 0 but other buffer is null.
205  return false;
206  }
207  else if ((!m_Length) && *s)
208  {
209  // Quick check when we're zero-length.
210  return false;
211  }
212  else
213  {
214  return StringMatchN(buf, s, m_Length + 1) == 0;
215  }
216 }
217 
218 uint32_t String::hash() const
219 {
220  if (!m_Hash)
221  {
222  return computeHash();
223  }
224 
225  return m_Hash;
226 }
227 
228 uint32_t String::hash()
229 {
230  if (!m_Hash)
231  {
232  computeHash();
233  }
234 
235  return m_Hash;
236 }
237 
238 size_t String::nextCharacter(size_t c) const
239 {
240  const char *buf = extract();
241  return ::nextCharacter(buf, c);
242 }
243 
244 size_t String::prevCharacter(size_t c) const
245 {
246  const char *buf = extract();
247  return ::prevCharacter(buf, c);
248 }
249 
250 void String::assign(const String &x)
251 {
252  m_Length = x.length();
253  if (m_Length < StaticSize)
254  {
255  MemoryCopy(m_Static, x.m_Static, m_Length + 1);
256  if (m_HeapData)
257  {
258  delete[] m_Data;
259  }
260  m_Data = 0;
261  m_Size = StaticSize;
262  }
263  else
264  {
265  // Length is bigger than a static buffer, no need to check for empty
266  // buffer.
267  reserve(m_Length + 1, false);
268  MemoryCopy(m_Data, x.m_Data, m_Length + 1);
269  }
270 
271  m_HeapData = true;
272  // m_ConstData = nullptr;
273 
274  // no need to recompute in this case
275  m_Hash = x.m_Hash;
276 
277 #ifdef ADDITIONAL_CHECKS
278  if (*this != x)
279  {
280  ERROR("mismatch: '" << *this << "' != '" << x << "'");
281  if (m_ConstData)
282  {
283  ERROR("const data was " << m_ConstData);
284  }
285  }
286  assert(*this == x);
287 #endif
288 }
289 
290 void String::assign(const char *s, size_t len, bool unsafe)
291 {
292  size_t copyLength = 0;
293  // len overrides all other optimizations
294  if (len)
295  {
296  // Fix up length if the passed string is much smaller than the 'len'
297  // parameter (otherwise we think we have a giant string).
298  if (!unsafe)
299  {
300  size_t trueLength = StringLength(s);
301  if (trueLength < len)
302  {
303  len = trueLength;
304  }
305  }
306  m_Length = len;
307  copyLength = len;
308  }
309  else if (!s || !*s)
310  {
311  m_Length = 0;
312  }
313  else
314  {
315  m_Length = StringLength(s);
316  copyLength = m_Length;
317  }
318 
319  if (!m_Length)
320  {
321  ByteSet(m_Static, 0, StaticSize);
322  if (m_HeapData)
323  {
324  delete[] m_Data;
325  }
326  m_Data = 0;
327  m_Size = StaticSize;
328  }
329  else if (m_Length < StaticSize)
330  {
331  MemoryCopy(m_Static, s, copyLength);
332  if (m_HeapData)
333  {
334  delete[] m_Data;
335  }
336  m_Data = 0;
337  m_Size = StaticSize;
338  m_Static[copyLength] = '\0';
339  }
340  else
341  {
342  reserve(m_Length + 1, false);
343  MemoryCopy(m_Data, s, copyLength);
344  m_Data[copyLength] = '\0';
345  }
346 
347  m_HeapData = true;
348  m_ConstData = nullptr;
349 
350 #ifdef ADDITIONAL_CHECKS
351  if (!len)
352  {
353  assert(*this == s);
354  }
355 #endif
356 
357  m_Hash = 0;
358 #if STRING_DISABLE_JIT_HASHING
359  computeHash();
360 #endif
361 }
362 
363 void String::reserve(size_t size)
364 {
365  reserve(size, true);
366 }
367 
368 void String::reserve(size_t size, bool zero)
369 {
370  // Don't reserve if we're a static string.
371  if (size <= StaticSize)
372  {
373  if (m_Size > StaticSize)
374  {
375  m_Size = StaticSize;
376  MemoryCopy(m_Static, m_Data, size);
377  if (m_HeapData)
378  {
379  delete[] m_Data;
380  }
381  m_Data = 0;
382  }
383 
384  return;
385  }
386  else if (size > m_Size)
387  {
388  char *tmp = m_Data;
389  m_Data = new char[size];
390  if (tmp)
391  {
392  MemoryCopy(m_Data, tmp, m_Size > size ? size : m_Size);
393  if (m_HeapData)
394  {
395  delete[] tmp;
396  }
397  }
398  else if (zero)
399  {
400  ByteSet(m_Data, 0, size);
401  }
402  m_Size = size;
403  }
404 }
406 {
407  if (m_HeapData && m_Data)
408  {
409  delete[] m_Data;
410  }
411  m_Static[0] = '\0';
412  m_Data = 0;
413  m_Length = 0;
414  m_Size = 0;
415  m_Hash = 0;
416 }
417 
418 String String::split(size_t offset)
419 {
420  String result;
421  split(offset, result);
422  return result;
423 }
424 
425 void String::split(size_t offset, String &back)
426 {
427  if (offset >= m_Length)
428  {
429  back.free();
430  return;
431  }
432 
433  char *buf = extract();
434 
435  back.assign(&buf[offset]);
436  m_Length = offset;
437 
438  // Handle the case where the split causes our string to suddenly be shorter
439  // than the static size.
440  if ((m_Length < StaticSize) && (buf == m_Data))
441  {
442  MemoryCopy(m_Static, buf, m_Length);
443  buf = m_Static;
444  if (m_HeapData)
445  {
446  delete[] m_Data;
447  }
448  m_Data = 0;
449  m_Size = StaticSize;
450  }
451 
452  buf[m_Length] = 0;
453 
454  m_Hash = 0;
455 #if STRING_DISABLE_JIT_HASHING
456  computeHash();
457 #endif
458 }
459 
461 {
462  lstrip();
463  rstrip();
464 }
465 
467 {
468  char *buf = extract();
469 
470  if (!iswhitespace(buf[0]))
471  return;
472 
473  // finish up the byte tail
474  size_t n = 0;
475  while (n < m_Length && iswhitespace(buf[n]))
476  n++;
477 
478  // Move the data to cover up the whitespace and avoid reallocating m_Data
479  m_Length -= n;
480  MemoryCopy(buf, (buf + n), m_Length);
481  buf[m_Length] = 0;
482 
483  // Did we suddenly drop below the static size?
484  if ((buf == m_Data) && (m_Length < StaticSize))
485  {
486  MemoryCopy(m_Static, m_Data, m_Length + 1);
487  m_Size = StaticSize;
488  if (m_HeapData)
489  {
490  delete[] m_Data;
491  }
492  m_Data = 0;
493  }
494 
495  m_Hash = 0;
496 #if STRING_DISABLE_JIT_HASHING
497  computeHash();
498 #endif
499 }
500 
502 {
503  char *buf = extract();
504 
505  if (!iswhitespace(buf[m_Length - 1]))
506  return;
507 
508  size_t n = m_Length;
509  while (n > 0 && iswhitespace(buf[n - 1]))
510  n--;
511 
512  // m_Size is still valid - it's the size of the buffer. m_Length is now
513  // updated to contain the proper length of the string, but the buffer is
514  // not reallocated.
515  m_Length = n;
516  buf[m_Length] = 0;
517 
518  // Did we suddenly drop below the static size?
519  if ((buf == m_Data) && (m_Length < StaticSize))
520  {
521  MemoryCopy(m_Static, m_Data, m_Length + 1);
522  m_Size = StaticSize;
523  if (m_HeapData)
524  {
525  delete[] m_Data;
526  }
527  m_Data = 0;
528  }
529 
530  m_Hash = 0;
531 #if STRING_DISABLE_JIT_HASHING
532  computeHash();
533 #endif
534 }
535 
536 Vector<String> String::tokenise(char token)
537 {
538  Vector<String> list;
539  tokenise(token, list);
540  return list;
541 }
542 
543 size_t String::Utf32ToUtf8(uint32_t utf32, char *utf8)
544 {
545  // clear out the string before conversion
546  ByteSet(utf8, 0, 4);
547 
548  size_t nbuf = 0;
549  if (utf32 <= 0x7F)
550  {
551  utf8[0] = utf32 & 0x7F;
552  nbuf = 1;
553  }
554  else if (utf32 <= 0x7FF)
555  {
556  utf8[0] = 0xC0 | ((utf32 >> 6) & 0x1F);
557  utf8[1] = 0x80 | (utf32 & 0x3F);
558  nbuf = 2;
559  }
560  else if (utf32 <= 0xFFFF)
561  {
562  utf8[0] = 0xE0 | ((utf32 >> 12) & 0x0F);
563  utf8[1] = 0x80 | ((utf32 >> 6) & 0x3F);
564  utf8[2] = 0x80 | (utf32 & 0x3F);
565  nbuf = 3;
566  }
567  else if (utf32 <= 0x10FFFF)
568  {
569  utf8[0] = 0xF0 | ((utf32 >> 18) & 0x07);
570  utf8[1] = 0x80 | ((utf32 >> 12) & 0x3F);
571  utf8[2] = 0x80 | ((utf32 >> 6) & 0x3F);
572  utf8[3] = 0x80 | (utf32 & 0x3F);
573  nbuf = 4;
574  }
575 
576  return nbuf;
577 }
578 
579 void String::tokenise(char token, Vector<StringView> &output) const
580 {
581  const char *orig_buffer = extract();
582  const char *buffer = orig_buffer;
583 
584  output.clear();
585 
586  const char *pos = buffer ? StringFind(buffer, token) : nullptr;
587  while (pos && (*buffer))
588  {
589  if (pos == buffer)
590  {
591  ++buffer;
592  continue;
593  }
594 
595  if (pos > buffer)
596  {
597  output.pushBack(StringView(buffer, pos - buffer));
598  }
599 
600  buffer = pos + 1;
601 
602  pos = StringFind(buffer, token);
603  }
604 
605  if (buffer && !pos)
606  {
607  // might be able to just copy this string rather than copy & move
608  if (buffer == orig_buffer)
609  {
610  output.pushBack(view());
611  }
612  else
613  {
614  size_t length = m_Length - (buffer - orig_buffer);
615  if (length)
616  {
617  output.pushBack(StringView(buffer, length));
618  }
619  }
620  }
621 }
622 
623 void String::tokenise(char token, Vector<String> &output) const
624 {
625  Vector<StringView> views;
626  tokenise(token, views);
627 
628  output.clear();
629  for (auto &it : views)
630  {
631  output.pushBack(it.toString());
632  }
633 }
634 
636 {
637  char *buf = extract();
638 
639  StringCopy(buf, &buf[1]);
640  --m_Length;
641 
642  // Did we suddenly drop below the static size?
643  if ((buf == m_Data) && (m_Length < StaticSize))
644  {
645  MemoryCopy(m_Static, m_Data, m_Length + 1);
646  m_Size = StaticSize;
647  if (m_HeapData)
648  {
649  delete[] m_Data;
650  }
651  m_Data = 0;
652  }
653 
654  m_Hash = 0;
655 #if STRING_DISABLE_JIT_HASHING
656  computeHash();
657 #endif
658 }
659 
661 {
662  char *buf = extract();
663 
664  m_Length--;
665  buf[m_Length] = '\0';
666 
667  // Did we suddenly drop below the static size?
668  if ((buf == m_Data) && (m_Length < StaticSize))
669  {
670  MemoryCopy(m_Static, m_Data, m_Length + 1);
671  m_Size = StaticSize;
672  if (m_HeapData)
673  {
674  delete[] m_Data;
675  }
676  m_Data = 0;
677  }
678 
679  m_Hash = 0;
680 #if STRING_DISABLE_JIT_HASHING
681  computeHash();
682 #endif
683 }
684 
685 void String::Format(const char *fmt, ...)
686 {
687  reserve(256);
688  va_list vl;
689  va_start(vl, fmt);
690  m_Length = VStringFormat(m_Data, fmt, vl);
691  va_end(vl);
692 
693  if (m_Length < StaticSize)
694  {
695  MemoryCopy(m_Static, m_Data, m_Length + 1);
696  m_Size = StaticSize;
697  if (m_HeapData)
698  {
699  delete[] m_Data;
700  }
701  m_Data = 0;
702  }
703 
704  m_Hash = 0;
705 #if STRING_DISABLE_JIT_HASHING
706  computeHash();
707 #endif
708 }
709 
710 bool String::endswith(const char c) const
711 {
712  if (!m_Length)
713  {
714  return false;
715  }
716 
717  const char *buf = extract();
718  return buf[m_Length - 1] == c;
719 }
720 
721 bool String::endswith(const String &s) const
722 {
723  // Not a suffix check.
724  if (m_Length == s.length())
725  return *this == s;
726 
727  const char *otherbuf = s.extract();
728  return endswith(otherbuf, s.length());
729 }
730 
731 bool String::endswith(const char *s, size_t len) const
732 {
733  if (!len)
734  {
735  len = StringLength(s);
736  }
737 
738  // Suffix exceeds our length.
739  if (m_Length < len)
740  return false;
741 
742  const char *mybuf = extract();
743  mybuf += m_Length - len;
744 
745  return !MemoryCompare(mybuf, s, len);
746 }
747 
748 bool String::startswith(const char c) const
749 {
750  if (!m_Length)
751  {
752  return false;
753  }
754 
755  const char *buf = extract();
756  return buf[0] == c;
757 }
758 
759 bool String::startswith(const String &s) const
760 {
761  // Not a prefix check.
762  if (m_Length == s.length())
763  return *this == s;
764 
765  const char *otherbuf = s.extract();
766  return startswith(otherbuf, s.length());
767 }
768 
769 bool String::startswith(const char *s, size_t len) const
770 {
771  if (!len)
772  {
773  len = StringLength(s);
774  }
775 
776  // Prefix exceeds our length.
777  if (m_Length < len)
778  return false;
779 
780  const char *mybuf = extract();
781 
782  // Do the check.
783  return !MemoryCompare(mybuf, s, len);
784 }
785 
786 bool String::iswhitespace(const char c) const
787 {
788  return (c <= ' ' || c == '\x7f');
789 }
790 
791 char *String::extract() const
792 {
793  if (!m_HeapData)
794  {
795  return const_cast<char *>(m_ConstData);
796  }
797 
798  if (m_Length < StaticSize)
799  {
800  // const_cast because we don't have a side effect but need to return
801  // a pointer to our object regardless
802  return const_cast<char *>(m_Static);
803  }
804  else
805  {
806  return m_Data;
807  }
808 }
809 
810 ssize_t String::find(const char c) const
811 {
812  if (!m_Length)
813  return -1;
814 
818  ssize_t signedLength = m_Length;
819 
820  char *buf = extract();
821  for (ssize_t i = 0; i < signedLength; ++i)
822  {
823  if (buf[i] == c)
824  {
825  return i;
826  }
827  }
828 
829  return -1;
830 }
831 
832 ssize_t String::rfind(const char c) const
833 {
834  if (!m_Length)
835  return -1;
836 
837  char *buf = extract();
838  for (ssize_t i = m_Length - 1, n = 0; i >= 0; --i, ++n)
839  {
840  if (buf[i] == c)
841  {
842  return n;
843  }
844  }
845 
846  return -1;
847 }
848 
850 {
851  if (m_Length)
852  {
853  m_Hash = spookyHash(extract(), m_Length);
854  }
855  else
856  {
857  m_Hash = 0;
858  }
859 }
860 
861 uint32_t String::computeHash() const
862 {
863  if (m_Length)
864  {
865  return spookyHash(extract(), m_Length);
866  }
867  else
868  {
869  return 0;
870  }
871 }
872 
874 {
875  String result;
876  result.assign(*this);
877  return pedigree_std::move(result);
878 }
879 
881 {
882  // hash already calculated, enable hashing
883  return StringView(extract(), m_Length, m_Hash, true);
884 }
void chomp()
Definition: String.cc:660
const char * m_ConstData
Definition: String.h:206
void pushBack(const T &value)
Definition: Vector.h:270
String split(size_t offset)
Definition: String.cc:418
A vector / dynamic array.
bool iswhitespace(const char c) const
Definition: String.cc:786
String copy() const
Definition: String.cc:873
static constexpr const size_t StaticSize
Definition: String.h:202
uint32_t hash() const
Definition: String.cc:218
Definition: String.h:49
size_t nextCharacter(size_t c) const
Definition: String.cc:238
bool m_HeapData
Definition: String.h:214
StringView view() const
Definition: String.cc:880
char m_Static[StaticSize]
Definition: String.h:212
size_t m_Length
Definition: String.h:208
size_t prevCharacter(size_t c) const
Definition: String.cc:244
#define assert(x)
Definition: assert.h:37
char * m_Data
Definition: String.h:204
uint32_t m_Hash
Definition: String.h:219
ssize_t find(const char c) const
Definition: String.cc:810
String()
Definition: String.cc:27
void computeHash()
Definition: String.cc:849
void strip()
Definition: String.cc:460
size_t m_Size
Definition: String.h:210
void lstrip()
Definition: String.cc:466
void rstrip()
Definition: String.cc:501
static size_t Utf32ToUtf8(uint32_t utf32, char *utf8)
Definition: String.cc:543
void lchomp()
Definition: String.cc:635
#define ERROR(text)
Definition: Log.h:82
void clear(bool freeMem=false)
Definition: Vector.h:337
char * extract() const
Definition: String.cc:791
void move(String &&other)
Definition: String.cc:61
bool endswith(const char c) const
Definition: String.cc:710
bool startswith(const char c) const
Definition: String.cc:748
void free()
Definition: String.cc:405