The Pedigree Project  0.1
lib/string.c
1 /*
2  * Copyright (c) 2008-2014, Pedigree Developers
3  *
4  * Please see the CONTRIB file in the root of the source tree for a full
5  * list of contributors.
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include "pedigree/kernel/compiler.h"
21 #include "pedigree/kernel/processor/types.h"
22 #include "pedigree/kernel/utilities/utility.h"
23 #include <stdarg.h>
24 #include <stddef.h>
25 
26 extern void *malloc(size_t);
27 extern void free(void *);
28 
29 EXPORTED_PUBLIC size_t strlen(const char *s);
30 char *strcpy(char *dest, const char *src);
31 char *strncpy(char *dest, const char *src, size_t len);
32 EXPORTED_PUBLIC int strcmp(const char *p1, const char *p2);
33 EXPORTED_PUBLIC int strncmp(const char *p1, const char *p2, size_t n);
34 char *strcat(char *dest, const char *src);
35 char *strncat(char *dest, const char *src, size_t n);
36 char *strchr(const char *str, int target);
37 char *strrchr(const char *str, int target);
38 int vsprintf(char *buf, const char *fmt, va_list arg);
39 unsigned long strtoul(const char *nptr, char const **endptr, int base);
40 
41 #define ULONG_MAX -1
42 
43 char toUpper(char c)
44 {
45  if (c < 'a' || c > 'z')
46  return c; // special chars
47  c += ('A' - 'a');
48  return c;
49 }
50 
51 char toLower(char c)
52 {
53  if (c < 'A' || c > 'Z')
54  return c; // special chars
55  c -= ('A' - 'a');
56  return c;
57 }
58 
59 int max(size_t a, size_t b)
60 {
61  return a > b ? a : b;
62 }
63 
64 int min(size_t a, size_t b)
65 {
66  return a > b ? b : a;
67 }
68 
69 WEAK size_t _StringLength(const char *src)
70 {
71  if (!src)
72  {
73  return 0;
74  }
75 
76  // Unrolled loop that still avoids reading past the end of src (instead of
77  // e.g. doing bitmasks with 64-bit views of src).
78  const char *orig = src;
79  size_t result = 0;
80  while (1)
81  {
82 #define UNROLL(n) \
83  if (!*(src + n)) \
84  return (src + n) - orig;
85  UNROLL(0);
86  UNROLL(1);
87  UNROLL(2);
88  UNROLL(3);
89  UNROLL(4);
90  UNROLL(5);
91  UNROLL(6);
92  UNROLL(7);
93 #undef UNROLL
94  src += 8;
95  }
96 }
97 
98 char *StringCopy(char *dest, const char *src)
99 {
100  char *orig_dest = dest;
101  while (*src)
102  {
103  *dest = *src;
104  ++dest;
105  ++src;
106  }
107  *dest = '\0';
108 
109  return orig_dest;
110 }
111 
112 char *StringCopyN(char *dest, const char *src, size_t len)
113 {
114  char *orig_dest = dest;
115  while (len && LIKELY(*src))
116  {
117  *dest = *src;
118  --len;
119  ++dest;
120  ++src;
121  }
122 
123  // zero-pad if we hit the end of src but len is still non-zero
124  while (len)
125  {
126  *dest = '\0';
127  --len;
128  ++dest;
129  }
130 
131  return orig_dest;
132 }
133 
134 int StringFormat(char *buf, const char *fmt, ...)
135 {
136  va_list args;
137  int i;
138 
139  va_start(args, fmt);
140  i = VStringFormat(buf, fmt, args);
141  va_end(args);
142 
143  return i;
144 }
145 
146 WEAK int StringCompare(const char *p1, const char *p2)
147 {
148  if (p1 == p2)
149  return 0;
150 
151  while (*p1 && *p2)
152  {
153  char c = *p1 - *p2;
154  if (c)
155  return c;
156  ++p1;
157  ++p2;
158  }
159 
160  return *p1 - *p2;
161 }
162 
163 WEAK int StringCompareN(const char *p1, const char *p2, size_t n)
164 {
165  if (!n)
166  return 0;
167  if (p1 == p2)
168  return 0;
169 
170  while (*p1 && *p2)
171  {
172  char c = *p1 - *p2;
173  if (c)
174  return c;
175  else if (!--n)
176  return *p1 - *p2;
177 
178  ++p1;
179  ++p2;
180  }
181 
182  return *p1 - *p2;
183 }
184 
185 WEAK int
186 StringCompareNOffset(const char *p1, const char *p2, size_t n, size_t *offset)
187 {
188  if (!n)
189  {
190  return 0;
191  }
192  if (p1 == p2)
193  {
194  return 0;
195  }
196 
197  size_t orig_n = n;
198 
199  while (*p1 && *p2)
200  {
201  char c = *p1 - *p2;
202  if (c || !--n)
203  break;
204 
205  ++p1;
206  ++p2;
207  }
208 
209  char c = *p1 - *p2;
210  if (c && offset)
211  {
212  *offset = orig_n - n;
213  }
214  return c;
215 }
216 
217 WEAK int StringMatch(const char *p1, const char *p2)
218 {
219  if (p1 == p2)
220  return 0;
221 
222  while (*p1 && *p2)
223  {
224  if (*p1 != *p2)
225  {
226  return 1;
227  }
228  ++p1;
229  ++p2;
230  }
231 
232  return (*p1 == *p2) ? 0 : 1;
233 }
234 
235 WEAK int StringMatchN(const char *p1, const char *p2, size_t n)
236 {
237  if (!n)
238  {
239  return 0;
240  }
241  else if (p1 == p2)
242  {
243  return 0;
244  }
245 
246  size_t i;
247  for (i = 0; i < n; ++i)
248  {
249  if (p1[i] != p2[i])
250  {
251  return 1;
252  }
253  }
254 
255  return 0;
256 }
257 
258 WEAK int
259 StringMatchNOffset(const char *p1, const char *p2, size_t n, size_t *offset)
260 {
261  if (!n)
262  {
263  return 0;
264  }
265  else if (p1 == p2)
266  {
267  return 0;
268  }
269 
270  size_t i;
271  for (i = 0; i < n; ++i)
272  {
273  if (p1[i] != p2[i])
274  {
275  *offset = i;
276  return 1;
277  }
278  }
279 
280  return 0;
281 }
282 
283 char *StringConcat(char *dest, const char *src)
284 {
285  char *origDest = dest;
286  while (*dest) ++dest;
287  while (src && *src)
288  {
289  *dest++ = *src++;
290  }
291 
292  *dest++ = 0;
293 
294  return origDest;
295 }
296 
297 char *StringConcatN(char *dest, const char *src, size_t n)
298 {
299  char *origDest = dest;
300  while (*dest) ++dest;
301  while (src && *src && n)
302  {
303  *dest++ = *src++;
304  --n;
305  }
306 
307  *dest++ = 0;
308 
309  return origDest;
310 }
311 
312 int isspace(int c)
313 {
314  return (c == ' ' || c == '\n' || c == '\r' || c == '\t');
315 }
316 
317 int isupper(int c)
318 {
319  return (c >= 'A' && c <= 'Z');
320 }
321 
322 int islower(int c)
323 {
324  return (c >= 'a' && c <= 'z');
325 }
326 
327 int isdigit(int c)
328 {
329  return (c >= '0' && c <= '9');
330 }
331 
332 int isalpha(int c)
333 {
334  return isupper(c) || islower(c) || isdigit(c);
335 }
336 
337 unsigned long
338 StringToUnsignedLong(const char *nptr, char const **endptr, int base)
339 {
340  register const char *s = nptr;
341  register unsigned long acc;
342  register int c;
343  register unsigned long cutoff;
344  register int neg = 0, any, cutlim;
345 
346  /*
347  * See strtol for comments as to the logic used.
348  */
349  do
350  {
351  c = *s++;
352  } while (isspace(c));
353  if (c == '-')
354  {
355  neg = 1;
356  c = *s++;
357  }
358  else if (c == '+')
359  c = *s++;
360  if ((base == 0 || base == 16) && c == '0' && (*s == 'x' || *s == 'X'))
361  {
362  c = s[1];
363  s += 2;
364  base = 16;
365  }
366  if (base == 0)
367  base = c == '0' ? 8 : 10;
368  cutoff = (unsigned long) ULONG_MAX / (unsigned long) base;
369  cutlim = (unsigned long) ULONG_MAX % (unsigned long) base;
370  for (acc = 0, any = 0;; c = *s++)
371  {
372  if (isdigit(c))
373  c -= '0';
374  else if (isalpha(c))
375  c -= isupper(c) ? 'A' - 10 : 'a' - 10;
376  else
377  break;
378  if (c >= base)
379  break;
380  if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
381  any = -1;
382  else
383  {
384  any = 1;
385  acc *= base;
386  acc += c;
387  }
388  }
389  if (any < 0)
390  {
391  acc = ULONG_MAX;
392  }
393  else if (neg)
394  acc = -acc;
395  if (endptr != 0)
396  *endptr = (const char *) (any ? s - 1 : nptr);
397 
398  return (acc);
399 }
400 
401 // Intentionally casting const char * to char * in these functions, don't warn
402 #pragma GCC diagnostic push
403 #pragma GCC diagnostic ignored "-Wcast-qual"
404 
405 char *StringFind(const char *str, int target)
406 {
407  const char *s;
408  char ch;
409  while (1)
410  {
411 #define UNROLL(n) \
412  s = str + n; \
413  ch = *s; \
414  if (!ch) \
415  return NULL; \
416  if (ch == target) \
417  return (char *) s;
418 
419  UNROLL(0);
420  UNROLL(1);
421  UNROLL(2);
422  UNROLL(3);
423  UNROLL(4);
424  UNROLL(5);
425  UNROLL(6);
426  UNROLL(7);
427 #undef UNROLL
428  str += 8;
429  }
430 }
431 
432 char *StringReverseFind(const char *str, int target)
433 {
434  // StringLength must traverse the entire string once to find the length,
435  // so rather than finding the length and then traversing in reverse, we just
436  // traverse the string once. This gives a small performance boost.
437  const char *s;
438  const char *result = NULL;
439  char ch;
440  while (1)
441  {
442 #define UNROLL(n) \
443  s = str + n; \
444  ch = *s; \
445  if (!ch) \
446  return (char *) result; \
447  if (ch == target) \
448  result = s;
449 
450  UNROLL(0);
451  UNROLL(1);
452  UNROLL(2);
453  UNROLL(3);
454  UNROLL(4);
455  UNROLL(5);
456  UNROLL(6);
457  UNROLL(7);
458 #undef UNROLL
459  str += 8;
460  }
461 }
462 
463 #pragma GCC diagnostic pop
464 
465 int StringContains(const char *str, const char *search)
466 {
467  size_t alen = StringLength(str);
468  size_t blen = StringLength(search);
469  return StringContainsN(str, alen, search, blen);
470 }
471 
472 static int isPrefix(const char *word, size_t wordLength, size_t pos)
473 {
474  size_t suffixLength = wordLength - pos;
475  return StringCompareN(word, word + pos, suffixLength) == 0 ? 1 : 0;
476 }
477 
478 static size_t suffixLength(const char *word, size_t wordLength, size_t pos)
479 {
480  size_t i = 0;
481  for (; (word[pos - i] == word[wordLength - 1 - i]) && (i < pos); i++)
482  ;
483  return i;
484 }
485 
486 int StringContainsN(
487  const char *str, size_t len, const char *search, size_t slen)
488 {
489  // Quick exit cases (these shouldn't really be "contains" queries).
490  if (len < slen)
491  {
492  return 0;
493  }
494  else if (!slen)
495  {
496  return 1;
497  }
498  else if (!len)
499  {
500  return 0;
501  }
502  else if (len == slen)
503  {
504  return StringCompareN(str, search, slen) == 0;
505  }
506 
507  // Boyer-Moore string searching (around 2x faster than a naive search)
508  size_t delta1[256];
509  size_t *delta2 = (size_t *) malloc(slen * sizeof(size_t));
510 
511  for (size_t i = 0; i < 256; ++i)
512  {
513  delta1[i] = slen;
514  }
515 
516  // Build delta1 array (deltas of rightmost unique character in pattern).
517  for (size_t i = 0; i < slen; ++i)
518  {
519  delta1[(int) search[i]] = slen - 1 - i;
520  }
521 
522  // Build delta2 array (full match alignment).
523  ByteSet(delta2, 0, slen * sizeof(size_t));
524 
525  ssize_t lastPrefix = slen - 1;
526  for (ssize_t i = slen - 1; i >= 0; --i)
527  {
528  if (isPrefix(search, slen, i + 1))
529  {
530  lastPrefix = i + 1;
531  }
532  delta2[i] = lastPrefix + (slen - 1 - i);
533  }
534  for (size_t i = 0; i < slen - 1; ++i)
535  {
536  size_t suffixLen = suffixLength(search, slen, i);
537  if (search[i - suffixLen] != search[slen - 1 - suffixLen])
538  {
539  delta2[slen - 1 - suffixLen] = slen - 1 - i + suffixLen;
540  }
541  }
542 
543  for (size_t i = slen - 1; i < len;)
544  {
545  ssize_t j = slen - 1;
546  while (j >= 0 && (str[i] == search[j]))
547  {
548  --i;
549  --j;
550  }
551 
552  if (j < 0)
553  {
554  free(delta2);
555  return 1;
556  }
557 
558  i += max(delta1[(int) str[i]], delta2[j]);
559  }
560 
561  free(delta2);
562  return 0;
563 }
564 
565 int StringCompareCase(
566  const char *s1, const char *s2, int sensitive, size_t length,
567  size_t *offset)
568 {
569  // Case-sensitive compare is just strncmp, basically.
570  if (LIKELY(sensitive))
571  {
572  return StringCompareNOffset(s1, s2, length, offset);
573  }
574 
575  if (!length)
576  {
577  return 0;
578  }
579  else if (s1 == s2)
580  {
581  if (offset)
582  {
583  *offset = StringLength(s1);
584  }
585  return 0;
586  }
587  else if (!s1)
588  {
589  return -1;
590  }
591  else if (!s2)
592  {
593  return 1;
594  }
595 
596  static size_t local = 0;
597  if (UNLIKELY(!offset))
598  {
599  offset = &local;
600  }
601 
602  // Case insensitive search.
603  size_t i = 0;
604  while (*s1 && *s2)
605  {
606  char c = *s1 - *s2;
607  if (c)
608  {
609  // Didn't match, check if that's because the case was wrong.
610  c = toLower(*s1) - toLower(*s2);
611  if (c)
612  {
613  break;
614  }
615  }
616  else if (!--length)
617  {
618  break;
619  }
620 
621  ++s1;
622  ++s2;
623  ++i;
624  }
625 
626  *offset = i;
627  return toLower(*s1) - toLower(*s2);
628 }
629 
630 size_t nextCharacter(const char *s, size_t i)
631 {
632  if (UNLIKELY(!s))
633  {
634  return i;
635  }
636 
637  // UTF-8 version of getting the next character
638  const uint8_t *u8buf = (const uint8_t *) s;
639  if (LIKELY(u8buf[i] <= 0x7F))
640  {
641  return i + 1;
642  }
643  else if ((u8buf[i] & 0xC0) == 0xC0)
644  {
645  if ((u8buf[i] & 0xF8) == 0xF0)
646  {
647  return i + 4; // 4-byte sequence
648  }
649  else if ((u8buf[i] & 0xF0) == 0xE0)
650  {
651  return i + 3;
652  }
653  else
654  {
655  return i + 2;
656  }
657  }
658  return i + 1;
659 }
660 
661 size_t prevCharacter(const char *s, size_t i)
662 {
663  if (!s)
664  {
665  return i;
666  }
667 
668  // TODO handle multibyte chars.
669  return i - 1;
670 }
671 
672 #ifndef UTILITY_LINUX
673 // Provide forwarding functions to handle GCC optimising things.
674 size_t strlen(const char *s)
675 {
676  return StringLength(s);
677 }
678 
679 char *strcpy(char *dest, const char *src)
680 {
681  return StringCopy(dest, src);
682 }
683 
684 char *strncpy(char *dest, const char *src, size_t len)
685 {
686  return StringCopyN(dest, src, len);
687 }
688 
689 int strcmp(const char *p1, const char *p2)
690 {
691  return StringCompare(p1, p2);
692 }
693 
694 int strncmp(const char *p1, const char *p2, size_t n)
695 {
696  return StringCompareN(p1, p2, n);
697 }
698 
699 char *strcat(char *dest, const char *src)
700 {
701  return StringConcat(dest, src);
702 }
703 
704 char *strncat(char *dest, const char *src, size_t n)
705 {
706  return StringConcatN(dest, src, n);
707 }
708 
709 char *strchr(const char *str, int target)
710 {
711  return StringFind(str, target);
712 }
713 
714 char *strrchr(const char *str, int target)
715 {
716  return StringReverseFind(str, target);
717 }
718 
719 int vsprintf(char *buf, const char *fmt, va_list arg)
720 {
721  return VStringFormat(buf, fmt, arg);
722 }
723 
724 unsigned long strtoul(const char *nptr, char const **endptr, int base)
725 {
726  return StringToUnsignedLong(nptr, endptr, base);
727 }
728 #endif