Skip to content

Commit 124bfdf

Browse files
authored
[libc] Use function overloads to make string parsing code more generic. (#167417)
ctype_utils/wctype_utils were changed in 120689e and e7f7973, respectively, to operate on char/wchar_t. Now we can switch to the overloaded names (e.g. have both `isspace(char)` and `isspace(wchar_t)`) to simplify the templatized strtointeger implementation from 315dfe5 and make it easier to potentially add a templatized strtofloat implementation.
1 parent d04d291 commit 124bfdf

File tree

4 files changed

+40
-54
lines changed

4 files changed

+40
-54
lines changed

libc/src/__support/ctype_utils.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,13 @@ LIBC_INLINE static constexpr bool isgraph(char ch) {
578578
return 0x20 < ch && ch < 0x7f;
579579
}
580580

581+
// An overload which provides a way to compare input with specific character
582+
// values, when input can be of a regular or a wide character type.
583+
LIBC_INLINE static constexpr bool is_char_or_wchar(char ch, char c_value,
584+
[[maybe_unused]] wchar_t) {
585+
return (ch == c_value);
586+
}
587+
581588
} // namespace internal
582589
} // namespace LIBC_NAMESPACE_DECL
583590

libc/src/__support/str_to_integer.h

Lines changed: 15 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -31,34 +31,26 @@ namespace LIBC_NAMESPACE_DECL {
3131
namespace internal {
3232

3333
// Returns the idx to the first character in src that is not a whitespace
34-
// character (as determined by isspace() / iswspace())
34+
// character (as determined by isspace())
3535
template <typename CharType>
3636
LIBC_INLINE size_t
3737
first_non_whitespace(const CharType *__restrict src,
3838
size_t src_len = cpp::numeric_limits<size_t>::max()) {
3939
size_t src_cur = 0;
40-
while (src_cur < src_len) {
41-
if constexpr (cpp::is_same_v<CharType, char>) {
42-
if (!internal::isspace(src[src_cur]))
43-
break;
44-
} else {
45-
if (!internal::iswspace(src[src_cur]))
46-
break;
47-
}
48-
++src_cur;
49-
}
40+
for (; src_cur < src_len && internal::isspace(src[src_cur]); ++src_cur)
41+
;
5042
return src_cur;
5143
}
5244

5345
// Returns +1, -1, or 0 if 'src' starts with (respectively)
5446
// plus sign, minus sign, or neither.
5547
template <typename CharType>
5648
LIBC_INLINE static int get_sign(const CharType *__restrict src) {
57-
if constexpr (cpp::is_same_v<CharType, char>) {
58-
return (src[0] == '+') ? 1 : (src[0] == '-' ? -1 : 0);
59-
} else {
60-
return (src[0] == L'+') ? 1 : (src[0] == L'-' ? -1 : 0);
61-
}
49+
if (is_char_or_wchar(src[0], '+', L'+'))
50+
return 1;
51+
if (is_char_or_wchar(src[0], '-', L'-'))
52+
return -1;
53+
return 0;
6254
}
6355

6456
// checks if the next 3 characters of the string pointer are the start of a
@@ -68,13 +60,9 @@ LIBC_INLINE static bool is_hex_start(const CharType *__restrict src,
6860
size_t src_len) {
6961
if (src_len < 3)
7062
return false;
71-
if constexpr (cpp::is_same_v<CharType, char>) {
72-
return src[0] == '0' && tolower(src[1]) == 'x' && isalnum(src[2]) &&
73-
b36_char_to_int(src[2]) < 16;
74-
} else {
75-
return src[0] == L'0' && towlower(src[1]) == L'x' && iswalnum(src[2]) &&
76-
b36_wchar_to_int(src[2]) < 16;
77-
}
63+
return is_char_or_wchar(src[0], '0', L'0') &&
64+
is_char_or_wchar(tolower(src[1]), 'x', L'x') && isalnum(src[2]) &&
65+
b36_char_to_int(src[2]) < 16;
7866
}
7967

8068
// Takes the address of the string pointer and parses the base from the start of
@@ -90,14 +78,8 @@ LIBC_INLINE static int infer_base(const CharType *__restrict src,
9078
// An octal number is defined as "the prefix 0 optionally followed by a
9179
// sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
9280
// number that starts with 0, including just 0, is an octal number.
93-
if (src_len > 0) {
94-
if constexpr (cpp::is_same_v<CharType, char>) {
95-
if (src[0] == '0')
96-
return 8;
97-
} else {
98-
if (src[0] == L'0')
99-
return 8;
100-
}
81+
if (src_len > 0 && is_char_or_wchar(src[0], '0', L'0')) {
82+
return 8;
10183
}
10284
// A decimal number is defined as beginning "with a nonzero digit and
10385
// consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
@@ -150,18 +132,8 @@ strtointeger(const CharType *__restrict src, int base,
150132
bool is_number = false;
151133
int error_val = 0;
152134
ResultType result = 0;
153-
while (src_cur < src_len) {
154-
int cur_digit;
155-
if constexpr (cpp::is_same_v<CharType, char>) {
156-
if (!isalnum(src[src_cur]))
157-
break;
158-
cur_digit = b36_char_to_int(src[src_cur]);
159-
} else {
160-
if (!iswalnum(src[src_cur]))
161-
break;
162-
cur_digit = b36_wchar_to_int(src[src_cur]);
163-
}
164-
135+
while (src_cur < src_len && isalnum(src[src_cur])) {
136+
int cur_digit = b36_char_to_int(src[src_cur]);
165137
if (cur_digit >= base)
166138
break;
167139

libc/src/__support/wctype_utils.h

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ namespace internal {
3131

3232
// Similarly, do not change these functions to use case ranges (a rewrite needs
3333
// benchmarks showing it is faster and support for non-ASCII encodings). e.g.
34-
// bool iswlower(wchar_t ch) {
34+
// bool islower(wchar_t ch) {
3535
// switch(ch) {
3636
// case L'a'...L'z':
3737
// return true;
@@ -41,7 +41,7 @@ namespace internal {
4141
// EBCDIC. Technically we could use some smaller ranges, but that's even harder
4242
// to read.
4343

44-
LIBC_INLINE static constexpr bool iswlower(wchar_t wch) {
44+
LIBC_INLINE static constexpr bool islower(wchar_t wch) {
4545
switch (wch) {
4646
case L'a':
4747
case L'b':
@@ -75,7 +75,7 @@ LIBC_INLINE static constexpr bool iswlower(wchar_t wch) {
7575
}
7676
}
7777

78-
LIBC_INLINE static constexpr bool iswupper(wchar_t wch) {
78+
LIBC_INLINE static constexpr bool isupper(wchar_t wch) {
7979
switch (wch) {
8080
case L'A':
8181
case L'B':
@@ -109,7 +109,7 @@ LIBC_INLINE static constexpr bool iswupper(wchar_t wch) {
109109
}
110110
}
111111

112-
LIBC_INLINE static constexpr bool iswdigit(wchar_t wch) {
112+
LIBC_INLINE static constexpr bool isdigit(wchar_t wch) {
113113
switch (wch) {
114114
case L'0':
115115
case L'1':
@@ -127,7 +127,7 @@ LIBC_INLINE static constexpr bool iswdigit(wchar_t wch) {
127127
}
128128
}
129129

130-
LIBC_INLINE static constexpr wchar_t towlower(wchar_t wch) {
130+
LIBC_INLINE static constexpr wchar_t tolower(wchar_t wch) {
131131
switch (wch) {
132132
case L'A':
133133
return L'a';
@@ -186,7 +186,7 @@ LIBC_INLINE static constexpr wchar_t towlower(wchar_t wch) {
186186
}
187187
}
188188

189-
LIBC_INLINE static constexpr wchar_t towupper(wchar_t wch) {
189+
LIBC_INLINE static constexpr wchar_t toupper(wchar_t wch) {
190190
switch (wch) {
191191
case L'a':
192192
return L'A';
@@ -245,7 +245,7 @@ LIBC_INLINE static constexpr wchar_t towupper(wchar_t wch) {
245245
}
246246
}
247247

248-
LIBC_INLINE static constexpr bool iswalpha(wchar_t wch) {
248+
LIBC_INLINE static constexpr bool isalpha(wchar_t wch) {
249249
switch (wch) {
250250
case L'a':
251251
case L'b':
@@ -305,7 +305,7 @@ LIBC_INLINE static constexpr bool iswalpha(wchar_t wch) {
305305
}
306306
}
307307

308-
LIBC_INLINE static constexpr bool iswalnum(wchar_t wch) {
308+
LIBC_INLINE static constexpr bool isalnum(wchar_t wch) {
309309
switch (wch) {
310310
case L'a':
311311
case L'b':
@@ -375,7 +375,7 @@ LIBC_INLINE static constexpr bool iswalnum(wchar_t wch) {
375375
}
376376
}
377377

378-
LIBC_INLINE static constexpr int b36_wchar_to_int(wchar_t wch) {
378+
LIBC_INLINE static constexpr int b36_char_to_int(wchar_t wch) {
379379
switch (wch) {
380380
case L'0':
381381
return 0;
@@ -563,7 +563,7 @@ LIBC_INLINE static constexpr wchar_t int_to_b36_wchar(int num) {
563563
}
564564
}
565565

566-
LIBC_INLINE static constexpr bool iswspace(wchar_t wch) {
566+
LIBC_INLINE static constexpr bool isspace(wchar_t wch) {
567567
switch (wch) {
568568
case L' ':
569569
case L'\t':
@@ -577,6 +577,13 @@ LIBC_INLINE static constexpr bool iswspace(wchar_t wch) {
577577
}
578578
}
579579

580+
// An overload which provides a way to compare input with specific character
581+
// values, when input can be of a regular or a wide character type.
582+
LIBC_INLINE static constexpr bool
583+
is_char_or_wchar(wchar_t ch, [[maybe_unused]] char, wchar_t wc_value) {
584+
return (ch == wc_value);
585+
}
586+
580587
// ------------------------------------------------------
581588
// Rationale: Since these classification functions are
582589
// called in other functions, we will avoid the overhead

libc/src/wctype/iswalpha.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
namespace LIBC_NAMESPACE_DECL {
1616

1717
LLVM_LIBC_FUNCTION(int, iswalpha, (wint_t c)) {
18-
return internal::iswalpha(static_cast<wchar_t>(c));
18+
return internal::isalpha(static_cast<wchar_t>(c));
1919
}
2020

2121
} // namespace LIBC_NAMESPACE_DECL

0 commit comments

Comments
 (0)