1919
2020#include "uv.h"
2121#include "idna.h"
22+ #include <assert.h>
2223#include <string.h>
2324
2425static unsigned uv__utf8_decode1_slow (const char * * p ,
@@ -32,7 +33,7 @@ static unsigned uv__utf8_decode1_slow(const char** p,
3233 if (a > 0xF7 )
3334 return -1 ;
3435
35- switch (* p - pe ) {
36+ switch (pe - * p ) {
3637 default :
3738 if (a > 0xEF ) {
3839 min = 0x10000 ;
@@ -62,6 +63,8 @@ static unsigned uv__utf8_decode1_slow(const char** p,
6263 a = 0 ;
6364 break ;
6465 }
66+ /* Fall through. */
67+ case 0 :
6568 return -1 ; /* Invalid continuation byte. */
6669 }
6770
@@ -88,6 +91,8 @@ static unsigned uv__utf8_decode1_slow(const char** p,
8891unsigned uv__utf8_decode1 (const char * * p , const char * pe ) {
8992 unsigned a ;
9093
94+ assert (* p < pe );
95+
9196 a = (unsigned char ) * (* p )++ ;
9297
9398 if (a < 128 )
@@ -96,9 +101,6 @@ unsigned uv__utf8_decode1(const char** p, const char* pe) {
96101 return uv__utf8_decode1_slow (p , pe , a );
97102}
98103
99- #define foreach_codepoint (c , p , pe ) \
100- for (; (void) (*p <= pe && (c = uv__utf8_decode1(p, pe))), *p <= pe;)
101-
102104static int uv__idna_toascii_label (const char * s , const char * se ,
103105 char * * d , char * de ) {
104106 static const char alphabet [] = "abcdefghijklmnopqrstuvwxyz0123456789" ;
@@ -121,25 +123,36 @@ static int uv__idna_toascii_label(const char* s, const char* se,
121123 ss = s ;
122124 todo = 0 ;
123125
124- foreach_codepoint (c , & s , se ) {
126+ /* Note: after this loop we've visited all UTF-8 characters and know
127+ * they're legal so we no longer need to check for decode errors.
128+ */
129+ while (s < se ) {
130+ c = uv__utf8_decode1 (& s , se );
131+
132+ if (c == -1u )
133+ return UV_EINVAL ;
134+
125135 if (c < 128 )
126136 h ++ ;
127- else if (c == (unsigned ) -1 )
128- return UV_EINVAL ;
129137 else
130138 todo ++ ;
131139 }
132140
141+ /* Only write "xn--" when there are non-ASCII characters. */
133142 if (todo > 0 ) {
134143 if (* d < de ) * (* d )++ = 'x' ;
135144 if (* d < de ) * (* d )++ = 'n' ;
136145 if (* d < de ) * (* d )++ = '-' ;
137146 if (* d < de ) * (* d )++ = '-' ;
138147 }
139148
149+ /* Write ASCII characters. */
140150 x = 0 ;
141151 s = ss ;
142- foreach_codepoint (c , & s , se ) {
152+ while (s < se ) {
153+ c = uv__utf8_decode1 (& s , se );
154+ assert (c != -1u );
155+
143156 if (c > 127 )
144157 continue ;
145158
@@ -166,10 +179,15 @@ static int uv__idna_toascii_label(const char* s, const char* se,
166179 while (todo > 0 ) {
167180 m = -1 ;
168181 s = ss ;
169- foreach_codepoint (c , & s , se )
182+
183+ while (s < se ) {
184+ c = uv__utf8_decode1 (& s , se );
185+ assert (c != -1u );
186+
170187 if (c >= n )
171188 if (c < m )
172189 m = c ;
190+ }
173191
174192 x = m - n ;
175193 y = h + 1 ;
@@ -181,7 +199,10 @@ static int uv__idna_toascii_label(const char* s, const char* se,
181199 n = m ;
182200
183201 s = ss ;
184- foreach_codepoint (c , & s , se ) {
202+ while (s < se ) {
203+ c = uv__utf8_decode1 (& s , se );
204+ assert (c != -1u );
205+
185206 if (c < n )
186207 if (++ delta == 0 )
187208 return UV_E2BIG ; /* Overflow. */
@@ -245,8 +266,6 @@ static int uv__idna_toascii_label(const char* s, const char* se,
245266 return 0 ;
246267}
247268
248- #undef foreach_codepoint
249-
250269long uv__idna_toascii (const char * s , const char * se , char * d , char * de ) {
251270 const char * si ;
252271 const char * st ;
@@ -256,10 +275,14 @@ long uv__idna_toascii(const char* s, const char* se, char* d, char* de) {
256275
257276 ds = d ;
258277
259- for (si = s ; si < se ; /* empty */ ) {
278+ si = s ;
279+ while (si < se ) {
260280 st = si ;
261281 c = uv__utf8_decode1 (& si , se );
262282
283+ if (c == -1u )
284+ return UV_EINVAL ;
285+
263286 if (c != '.' )
264287 if (c != 0x3002 ) /* 。 */
265288 if (c != 0xFF0E ) /* . */
0 commit comments