Skip to content

Commit 5e0f58c

Browse files
committed
url: ignore IDN errors when domain names have hyphens
There are valid domain names with hyphens in the 3rd and 4th positions; the new Node.js WHATWG URL parser was failing on them because it assumed they were invalid IDNs. Also filter IDN errors when a domain label starts or ends with a hyphen. Also fix an error in ToUnicode. Fixes: #12965 Refs: https://www.icann.org/news/announcement-2000-01-07-en Refs: whatwg/url#309 (comment)
1 parent 311667b commit 5e0f58c

File tree

3 files changed

+4
-39
lines changed

3 files changed

+4
-39
lines changed

src/node_i18n.cc

Lines changed: 3 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -435,8 +435,7 @@ bool InitializeICUDirectory(const std::string& path) {
435435

436436
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
437437
const char* input,
438-
size_t length,
439-
bool lenient) {
438+
size_t length) {
440439
UErrorCode status = U_ZERO_ERROR;
441440
uint32_t options = UIDNA_DEFAULT;
442441
options |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
@@ -461,29 +460,7 @@ int32_t ToUnicode(MaybeStackBuffer<char>* buf,
461460
&status);
462461
}
463462

464-
// UTS #46's ToUnicode operation applies no validation of domain name length
465-
// (nor a flag requesting it to do so, like VerifyDnsLength for ToASCII). For
466-
// that reason, unlike ToASCII below, ICU4C correctly accepts long domain
467-
// names. However, ICU4C still sets the EMPTY_LABEL error in contrary to UTS
468-
// #46. Therefore, explicitly filters out that error here.
469-
info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
470-
471-
// These error conditions are mandated unconditionally by UTS #46 version
472-
// 9.0.0 (rev. 17), but were found to be incompatible with many actual domain
473-
// names in the wild. As such, in the current UTS #46 draft (rev. 18) these
474-
// checks are made optional depending on the CheckHyphens flag, which will be
475-
// disabled in WHATWG URL's "domain to unicode" algorithm as soon as the UTS
476-
// #46 draft becomes standard.
477-
// Refs:
478-
// - https://github.com/whatwg/url/issues/53
479-
// - http://www.unicode.org/review/pri317/
480-
// - http://www.unicode.org/reports/tr46/tr46-18.html
481-
// - https://www.icann.org/news/announcement-2000-01-07-en
482-
info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
483-
info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
484-
info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
485-
486-
if (U_FAILURE(status) || (!lenient && info.errors != 0)) {
463+
if (U_FAILURE(status)) {
487464
len = -1;
488465
buf->SetLength(0);
489466
} else {
@@ -563,11 +540,9 @@ static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
563540
CHECK_GE(args.Length(), 1);
564541
CHECK(args[0]->IsString());
565542
Utf8Value val(env->isolate(), args[0]);
566-
// optional arg
567-
bool lenient = args[1]->BooleanValue(env->context()).FromJust();
568543

569544
MaybeStackBuffer<char> buf;
570-
int32_t len = ToUnicode(&buf, *val, val.length(), lenient);
545+
int32_t len = ToUnicode(&buf, *val, val.length());
571546

572547
if (len < 0) {
573548
return env->ThrowError("Cannot convert name to Unicode");

src/node_i18n.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ int32_t ToASCII(MaybeStackBuffer<char>* buf,
4343
bool lenient = false);
4444
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
4545
const char* input,
46-
size_t length,
47-
bool lenient = false);
46+
size_t length);
4847

4948
} // namespace i18n
5049
} // namespace node

test/fixtures/url-idna.js

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -219,15 +219,6 @@ module.exports = {
219219
{
220220
url: '\ufffd.com',
221221
mode: 'ascii'
222-
},
223-
{
224-
url: '\ufffd.com',
225-
mode: 'unicode'
226-
},
227-
// invalid Punycode
228-
{
229-
url: 'xn---abc.com',
230-
mode: 'unicode'
231222
}
232223
]
233224
}

0 commit comments

Comments
 (0)