1- /* auto-generated on 2025-04-28 12:16:36 -0400. Do not edit! */
1+ /* auto-generated on 2025-06-30 19:51:09 -0400. Do not edit! */
22/* begin file src/ada.cpp */
33#include "ada.h"
44/* begin file src/checkers.cpp */
@@ -134,7 +134,7 @@ ada_really_inline constexpr bool verify_dns_length(
134134
135135ADA_PUSH_DISABLE_ALL_WARNINGS
136136/* begin file src/ada_idna.cpp */
137- /* auto-generated on 2025-03-08 13:17:11 -0500 . Do not edit! */
137+ /* auto-generated on 2025-06-26 23:04:30 -0300 . Do not edit! */
138138/* begin file src/idna.cpp */
139139/* begin file src/unicode_transcoding.cpp */
140140
@@ -8157,7 +8157,7 @@ bool utf32_to_punycode(std::u32string_view input, std::string &out) {
81578157 ++h;
81588158 out.push_back(char(c));
81598159 }
8160- if (c > 0x10ffff || (c >= 0xd880 && c < 0xe000)) {
8160+ if (c > 0x10ffff || (c >= 0xd800 && c < 0xe000)) {
81618161 return false;
81628162 }
81638163 }
@@ -9547,6 +9547,10 @@ bool is_label_valid(const std::u32string_view label) {
95479547#include <ranges>
95489548
95499549
9550+ #ifdef ADA_USE_SIMDUTF
9551+ #include "simdutf.h"
9552+ #endif
9553+
95509554namespace ada::idna {
95519555
95529556bool constexpr is_ascii(std::u32string_view view) {
@@ -9659,11 +9663,20 @@ std::string to_ascii(std::string_view ut8_string) {
96599663 }
96609664 static const std::string error = "";
96619665 // We convert to UTF-32
9666+
9667+ #ifdef ADA_USE_SIMDUTF
9668+ size_t utf32_length =
9669+ simdutf::utf32_length_from_utf8(ut8_string.data(), ut8_string.size());
9670+ std::u32string utf32(utf32_length, '\0');
9671+ size_t actual_utf32_length = simdutf::convert_utf8_to_utf32(
9672+ ut8_string.data(), ut8_string.size(), utf32.data());
9673+ #else
96629674 size_t utf32_length =
96639675 ada::idna::utf32_length_from_utf8(ut8_string.data(), ut8_string.size());
96649676 std::u32string utf32(utf32_length, '\0');
96659677 size_t actual_utf32_length = ada::idna::utf8_to_utf32(
96669678 ut8_string.data(), ut8_string.size(), utf32.data());
9679+ #endif
96679680 if (actual_utf32_length == 0) {
96689681 return error;
96699682 }
@@ -9755,6 +9768,10 @@ std::string to_ascii(std::string_view ut8_string) {
97559768#include <string>
97569769
97579770
9771+ #ifdef ADA_USE_SIMDUTF
9772+ #include "simdutf.h"
9773+ #endif
9774+
97589775namespace ada::idna {
97599776std::string to_unicode(std::string_view input) {
97609777 std::string output;
@@ -9773,11 +9790,19 @@ std::string to_unicode(std::string_view input) {
97739790 if (ada::idna::verify_punycode(label_view)) {
97749791 std::u32string tmp_buffer;
97759792 if (ada::idna::punycode_to_utf32(label_view, tmp_buffer)) {
9793+ #ifdef ADA_USE_SIMDUTF
9794+ auto utf8_size = simdutf::utf8_length_from_utf32(tmp_buffer.data(),
9795+ tmp_buffer.size());
9796+ std::string final_utf8(utf8_size, '\0');
9797+ simdutf::convert_utf32_to_utf8(tmp_buffer.data(), tmp_buffer.size(),
9798+ final_utf8.data());
9799+ #else
97769800 auto utf8_size = ada::idna::utf8_length_from_utf32(tmp_buffer.data(),
97779801 tmp_buffer.size());
97789802 std::string final_utf8(utf8_size, '\0');
97799803 ada::idna::utf32_to_utf8(tmp_buffer.data(), tmp_buffer.size(),
97809804 final_utf8.data());
9805+ #endif
97819806 output.append(final_utf8);
97829807 } else {
97839808 // ToUnicode never fails. If any step fails, then the original input
@@ -11042,7 +11067,7 @@ bool can_parse(std::string_view input, const std::string_view* base_input) {
1104211067 return result.is_valid;
1104311068}
1104411069
11045- ada_warn_unused std::string to_string(ada::encoding_type type) {
11070+ ada_warn_unused std::string_view to_string(ada::encoding_type type) {
1104611071 switch (type) {
1104711072 case ada::encoding_type::UTF8:
1104811073 return "UTF-8";
@@ -12536,35 +12561,67 @@ bool url::set_host_or_hostname(const std::string_view input) {
1253612561 // Note: the 'found_colon' value is true if and only if a colon was
1253712562 // encountered while not inside brackets.
1253812563 if (found_colon) {
12564+ // If buffer is the empty string, host-missing validation error, return
12565+ // failure.
12566+ std::string_view buffer = host_view.substr(0, location);
12567+ if (buffer.empty()) {
12568+ return false;
12569+ }
12570+
12571+ // If state override is given and state override is hostname state, then
12572+ // return failure.
1253912573 if constexpr (override_hostname) {
1254012574 return false;
1254112575 }
12542- std::string_view buffer = new_host.substr(location + 1);
12543- if (!buffer.empty()) {
12544- set_port(buffer);
12576+
12577+ // Let host be the result of host parsing buffer with url is not special.
12578+ bool succeeded = parse_host(buffer);
12579+ if (!succeeded) {
12580+ host = std::move(previous_host);
12581+ update_base_port(previous_port);
12582+ return false;
1254512583 }
12546- }
12547- // If url is special and host_view is the empty string, validation error,
12548- // return failure. Otherwise, if state override is given, host_view is the
12549- // empty string, and either url includes credentials or url's port is
12550- // non-null, return.
12551- else if (host_view.empty() &&
12552- (is_special() || has_credentials() || port.has_value())) {
12553- return false;
12554- }
1255512584
12556- // Let host be the result of host parsing host_view with url is not special.
12557- if (host_view.empty() && !is_special()) {
12558- host = "";
12585+ // Set url's host to host, buffer to the empty string, and state to port
12586+ // state.
12587+ std::string_view port_buffer = new_host.substr(location + 1);
12588+ if (!port_buffer.empty()) {
12589+ set_port(port_buffer);
12590+ }
1255912591 return true;
1256012592 }
12593+ // Otherwise, if one of the following is true:
12594+ // - c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
12595+ // - url is special and c is U+005C (\)
12596+ else {
12597+ // If url is special and host_view is the empty string, host-missing
12598+ // validation error, return failure.
12599+ if (host_view.empty() && is_special()) {
12600+ return false;
12601+ }
1256112602
12562- bool succeeded = parse_host(host_view);
12563- if (!succeeded) {
12564- host = std::move(previous_host);
12565- update_base_port(previous_port);
12603+ // Otherwise, if state override is given, host_view is the empty string,
12604+ // and either url includes credentials or url's port is non-null, then
12605+ // return failure.
12606+ if (host_view.empty() && (has_credentials() || port.has_value())) {
12607+ return false;
12608+ }
12609+
12610+ // Let host be the result of host parsing host_view with url is not
12611+ // special.
12612+ if (host_view.empty() && !is_special()) {
12613+ host = "";
12614+ return true;
12615+ }
12616+
12617+ bool succeeded = parse_host(host_view);
12618+ if (!succeeded) {
12619+ host = std::move(previous_host);
12620+ update_base_port(previous_port);
12621+ return false;
12622+ }
12623+ return true;
1256612624 }
12567- return succeeded;
1256812625 }
1256912626
1257012627 size_t location = new_host.find_first_of("/\\?");
@@ -12621,10 +12678,16 @@ bool url::set_port(const std::string_view input) {
1262112678 if (cannot_have_credentials_or_port()) {
1262212679 return false;
1262312680 }
12681+
12682+ if (input.empty()) {
12683+ port = std::nullopt;
12684+ return true;
12685+ }
12686+
1262412687 std::string trimmed(input);
1262512688 helpers::remove_ascii_tab_or_newline(trimmed);
12689+
1262612690 if (trimmed.empty()) {
12627- port = std::nullopt;
1262812691 return true;
1262912692 }
1263012693
@@ -12633,9 +12696,15 @@ bool url::set_port(const std::string_view input) {
1263312696 return false;
1263412697 }
1263512698
12699+ // Find the first non-digit character to determine the length of digits
12700+ auto first_non_digit =
12701+ std::ranges::find_if_not(trimmed, ada::unicode::is_ascii_digit);
12702+ std::string_view digits_to_parse =
12703+ std::string_view(trimmed.data(), first_non_digit - trimmed.begin());
12704+
1263612705 // Revert changes if parse_port fails.
1263712706 std::optional<uint16_t> previous_port = port;
12638- parse_port(trimmed );
12707+ parse_port(digits_to_parse );
1263912708 if (is_valid) {
1264012709 return true;
1264112710 }
@@ -13966,10 +14035,16 @@ bool url_aggregator::set_port(const std::string_view input) {
1396614035 if (cannot_have_credentials_or_port()) {
1396714036 return false;
1396814037 }
14038+
14039+ if (input.empty()) {
14040+ clear_port();
14041+ return true;
14042+ }
14043+
1396914044 std::string trimmed(input);
1397014045 helpers::remove_ascii_tab_or_newline(trimmed);
14046+
1397114047 if (trimmed.empty()) {
13972- clear_port();
1397314048 return true;
1397414049 }
1397514050
@@ -13978,9 +14053,15 @@ bool url_aggregator::set_port(const std::string_view input) {
1397814053 return false;
1397914054 }
1398014055
14056+ // Find the first non-digit character to determine the length of digits
14057+ auto first_non_digit =
14058+ std::ranges::find_if_not(trimmed, ada::unicode::is_ascii_digit);
14059+ std::string_view digits_to_parse =
14060+ std::string_view(trimmed.data(), first_non_digit - trimmed.begin());
14061+
1398114062 // Revert changes if parse_port fails.
1398214063 uint32_t previous_port = components.port;
13983- parse_port(trimmed );
14064+ parse_port(digits_to_parse );
1398414065 if (is_valid) {
1398514066 return true;
1398614067 }
@@ -14223,43 +14304,75 @@ bool url_aggregator::set_host_or_hostname(const std::string_view input) {
1422314304 // Note: the 'found_colon' value is true if and only if a colon was
1422414305 // encountered while not inside brackets.
1422514306 if (found_colon) {
14307+ // If buffer is the empty string, host-missing validation error, return
14308+ // failure.
14309+ std::string_view host_buffer = host_view.substr(0, location);
14310+ if (host_buffer.empty()) {
14311+ return false;
14312+ }
14313+
14314+ // If state override is given and state override is hostname state, then
14315+ // return failure.
1422614316 if constexpr (override_hostname) {
1422714317 return false;
1422814318 }
14229- std::string_view sub_buffer = new_host.substr(location + 1);
14230- if (!sub_buffer.empty()) {
14231- set_port(sub_buffer);
14319+
14320+ // Let host be the result of host parsing buffer with url is not special.
14321+ bool succeeded = parse_host(host_buffer);
14322+ if (!succeeded) {
14323+ update_base_hostname(previous_host);
14324+ update_base_port(previous_port);
14325+ return false;
1423214326 }
14327+
14328+ // Set url's host to host, buffer to the empty string, and state to port
14329+ // state.
14330+ std::string_view port_buffer = new_host.substr(location + 1);
14331+ if (!port_buffer.empty()) {
14332+ set_port(port_buffer);
14333+ }
14334+ return true;
1423314335 }
14234- // If url is special and host_view is the empty string, validation error,
14235- // return failure. Otherwise, if state override is given, host_view is the
14236- // empty string, and either url includes credentials or url's port is
14237- // non-null, return.
14238- else if (host_view.empty() &&
14239- (is_special() || has_credentials() || has_port())) {
14240- return false;
14241- }
14336+ // Otherwise, if one of the following is true:
14337+ // - c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
14338+ // - url is special and c is U+005C (\)
14339+ else {
14340+ // If url is special and host_view is the empty string, host-missing
14341+ // validation error, return failure.
14342+ if (host_view.empty() && is_special()) {
14343+ return false;
14344+ }
14345+
14346+ // Otherwise, if state override is given, host_view is the empty string,
14347+ // and either url includes credentials or url's port is non-null, then
14348+ // return failure.
14349+ if (host_view.empty() && (has_credentials() || has_port())) {
14350+ return false;
14351+ }
1424214352
14243- // Let host be the result of host parsing host_view with url is not special.
14244- if (host_view.empty() && !is_special()) {
14245- if (has_hostname()) {
14246- clear_hostname(); // easy!
14353+ // Let host be the result of host parsing host_view with url is not
14354+ // special.
14355+ if (host_view.empty() && !is_special()) {
14356+ if (has_hostname()) {
14357+ clear_hostname(); // easy!
14358+ } else if (has_dash_dot()) {
14359+ add_authority_slashes_if_needed();
14360+ delete_dash_dot();
14361+ }
14362+ return true;
14363+ }
14364+
14365+ bool succeeded = parse_host(host_view);
14366+ if (!succeeded) {
14367+ update_base_hostname(previous_host);
14368+ update_base_port(previous_port);
14369+ return false;
1424714370 } else if (has_dash_dot()) {
14248- add_authority_slashes_if_needed();
14371+ // Should remove dash_dot from pathname
1424914372 delete_dash_dot();
1425014373 }
1425114374 return true;
1425214375 }
14253-
14254- bool succeeded = parse_host(host_view);
14255- if (!succeeded) {
14256- update_base_hostname(previous_host);
14257- update_base_port(previous_port);
14258- } else if (has_dash_dot()) {
14259- // Should remove dash_dot from pathname
14260- delete_dash_dot();
14261- }
14262- return succeeded;
1426314376 }
1426414377
1426514378 size_t location = new_host.find_first_of("/\\?");
0 commit comments