Skip to content

Commit 16cfc4e

Browse files
committed
Add URI::Generic#decoded_#{user,password}
URI::Generic#{user,password} return the encoded values, which are not that useful if you want to do authentication with them. Automatic decoding by default would break backwards compatibility. Optional automatic decoding via a keyword to URI.parse would require threading the option through at least 3 other methods, and would either make the semantics confusing (do user= and password= take encoded or unencoded values?) or require more work. Thus, adding separate methods seemed the simplest approach. Unfortunately, URI lacks a method for correct decoding. Unlike in www form components, a "+" in earlier parts of the URI, such as the userinfo section, is treated verbatim and not as an encoded space. Add URI.#{en,de}code_uri_component methods, which are almost the same as URI.#{en,de}code_www_form_component, but without the special SP => "+" handling. Implements [Feature #9045]
1 parent 92352e6 commit 16cfc4e

File tree

4 files changed

+106
-10
lines changed

4 files changed

+106
-10
lines changed

lib/uri/common.rb

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,7 @@ def self.regexp(schemes = nil)
295295
256.times do |i|
296296
TBLENCWWWCOMP_[-i.chr] = -('%%%02X' % i)
297297
end
298+
TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze
298299
TBLENCWWWCOMP_[' '] = '+'
299300
TBLENCWWWCOMP_.freeze
300301
TBLDECWWWCOMP_ = {} # :nodoc:
@@ -320,6 +321,33 @@ def self.regexp(schemes = nil)
320321
#
321322
# See URI.decode_www_form_component, URI.encode_www_form.
322323
def self.encode_www_form_component(str, enc=nil)
324+
# Delegates to the shared encoder with the www-form table, in which SP maps to '+'.
_encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_, str, enc)
325+
end
326+
327+
# Decodes given +str+ of URL-encoded form data.
328+
#
329+
# This decodes + to SP.
330+
#
# The result is tagged with +enc+ (UTF-8 by default). Raises ArgumentError
# if +str+ contains a % that is not followed by two hexadecimal digits.
#
331+
# See URI.encode_www_form_component, URI.decode_www_form.
332+
def self.decode_www_form_component(str, enc=Encoding::UTF_8)
333+
# The pattern matches a literal '+' in addition to %XX escapes.
_decode_uri_component(/\+|%\h\h/, str, enc)
334+
end
335+
336+
# Encodes +str+ using URL encoding.
337+
#
338+
# This encodes SP to %20 instead of +.
339+
#
# Every byte other than <tt>*</tt>, <tt>-</tt>, <tt>.</tt>, <tt>_</tt>,
# <tt>0-9</tt>, <tt>A-Z</tt> and <tt>a-z</tt> is replaced by its %XX escape.
# +enc+ is forwarded unchanged to the shared encoder.
#
# See URI.decode_uri_component, URI.encode_www_form_component.
def self.encode_uri_component(str, enc=nil)
340+
# Same pattern as the www-form encoder; only the lookup table differs.
_encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCURICOMP_, str, enc)
341+
end
342+
343+
# Decodes given +str+ of URL-encoded data.
344+
#
345+
# This does not decode + to SP.
346+
#
# The result is tagged with +enc+ (UTF-8 by default). Raises ArgumentError
# if +str+ contains a % that is not followed by two hexadecimal digits.
#
# See URI.encode_uri_component, URI.decode_www_form_component.
def self.decode_uri_component(str, enc=Encoding::UTF_8)
347+
# Only %XX escapes are matched, so a literal '+' is left untouched.
_decode_uri_component(/%\h\h/, str, enc)
348+
end
349+
350+
def self._encode_uri_component(regexp, table, str, enc)
323351
str = str.to_s.dup
324352
if str.encoding != Encoding::ASCII_8BIT
325353
if enc && enc != Encoding::ASCII_8BIT
@@ -328,19 +356,16 @@ def self.encode_www_form_component(str, enc=nil)
328356
end
329357
str.force_encoding(Encoding::ASCII_8BIT)
330358
end
331-
str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
359+
str.gsub!(regexp, table)
332360
str.force_encoding(Encoding::US_ASCII)
333361
end
362+
private_class_method :_encode_uri_component
334363

335-
# Decodes given +str+ of URL-encoded form data.
336-
#
337-
# This decodes + to SP.
338-
#
339-
# See URI.encode_www_form_component, URI.decode_www_form.
340-
def self.decode_www_form_component(str, enc=Encoding::UTF_8)
364+
def self._decode_uri_component(regexp, str, enc)
341365
raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/.match?(str)
342-
str.b.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
366+
str.b.gsub(regexp, TBLDECWWWCOMP_).force_encoding(enc)
343367
end
368+
private_class_method :_decode_uri_component
344369

345370
# Generates URL-encoded form data from given +enum+.
346371
#

lib/uri/generic.rb

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -564,16 +564,26 @@ def userinfo
564564
end
565565
end
566566

567-
# Returns the user component.
567+
# Returns the user component (without URI decoding).
568568
def user
569569
# The stored value is returned as is; decoded_user gives the URI-decoded form.
@user
570570
end
571571

572-
# Returns the password component.
572+
# Returns the password component (without URI decoding).
573573
def password
574574
# The stored value is returned as is; decoded_password gives the URI-decoded form.
@password
575575
end
576576

577+
# Returns the user component after URI decoding.
578+
#
# Decoding is done with URI.decode_uri_component, so a literal + is kept
# and not turned into SP. Returns +nil+ when no user component is set.
def decoded_user
579+
URI.decode_uri_component(@user) if @user
580+
end
581+
582+
# Returns the password component after URI decoding.
583+
#
# Decoding is done with URI.decode_uri_component, so a literal + is kept
# and not turned into SP. Returns +nil+ when no password component is set.
def decoded_password
584+
URI.decode_uri_component(@password) if @password
585+
end
586+
577587
#
578588
# Checks the host +v+ component for RFC2396 compliance
579589
# and against the URI::Parser Regexp for :HOST.

test/uri/test_common.rb

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,58 @@ def test_decode_www_form_component
130130
assert_nothing_raised(ArgumentError){URI.decode_www_form_component("x"*(1024*1024))}
131131
end
132132

133+
# Checks URI.encode_uri_component against fixed expected strings, with and
# without an explicit target encoding.
def test_encode_uri_component
134+
# ASCII range: SP is expected as %20 and '+' as %2B; '*', '-', '.', '_',
# digits and letters are expected unescaped.
assert_equal("%00%20%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
135+
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E",
136+
URI.encode_uri_component("\x00 !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~"))
137+
# No +enc+ argument: the expected values escape the bytes of the source
# string as they are.
assert_equal("%95A", URI.encode_uri_component(
138+
"\x95\x41".force_encoding(Encoding::Shift_JIS)))
139+
assert_equal("0B", URI.encode_uri_component(
140+
"\x30\x42".force_encoding(Encoding::UTF_16BE)))
141+
assert_equal("%1B%24B%24%22%1B%28B", URI.encode_uri_component(
142+
"\e$B$\"\e(B".force_encoding(Encoding::ISO_2022_JP)))
143+
144+
# UTF-8 source string with an explicit +enc+ argument.
assert_equal("%E3%81%82", URI.encode_uri_component(
145+
"\u3042", Encoding::ASCII_8BIT))
146+
assert_equal("%82%A0", URI.encode_uri_component(
147+
"\u3042", Encoding::Windows_31J))
148+
assert_equal("%E3%81%82", URI.encode_uri_component(
149+
"\u3042", Encoding::UTF_8))
150+
151+
# Shift_JIS source string with an explicit +enc+ argument.
assert_equal("%82%A0", URI.encode_uri_component(
152+
"\u3042".encode("sjis"), Encoding::ASCII_8BIT))
153+
assert_equal("%A4%A2", URI.encode_uri_component(
154+
"\u3042".encode("sjis"), Encoding::EUC_JP))
155+
assert_equal("%E3%81%82", URI.encode_uri_component(
156+
"\u3042".encode("sjis"), Encoding::UTF_8))
157+
assert_equal("B0", URI.encode_uri_component(
158+
"\u3042".encode("sjis"), Encoding::UTF_16LE))
159+
# The expected value is the escaped form of "&#730;" (730 == 0x02DA).
assert_equal("%26%23730%3B", URI.encode_uri_component(
160+
"\u02DA", Encoding::WINDOWS_1252))
161+
162+
# Invalid byte sequences: the expected values contain %EF%BF%BD, the
# escaped UTF-8 bytes of U+FFFD.
163+
assert_equal("%EF%BF%BD%EF%BF%BD", URI.encode_uri_component(
164+
"\xE3\x81\xFF", "utf-8"))
165+
assert_equal("%E6%9F%8A%EF%BF%BD%EF%BF%BD", URI.encode_uri_component(
166+
"\x95\x41\xff\xff".force_encoding(Encoding::Shift_JIS), "utf-8"))
167+
end
168+
169+
# Checks URI.decode_uri_component: %XX escapes are decoded, a literal '+'
# is preserved, and malformed escapes raise ArgumentError.
def test_decode_uri_component
170+
# The literal '+' after %20 in the input is expected unchanged in the output.
assert_equal(" +!\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~",
171+
URI.decode_uri_component(
172+
"%20+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
173+
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E"))
174+
# An explicit encoding argument determines the encoding of the result.
assert_equal("\xA1\xA2".force_encoding(Encoding::EUC_JP),
175+
URI.decode_uri_component("%A1%A2", "EUC-JP"))
176+
# Input mixing raw UTF-8 bytes with %XX escapes.
assert_equal("\xE3\x81\x82\xE3\x81\x82".force_encoding("UTF-8"),
177+
URI.decode_uri_component("\xE3\x81\x82%E3%81%82".force_encoding("UTF-8")))
178+
179+
# A '%' not followed by two hex digits must raise.
assert_raise(ArgumentError){URI.decode_uri_component("%")}
180+
assert_raise(ArgumentError){URI.decode_uri_component("%a")}
181+
assert_raise(ArgumentError){URI.decode_uri_component("x%a_")}
182+
# A 1 MiB input without escapes must not raise.
assert_nothing_raised(ArgumentError){URI.decode_uri_component("x"*(1024*1024))}
183+
end
184+
133185
def test_encode_www_form
134186
assert_equal("a=1", URI.encode_www_form("a" => "1"))
135187
assert_equal("a=1", URI.encode_www_form(a: 1))

test/uri/test_parser.rb

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,15 @@ def test_parse_query_pct_encoded
5050
assert_raise(URI::InvalidURIError) { URI.parse('https://www.example.com/search?q=%XX') }
5151
end
5252

53+
# Checks that user/password keep their percent-encoded form while
# decoded_user/decoded_password return the decoded values.
def test_parse_auth
54+
# The userinfo contains %40 ('@') and %25 ('%') escapes.
str = "http://al%40ice:p%40s%25sword@example.com/dir%2Fname/subdir?foo=bar%40example.com"
55+
uri = URI.parse(str)
56+
# Raw accessors: escapes are expected untouched.
assert_equal "al%40ice", uri.user
57+
assert_equal "p%40s%25sword", uri.password
58+
# Decoded accessors: escapes are expected resolved.
assert_equal "al@ice", uri.decoded_user
59+
assert_equal "p@s%sword", uri.decoded_password
60+
end
61+
5362
def test_raise_bad_uri_for_integer
5463
assert_raise(URI::InvalidURIError) do
5564
URI.parse(1)

0 commit comments

Comments
 (0)