@@ -66,7 +66,7 @@ use core::{fmt, mem, ops, slice, str};
6666/// /// https://url.spec.whatwg.org/#fragment-percent-encode-set
6767/// const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
6868/// ```
69- #[ derive( Debug , PartialEq , Eq ) ]
69+ #[ derive( Clone , Copy , Debug , PartialEq , Eq ) ]
7070pub struct AsciiSet {
7171 mask : [ Chunk ; ASCII_RANGE_LEN / BITS_PER_CHUNK ] ,
7272}
@@ -79,7 +79,7 @@ const BITS_PER_CHUNK: usize = 8 * mem::size_of::<Chunk>();
7979
8080impl AsciiSet {
    /// An empty set.
    ///
    /// Exposed as a `&'static AsciiSet` reference rather than as a value.
    pub const EMPTY: &'static AsciiSet = &AsciiSet {
        // Every chunk of the mask starts out as zero.
        mask: [0; ASCII_RANGE_LEN / BITS_PER_CHUNK],
    };
8585
@@ -108,7 +108,7 @@ impl AsciiSet {
108108 }
109109
110110 /// Return the union of two sets.
111- pub const fn union ( & self , other : Self ) -> Self {
111+ pub const fn union ( & self , other : & Self ) -> Self {
112112 let mask = [
113113 self . mask [ 0 ] | other. mask [ 0 ] ,
114114 self . mask [ 1 ] | other. mask [ 1 ] ,
@@ -128,15 +128,31 @@ impl AsciiSet {
128128impl ops:: Add for AsciiSet {
129129 type Output = Self ;
130130
131- fn add ( self , other : Self ) -> Self {
131+ fn add ( self , other : Self ) -> Self :: Output {
132+ self . union ( & other)
133+ }
134+ }
135+
136+ impl ops:: Add for & AsciiSet {
137+ type Output = AsciiSet ;
138+
139+ fn add ( self , other : Self ) -> Self :: Output {
132140 self . union ( other)
133141 }
134142}
135143
136144impl ops:: Not for AsciiSet {
137145 type Output = Self ;
138146
139- fn not ( self ) -> Self {
147+ fn not ( self ) -> Self :: Output {
148+ self . complement ( )
149+ }
150+ }
151+
152+ impl ops:: Not for & AsciiSet {
153+ type Output = AsciiSet ;
154+
155+ fn not ( self ) -> Self :: Output {
140156 self . complement ( )
141157 }
142158}
@@ -268,7 +284,7 @@ pub fn percent_encode_byte(byte: u8) -> &'static str {
268284/// assert_eq!(percent_encode(b"foo bar?", NON_ALPHANUMERIC).to_string(), "foo%20bar%3F");
269285/// ```
270286#[ inline]
271- pub fn percent_encode < ' a > ( input : & ' a [ u8 ] , ascii_set : & ' static AsciiSet ) -> PercentEncode < ' a > {
287+ pub fn percent_encode < ' a > ( input : & ' a [ u8 ] , ascii_set : & ' a AsciiSet ) -> PercentEncode < ' a > {
272288 PercentEncode {
273289 bytes : input,
274290 ascii_set,
@@ -287,15 +303,15 @@ pub fn percent_encode<'a>(input: &'a [u8], ascii_set: &'static AsciiSet) -> Perc
287303/// assert_eq!(utf8_percent_encode("foo bar?", NON_ALPHANUMERIC).to_string(), "foo%20bar%3F");
288304/// ```
289305#[ inline]
290- pub fn utf8_percent_encode < ' a > ( input : & ' a str , ascii_set : & ' static AsciiSet ) -> PercentEncode < ' a > {
306+ pub fn utf8_percent_encode < ' a > ( input : & ' a str , ascii_set : & ' a AsciiSet ) -> PercentEncode < ' a > {
291307 percent_encode ( input. as_bytes ( ) , ascii_set)
292308}
293309
/// The return type of [`percent_encode`] and [`utf8_percent_encode`].
#[derive(Clone)]
pub struct PercentEncode<'a> {
    /// Initialized from the `input` argument of [`percent_encode`].
    bytes: &'a [u8],
    /// Initialized from the `ascii_set` argument of [`percent_encode`];
    /// borrowed for the same lifetime `'a` as the input.
    ascii_set: &'a AsciiSet,
}
300316
301317impl < ' a > Iterator for PercentEncode < ' a > {
@@ -372,6 +388,19 @@ pub fn percent_decode_str(input: &str) -> PercentDecode<'_> {
372388 percent_decode ( input. as_bytes ( ) )
373389}
374390
391+ /// Percent-decode the given string preserving the given ascii_set.
392+ ///
393+ /// <https://url.spec.whatwg.org/#string-percent-decode>
394+ ///
395+ /// See [`percent_decode`] regarding the return type.
396+ #[ inline]
397+ pub fn percent_decode_str_with_set < ' a > (
398+ input : & ' a str ,
399+ ascii_set : & ' a AsciiSet ,
400+ ) -> PercentDecode < ' a > {
401+ percent_decode_with_set ( input. as_bytes ( ) , ascii_set)
402+ }
403+
375404/// Percent-decode the given bytes.
376405///
377406/// <https://url.spec.whatwg.org/#percent-decode>
@@ -394,13 +423,44 @@ pub fn percent_decode_str(input: &str) -> PercentDecode<'_> {
394423pub fn percent_decode ( input : & [ u8 ] ) -> PercentDecode < ' _ > {
395424 PercentDecode {
396425 bytes : input. iter ( ) ,
426+ ascii_set : None ,
427+ }
428+ }
429+
430+ /// Percent-decode the given bytes preserving the given ascii_set.
431+ ///
432+ /// <https://url.spec.whatwg.org/#percent-decode>
433+ ///
434+ /// Any sequence of `%` followed by two hexadecimal digits expect for the given [AsciiSet] is decoded.
435+ /// The return type:
436+ ///
437+ /// * Implements `Into<Cow<u8>>` borrowing `input` when it contains no percent-encoded sequence,
438+ /// * Implements `Iterator<Item = u8>` and therefore has a `.collect::<Vec<u8>>()` method,
439+ /// * Has `decode_utf8()` and `decode_utf8_lossy()` methods.
440+ ///
441+ /// # Examples
442+ ///
443+ /// ```
444+ /// use percent_encoding::{percent_decode_with_set, NON_ALPHANUMERIC};
445+ ///
446+ /// assert_eq!(percent_decode_with_set(b"%66oo%20bar%3f", &!NON_ALPHANUMERIC).decode_utf8().unwrap(), "%66oo bar?");
447+ /// ```
448+ #[ inline]
449+ pub fn percent_decode_with_set < ' a > (
450+ input : & ' a [ u8 ] ,
451+ ascii_set : & ' a AsciiSet ,
452+ ) -> PercentDecode < ' a > {
453+ PercentDecode {
454+ bytes : input. iter ( ) ,
455+ ascii_set : Some ( ascii_set) ,
397456 }
398457}
399458
/// The return type of [`percent_decode`] and [`percent_decode_with_set`].
#[derive(Clone, Debug)]
pub struct PercentDecode<'a> {
    /// Iterator over the input bytes; it is advanced as bytes are produced.
    bytes: slice::Iter<'a, u8>,
    /// When `Some`, a `%XX` sequence whose decoded byte is contained in this
    /// set is left undecoded. When `None`, no decoded byte is excluded.
    ascii_set: Option<&'a AsciiSet>,
}
405465
406466fn after_percent_sign ( iter : & mut slice:: Iter < ' _ , u8 > ) -> Option < u8 > {
@@ -411,13 +471,35 @@ fn after_percent_sign(iter: &mut slice::Iter<'_, u8>) -> Option<u8> {
411471 Some ( h as u8 * 0x10 + l as u8 )
412472}
413473
/// Inspects the two bytes that follow a `%` without consuming them from `iter`.
///
/// All reading is done on a clone of `iter`. When the next two bytes are both
/// hexadecimal digits, returns the byte they encode together with the cloned
/// iterator positioned just past them, so the caller can decide whether to
/// commit the advance. Returns `None` when fewer than two bytes remain or when
/// either of them is not a hexadecimal digit. `iter` itself is never advanced.
fn after_percent_sign_lookahead<'a>(
    iter: &mut slice::Iter<'a, u8>,
) -> Option<(u8, slice::Iter<'a, u8>)> {
    let mut rest = iter.clone();
    // Reads one byte from the clone and interprets it as a hex digit (0..=15).
    let mut next_nibble = || -> Option<u8> {
        let digit = char::from(*rest.next()?).to_digit(16)?;
        Some(digit as u8)
    };
    let high = next_nibble()?;
    let low = next_nibble()?;
    // Both nibbles are at most 0xF, so the sum is at most 0xFF and fits in a `u8`.
    Some((high * 0x10 + low, rest))
}
482+
414483impl < ' a > Iterator for PercentDecode < ' a > {
415484 type Item = u8 ;
416485
417486 fn next ( & mut self ) -> Option < u8 > {
418487 self . bytes . next ( ) . map ( |& byte| {
419- if byte == b'%' {
420- after_percent_sign ( & mut self . bytes ) . unwrap_or ( byte)
488+ if byte != b'%' {
489+ return byte;
490+ }
491+
492+ let Some ( ( decoded_byte, iter) ) = after_percent_sign_lookahead ( & mut self . bytes ) else {
493+ return byte;
494+ } ;
495+
496+ let should_decode = self
497+ . ascii_set
498+ . map_or ( true , |ascii_set| !ascii_set. contains ( decoded_byte) ) ;
499+
500+ if should_decode {
501+ self . bytes = iter;
502+ decoded_byte
421503 } else {
422504 byte
423505 }
@@ -447,11 +529,20 @@ impl<'a> PercentDecode<'a> {
447529 let mut bytes_iter = self . bytes . clone ( ) ;
448530 while bytes_iter. any ( |& b| b == b'%' ) {
449531 if let Some ( decoded_byte) = after_percent_sign ( & mut bytes_iter) {
532+ if let Some ( ascii_set) = self . ascii_set {
533+ if ascii_set. contains ( decoded_byte) {
534+ continue ;
535+ }
536+ }
537+
450538 let initial_bytes = self . bytes . as_slice ( ) ;
451539 let unchanged_bytes_len = initial_bytes. len ( ) - bytes_iter. len ( ) - 3 ;
452540 let mut decoded = initial_bytes[ ..unchanged_bytes_len] . to_owned ( ) ;
453541 decoded. push ( decoded_byte) ;
454- decoded. extend ( PercentDecode { bytes : bytes_iter } ) ;
542+ decoded. extend ( PercentDecode {
543+ bytes : bytes_iter,
544+ ascii_set : self . ascii_set ,
545+ } ) ;
455546 return Some ( decoded) ;
456547 }
457548 }
@@ -542,8 +633,8 @@ mod tests {
542633 /// useful for defining sets in a modular way.
543634 #[ test]
544635 fn union ( ) {
545- const A : AsciiSet = AsciiSet :: EMPTY . add ( b'A' ) ;
546- const B : AsciiSet = AsciiSet :: EMPTY . add ( b'B' ) ;
636+ const A : & AsciiSet = & AsciiSet :: EMPTY . add ( b'A' ) ;
637+ const B : & AsciiSet = & AsciiSet :: EMPTY . add ( b'B' ) ;
547638 const UNION : AsciiSet = A . union ( B ) ;
548639 const EXPECTED : AsciiSet = AsciiSet :: EMPTY . add ( b'A' ) . add ( b'B' ) ;
549640 assert_eq ! ( UNION , EXPECTED ) ;
0 commit comments