Victory
diff --git a/‎src/libcore/char.rs‎
Lines changed: 58 additions & 38 deletions b/‎src/libcore/char.rs‎
Lines changed: 58 additions & 38 deletions
diff --git a/‎src/libcore/str/mod.rs‎
Lines changed: 67 additions & 49 deletions b/‎src/libcore/str/mod.rs‎
Lines changed: 67 additions & 49 deletions
diff --git a/‎src/libstd/ffi/mod.rs‎
Lines changed: 5 additions & 0 deletions b/‎src/libstd/ffi/mod.rs‎
Lines changed: 5 additions & 0 deletions
@@ -258,49 +258,69 @@ impl CharExt for char {
  #[inline]
  #[unstable = "pending decision about Iterator/Writer/Reader"]
  fn encode_utf8(self, dst: &mut [u8]) -> Option<uint> {
- // Marked #[inline] to allow llvm optimizing it away
- let code = self as u32;
- if code < MAX_ONE_B && dst.len() >= 1 {
- dst[0] = code as u8;
- Some(1)
- } else if code < MAX_TWO_B && dst.len() >= 2 {
- dst[0] = (code >> 6u & 0x1F_u32) as u8 | TAG_TWO_B;
- dst[1] = (code & 0x3F_u32) as u8 | TAG_CONT;
- Some(2)
- } else if code < MAX_THREE_B && dst.len() >= 3 {
- dst[0] = (code >> 12u & 0x0F_u32) as u8 | TAG_THREE_B;
- dst[1] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT;
- dst[2] = (code & 0x3F_u32) as u8 | TAG_CONT;
- Some(3)
- } else if dst.len() >= 4 {
- dst[0] = (code >> 18u & 0x07_u32) as u8 | TAG_FOUR_B;
- dst[1] = (code >> 12u & 0x3F_u32) as u8 | TAG_CONT;
- dst[2] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT;
- dst[3] = (code & 0x3F_u32) as u8 | TAG_CONT;
- Some(4)
- } else {
- None
- }
+ encode_utf8_raw(self as u32, dst)
  }
 
  #[inline]
  #[unstable = "pending decision about Iterator/Writer/Reader"]
  fn encode_utf16(self, dst: &mut [u16]) -> Option<uint> {
- // Marked #[inline] to allow llvm optimizing it away
- let mut ch = self as u32;
- if (ch & 0xFFFF_u32) == ch && dst.len() >= 1 {
- // The BMP falls through (assuming non-surrogate, as it should)
- dst[0] = ch as u16;
- Some(1)
- } else if dst.len() >= 2 {
- // Supplementary planes break into surrogates.
- ch -= 0x1_0000_u32;
- dst[0] = 0xD800_u16 | ((ch >> 10) as u16);
- dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
- Some(2)
- } else {
- None
- }
+ encode_utf16_raw(self as u32, dst)
+ }
+}
+
+/// Encodes a raw `u32` value as UTF-8 into the start of the provided byte buffer,
+/// and then returns `Some(n)`, where `n` (1 to 4) is the number of bytes written.
+///
+/// If the buffer is not large enough, nothing will be written into it
+/// and a `None` will be returned. `code` is not checked for being a valid Unicode scalar value here: anything not below `MAX_THREE_B` takes the four-byte form, whose first byte keeps only the low three bits of `code >> 18`.
+#[inline]
+#[unstable]
+pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> Option<uint> {
+ // Marked #[inline] so that LLVM can optimize the call away
+ if code < MAX_ONE_B && dst.len() >= 1 { // one-byte form: the value is stored as is
+ dst[0] = code as u8;
+ Some(1)
+ } else if code < MAX_TWO_B && dst.len() >= 2 { // two-byte form: 5 + 6 payload bits
+ dst[0] = (code >> 6u & 0x1F_u32) as u8 | TAG_TWO_B; // `>>` binds tighter than `&`: (code >> 6) & 0x1F
+ dst[1] = (code & 0x3F_u32) as u8 | TAG_CONT;
+ Some(2)
+ } else if code < MAX_THREE_B && dst.len() >= 3 { // three-byte form: 4 + 6 + 6 payload bits
+ dst[0] = (code >> 12u & 0x0F_u32) as u8 | TAG_THREE_B;
+ dst[1] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT;
+ dst[2] = (code & 0x3F_u32) as u8 | TAG_CONT;
+ Some(3)
+ } else if dst.len() >= 4 { // four-byte form: 3 + 6 + 6 + 6 payload bits; no upper bound is checked on `code`
+ dst[0] = (code >> 18u & 0x07_u32) as u8 | TAG_FOUR_B;
+ dst[1] = (code >> 12u & 0x3F_u32) as u8 | TAG_CONT;
+ dst[2] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT;
+ dst[3] = (code & 0x3F_u32) as u8 | TAG_CONT;
+ Some(4)
+ } else { // no branch fit: `dst` is too short for `code`, and nothing was written
+ None
+ }
+}
+
+/// Encodes a raw `u32` value as UTF-16 into the start of the provided `u16` buffer,
+/// and then returns `Some(n)`, where `n` (1 or 2) is the number of `u16`s written.
+///
+/// If the buffer is not large enough, nothing will be written into it
+/// and a `None` will be returned. The value is not validated here: surrogate values and values above 0x10FFFF are not rejected.
+#[inline]
+#[unstable]
+pub fn encode_utf16_raw(mut ch: u32, dst: &mut [u16]) -> Option<uint> {
+ // Marked #[inline] so that LLVM can optimize the call away
+ if (ch & 0xFFFF_u32) == ch && dst.len() >= 1 {
+ // Values that fit in 16 bits (the BMP) are stored unchanged as a single unit; surrogate values are not filtered out here
+ dst[0] = ch as u16;
+ Some(1)
+ } else if dst.len() >= 2 {
+ // Values above 0xFFFF are split into a surrogate pair: subtract 0x10000, then the bits above the low 10 go under 0xD800 and the low 10 bits under 0xDC00.
+ ch -= 0x1_0000_u32;
+ dst[0] = 0xD800_u16 | ((ch >> 10) as u16);
+ dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
+ Some(2)
+ } else { // `dst` is too short for this value; nothing was written
+ None
  }
 }
 
 
@@ -305,43 +305,52 @@ fn unwrap_or_0(opt: Option<&u8>) -> u8 {
  }
 }
 
+/// Reads the next code point out of a byte iterator (assuming a
+/// UTF-8-like encoding). Returns `None` only when the iterator is already empty. No validity checks are made in this function: the first byte alone (`>= 0xE0`, `>= 0xF0`) decides how many further bytes are pulled from the iterator.
+#[unstable]
+pub fn next_code_point(bytes: &mut slice::Iter<u8>) -> Option<u32> {
+ // Fast path: a first byte below 128 is returned directly as the code point
+ let x = match bytes.next() {
+ None => return None,
+ Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
+ Some(&next_byte) => next_byte,
+ };
+
+ // Multibyte case follows (first byte >= 128); at least one more byte is always read
+ // Decode from a byte combination out of: [[[x y] z] w]
+ // NOTE: Performance is sensitive to the exact formulation here, so keep the statement order as is
+ let init = utf8_first_byte!(x, 2);
+ let y = unwrap_or_0(bytes.next()); // NOTE(review): presumably yields 0 when the iterator has run out - confirm against unwrap_or_0
+ let mut ch = utf8_acc_cont_byte!(init, y);
+ if x >= 0xE0 {
+ // [[x y z] w] case: a third byte is consumed
+ // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
+ let z = unwrap_or_0(bytes.next());
+ let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z);
+ ch = init << 12 | y_z;
+ if x >= 0xF0 {
+ // [x y z w] case: a fourth byte is consumed
+ // use only the lower 3 bits of `init`
+ let w = unwrap_or_0(bytes.next());
+ ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w);
+ }
+ }
+
+ Some(ch)
+}
+
 #[stable]
 impl<'a> Iterator for Chars<'a> {
  type Item = char;
 
  #[inline]
  fn next(&mut self) -> Option<char> {
- // Decode UTF-8, using the valid UTF-8 invariant
- let x = match self.iter.next() {
- None => return None,
- Some(&next_byte) if next_byte < 128 => return Some(next_byte as char),
- Some(&next_byte) => next_byte,
- };
-
- // Multibyte case follows
- // Decode from a byte combination out of: [[[x y] z] w]
- // NOTE: Performance is sensitive to the exact formulation here
- let init = utf8_first_byte!(x, 2);
- let y = unwrap_or_0(self.iter.next());
- let mut ch = utf8_acc_cont_byte!(init, y);
- if x >= 0xE0 {
- // [[x y z] w] case
- // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
- let z = unwrap_or_0(self.iter.next());
- let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z);
- ch = init << 12 | y_z;
- if x >= 0xF0 {
- // [x y z w] case
- // use only the lower 3 bits of `init`
- let w = unwrap_or_0(self.iter.next());
- ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w);
+ next_code_point(&mut self.iter).map(|ch| {
+ // str invariant says `ch` is a valid Unicode Scalar Value
+ unsafe {
+ mem::transmute(ch)
  }
- }
-
- // str invariant says `ch` is a valid Unicode Scalar Value
- unsafe {
- Some(mem::transmute(ch))
- }
+ })
  }
 
  #[inline]
@@ -1517,25 +1526,8 @@ impl StrExt for str {
 
  #[inline]
  fn char_range_at(&self, i: uint) -> CharRange {
- if self.as_bytes()[i] < 128u8 {
- return CharRange {ch: self.as_bytes()[i] as char, next: i + 1 };
- }
-
- // Multibyte case is a fn to allow char_range_at to inline cleanly
- fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
- let mut val = s.as_bytes()[i] as u32;
- let w = UTF8_CHAR_WIDTH[val as uint] as uint;
- assert!((w != 0));
-
- val = utf8_first_byte!(val, w);
- val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 1]);
- if w > 2 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 2]); }
- if w > 3 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 3]); }
-
- return CharRange {ch: unsafe { mem::transmute(val) }, next: i + w};
- }
-
- return multibyte_char_range_at(self, i);
+ let (c, n) = char_range_at_raw(self.as_bytes(), i);
+ CharRange { ch: unsafe { mem::transmute(c) }, next: n }
  }
 
  #[inline]
@@ -1653,6 +1645,32 @@ impl StrExt for str {
  fn parse<T: FromStr>(&self) -> Option<T> { FromStr::from_str(self) }
 }
 
+/// Pluck the code point that starts at byte index `i` out of a UTF-8-like byte slice and return it together with the
+/// index of the next code point. Panics if `i`, or a following byte that the first byte's width calls for, is out of bounds, or if `UTF8_CHAR_WIDTH` gives a width of 0 for the first byte.
+#[inline]
+#[unstable]
+pub fn char_range_at_raw(bytes: &[u8], i: uint) -> (u32, usize) {
+ if bytes[i] < 128u8 { // single-byte fast path: the byte is the code point
+ return (bytes[i] as u32, i + 1);
+ }
+
+ // Multibyte case is a separate fn to allow char_range_at_raw to inline cleanly
+ fn multibyte_char_range_at(bytes: &[u8], i: uint) -> (u32, usize) {
+ let mut val = bytes[i] as u32;
+ let w = UTF8_CHAR_WIDTH[val as uint] as uint; // width of the sequence, looked up from its first byte
+ assert!((w != 0)); // a zero width entry means this byte cannot start a sequence
+
+ val = utf8_first_byte!(val, w);
+ val = utf8_acc_cont_byte!(val, bytes[i + 1]); // a second byte is always read on this path
+ if w > 2 { val = utf8_acc_cont_byte!(val, bytes[i + 2]); }
+ if w > 3 { val = utf8_acc_cont_byte!(val, bytes[i + 3]); }
+
+ return (val, i + w);
+ }
+
+ multibyte_char_range_at(bytes, i)
+}
+
 #[stable]
 impl<'a> Default for &'a str {
  #[stable]
 
@@ -17,4 +17,9 @@ pub use self::c_str::CString;
 pub use self::c_str::c_str_to_bytes;
 pub use self::c_str::c_str_to_bytes_with_nul;
 
+pub use self::os_str::OsString;
+pub use self::os_str::OsStr;
+pub use self::os_str::AsOsStr;
+
 mod c_str;
+mod os_str;