rust-lang · Jules-Bertholet · Mar 16, 2024 · Mar 16, 2024 · Mar 16, 2024 · Mar 16, 2024
diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs
@@ -160,6 +160,7 @@
 #![feature(std_internals)]
 #![feature(str_internals)]
 #![feature(strict_provenance)]
+#![feature(titlecase)]
 #![feature(trusted_fused)]
 #![feature(trusted_len)]
 #![feature(trusted_random_access)]

diff --git a/library/alloc/src/str.rs b/library/alloc/src/str.rs
@@ -410,9 +410,9 @@ impl str {
  }
 
  fn case_ignorable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool {
- use core::unicode::{Case_Ignorable, Cased};
+ use core::unicode::Case_Ignorable;
  match iter.skip_while(|&c| Case_Ignorable(c)).next() {
- Some(c) => Cased(c),
+ Some(c) => c.is_cased(),
  None => false,
  }
  }

diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs
@@ -742,8 +742,73 @@ impl char {
  #[inline]
  pub fn is_alphabetic(self) -> bool {
  match self {
- 'a'..='z' | 'A'..='Z' => true,
- c => c > '\x7f' && unicode::Alphabetic(c),
+ 'A'..='Z' | 'a'..='z' => true,
+ '\0'..='\u{A9}' => false,
+ _ => unicode::Alphabetic(self),
+ }
+ }
+
+ /// Returns `true` if this `char` has the `Cased` property.
+ /// A character is cased if and only if it is uppercase, lowercase, or titlecase.
+ ///
+ /// `Cased` is described in Chapter 3 (Conformance) of the [Unicode Standard] and
+ /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
+ ///
+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
+ /// [ucd]: https://www.unicode.org/reports/tr44/
+ /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// #![feature(titlecase)]
+ /// assert!('A'.is_cased());
+ /// assert!('a'.is_cased());
+ /// assert!(!'京'.is_cased());
+ /// ```
+ #[must_use]
+ #[unstable(feature = "titlecase", issue = "none")]
+ #[inline]
+ pub fn is_cased(self) -> bool {
+ match self {
+ 'A'..='Z' | 'a'..='z' => true,
+ '\0'..='\u{A9}' => false,
+ _ => unicode::Cased(self),
+ }
+ }
+
+ /// Returns the case of this character:
+ /// [`Some(CharCase::Upper)`][`CharCase::Upper`] if [`self.is_uppercase()`][`char::is_uppercase`],
+ /// [`Some(CharCase::Lower)`][`CharCase::Lower`] if [`self.is_lowercase()`][`char::is_lowercase`],
+ /// [`Some(CharCase::Title)`][`CharCase::Title`] if [`self.is_titlecase()`][`char::is_titlecase`], and
+ /// `None` if [`!self.is_cased()`][`char::is_cased`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(titlecase)]
+ /// use core::char::CharCase;
+ /// assert_eq!('a'.case(), Some(CharCase::Lower));
+ /// assert_eq!('δ'.case(), Some(CharCase::Lower));
+ /// assert_eq!('A'.case(), Some(CharCase::Upper));
+ /// assert_eq!('Δ'.case(), Some(CharCase::Upper));
+ /// assert_eq!('ǅ'.case(), Some(CharCase::Title));
+ /// assert_eq!('中'.case(), None);
+ /// ```
+ #[must_use]
+ #[unstable(feature = "titlecase", issue = "none")]
+ #[inline]
+ pub fn case(self) -> Option<CharCase> {
+ match self {
+ 'A'..='Z' => Some(CharCase::Upper),
+ 'a'..='z' => Some(CharCase::Lower),
+ '\0'..='\u{A9}' => None,
+ _ if !self.is_cased() => None,
+ _ if self.is_lowercase() => Some(CharCase::Lower),
+ _ if self.is_uppercase() => Some(CharCase::Upper),
+ _ => Some(CharCase::Title),
  }
  }
 
@@ -785,7 +850,41 @@ impl char {
  pub const fn is_lowercase(self) -> bool {
  match self {
  'a'..='z' => true,
- c => c > '\x7f' && unicode::Lowercase(c),
+ '\0'..='\u{A9}' => false,
+ _ => unicode::Lowercase(self),
+ }
+ }
+
+ /// Returns `true` if this `char` has the general category for titlecase letters.
+ ///
+ /// Titlecase letters (code points with the general category of `Lt`) are described in Chapter 4
+ /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
+ /// Database][ucd] [`UnicodeData.txt`].
+ ///
+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
+ /// [ucd]: https://www.unicode.org/reports/tr44/
+ /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// #![feature(titlecase)]
+ /// assert!('ǅ'.is_titlecase());
+ /// assert!('ᾨ'.is_titlecase());
+ /// assert!(!'D'.is_titlecase());
+ /// assert!(!'z'.is_titlecase());
+ /// assert!(!'中'.is_titlecase());
+ /// assert!(!' '.is_titlecase());
+ /// ```
+ #[must_use]
+ #[unstable(feature = "titlecase", issue = "none")]
+ #[inline]
+ pub fn is_titlecase(self) -> bool {
+ match self {
+ '\0'..='\u{01C4}' => false,
+ _ => self.is_cased() && !self.is_lowercase() && !self.is_uppercase(),
  }
  }
 
@@ -827,7 +926,8 @@ impl char {
  pub const fn is_uppercase(self) -> bool {
  match self {
  'A'..='Z' => true,
- c => c > '\x7f' && unicode::Uppercase(c),
+ '\0'..='\u{BF}' => false,
+ _ => unicode::Uppercase(self),
  }
  }
 
@@ -859,7 +959,8 @@ impl char {
  pub fn is_whitespace(self) -> bool {
  match self {
  ' ' | '\x09'..='\x0d' => true,
- c => c > '\x7f' && unicode::White_Space(c),
+ '\0'..='\u{84}' => false,
+ _ => unicode::White_Space(self),
  }
  }
 
@@ -927,7 +1028,7 @@ impl char {
  #[must_use]
  #[inline]
  pub(crate) fn is_grapheme_extended(self) -> bool {
- self > '\x7f' && unicode::Grapheme_Extend(self)
+ self > '\u{02FF}' && unicode::Grapheme_Extend(self)
  }
 
  /// Returns `true` if this `char` has one of the general categories for numbers.
@@ -969,12 +1070,14 @@ impl char {
  pub fn is_numeric(self) -> bool {
  match self {
  '0'..='9' => true,
- c => c > '\x7f' && unicode::N(c),
+ '\0'..='\u{B1}' => false,
+ _ => unicode::N(self),
  }
  }
 
  /// Returns an iterator that yields the lowercase mapping of this `char` as one or more
- /// `char`s.
+ /// `char`s. The iterator also has implementations of [`Display`][core::fmt::Display]
+ /// and [`PartialEq`].
  ///
  /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
  ///
@@ -1032,16 +1135,138 @@ impl char {
  /// // convert into themselves.
  /// assert_eq!('山'.to_lowercase().to_string(), "山");
  /// ```
- #[must_use = "this returns the lowercase character as a new iterator, \
+ ///
+ /// Check if a string is in lowercase:
+ ///
+ /// ```
+ /// let s = "abcde\u{0301} 山";
+ /// assert!(s.chars().all(|c| c.to_lowercase() == c));
+ /// ```
+ #[must_use = "this returns the lowercased character as a new iterator, \
  without modifying the original"]
  #[stable(feature = "rust1", since = "1.0.0")]
  #[inline]
  pub fn to_lowercase(self) -> ToLowercase {
  ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
  }
 
+ /// Returns an iterator that yields the titlecase mapping of this `char` as one or more
+ /// `char`s. The iterator also has implementations of [`Display`][core::fmt::Display]
+ /// and [`PartialEq`].
+ ///
+ /// If this `char` does not have a titlecase mapping, the iterator yields the same `char`.
+ ///
+ /// If this `char` has a one-to-one titlecase mapping given by the [Unicode Character
+ /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
+ ///
+ /// [ucd]: https://www.unicode.org/reports/tr44/
+ /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
+ ///
+ /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
+ /// the `char`(s) given by [`SpecialCasing.txt`].
+ ///
+ /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
+ ///
+ /// This operation performs an unconditional mapping without tailoring. That is, the conversion
+ /// is independent of context and language.
+ ///
+ /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
+ /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
+ ///
+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
+ ///
+ /// # Examples
+ ///
+ /// As an iterator:
+ ///
+ /// ```
+ /// #![feature(titlecase)]
+ /// for c in 'ß'.to_titlecase() {
+ /// print!("{c}");
+ /// }
+ /// println!();
+ /// ```
+ ///
+ /// Using `println!` directly:
+ ///
+ /// ```
+ /// #![feature(titlecase)]
+ /// println!("{}", 'ß'.to_titlecase());
+ /// ```
+ ///
+ /// Both are equivalent to:
+ ///
+ /// ```
+ /// #![feature(titlecase)]
+ /// println!("Ss");
+ /// ```
+ ///
+ /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
+ ///
+ /// ```
+ /// #![feature(titlecase)]
+ /// assert_eq!('c'.to_titlecase().to_string(), "C");
+ ///
+ /// // Sometimes the result is more than one character:
+ /// assert_eq!('ß'.to_titlecase().to_string(), "Ss");
+ ///
+ /// // Characters that do not have separate cased forms
+ /// // convert into themselves.
+ /// assert_eq!('山'.to_titlecase().to_string(), "山");
+ /// ```
+ ///
+ /// Check if a word is in titlecase:
+ ///
+ /// ```
+ /// #![feature(titlecase)]
+ /// let word = "Dross";
+ /// let mut chars = word.chars();
+ /// let first_cased_char = chars.find(|c| c.is_cased());
+ /// let word_is_in_titlecase = if let Some(f) = first_cased_char {
+ /// f.to_titlecase() == f && chars.all(|c| c.to_lowercase() == c)
+ /// } else {
+ /// true
+ /// };
+ /// assert!(word_is_in_titlecase);
+ /// ```
+ ///
+ /// # Note on locale
+ ///
+ /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
+ ///
+ /// * 'Dotless': I / ı, sometimes written ï
+ /// * 'Dotted': İ / i
+ ///
+ /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
+ ///
+ /// ```
+ /// #![feature(titlecase)]
+ /// let title_i = 'i'.to_titlecase().to_string();
+ /// ```
+ ///
+ /// The value of `title_i` here relies on the language of the text: if we're
+ /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
+ /// be `"İ"`. `to_titlecase()` does not take this into account, and so:
+ ///
+ /// ```
+ /// #![feature(titlecase)]
+ /// let title_i = 'i'.to_titlecase().to_string();
+ ///
+ /// assert_eq!(title_i, "I");
+ /// ```
+ ///
+ /// holds across languages.
+ #[must_use = "this returns the titlecased character as a new iterator, \
+ without modifying the original"]
+ #[unstable(feature = "titlecase", issue = "none")]
+ #[inline]
+ pub fn to_titlecase(self) -> ToTitlecase {
+ ToTitlecase(CaseMappingIter::new(conversions::to_title(self)))
+ }
+
  /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
- /// `char`s.
+ /// `char`s. The iterator also has implementations of [`Display`][core::fmt::Display]
+ /// and [`PartialEq`].
  ///
  /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
  ///
@@ -1100,9 +1325,16 @@ impl char {
  /// assert_eq!('山'.to_uppercase().to_string(), "山");
  /// ```
  ///
+ /// Check if a string is in uppercase:
+ ///
+ /// ```
+ /// let s = "ABCDE\u{0301} 山";
+ /// assert!(s.chars().all(|c| c.to_uppercase() == c));
+ /// ```
+ ///
  /// # Note on locale
  ///
- /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two:
+ /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
  ///
  /// * 'Dotless': I / ı, sometimes written ï
  /// * 'Dotted': İ / i
@@ -1114,7 +1346,7 @@ impl char {
  /// ```
  ///
  /// The value of `upper_i` here relies on the language of the text: if we're
- /// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should
+ /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
  /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
  ///
  /// ```
@@ -1124,7 +1356,7 @@ impl char {
  /// ```
  ///
  /// holds across languages.
- #[must_use = "this returns the uppercase character as a new iterator, \
+ #[must_use = "this returns the uppercased character as a new iterator, \
  without modifying the original"]
  #[stable(feature = "rust1", since = "1.0.0")]
  #[inline]