Copyright | (c) The University of Glasgow 2001 |
---|---|
License | BSD-style (see the file libraries/base/LICENSE) |
Maintainer | libraries@haskell.org |
Stability | stable |
Portability | portable |
Safe Haskell | Trustworthy |
Language | Haskell2010 |
Data.Char
Contents
Description
The Char type and associated operations.
- data Char :: *
- isControl :: Char -> Bool
- isSpace :: Char -> Bool
- isLower :: Char -> Bool
- isUpper :: Char -> Bool
- isAlpha :: Char -> Bool
- isAlphaNum :: Char -> Bool
- isPrint :: Char -> Bool
- isDigit :: Char -> Bool
- isOctDigit :: Char -> Bool
- isHexDigit :: Char -> Bool
- isLetter :: Char -> Bool
- isMark :: Char -> Bool
- isNumber :: Char -> Bool
- isPunctuation :: Char -> Bool
- isSymbol :: Char -> Bool
- isSeparator :: Char -> Bool
- isAscii :: Char -> Bool
- isLatin1 :: Char -> Bool
- isAsciiUpper :: Char -> Bool
- isAsciiLower :: Char -> Bool
- data GeneralCategory
- = UppercaseLetter
- | LowercaseLetter
- | TitlecaseLetter
- | ModifierLetter
- | OtherLetter
- | NonSpacingMark
- | SpacingCombiningMark
- | EnclosingMark
- | DecimalNumber
- | LetterNumber
- | OtherNumber
- | ConnectorPunctuation
- | DashPunctuation
- | OpenPunctuation
- | ClosePunctuation
- | InitialQuote
- | FinalQuote
- | OtherPunctuation
- | MathSymbol
- | CurrencySymbol
- | ModifierSymbol
- | OtherSymbol
- | Space
- | LineSeparator
- | ParagraphSeparator
- | Control
- | Format
- | Surrogate
- | PrivateUse
- | NotAssigned
- generalCategory :: Char -> GeneralCategory
- toUpper :: Char -> Char
- toLower :: Char -> Char
- toTitle :: Char -> Char
- digitToInt :: Char -> Int
- intToDigit :: Int -> Char
- ord :: Char -> Int
- chr :: Int -> Char
- showLitChar :: Char -> ShowS
- lexLitChar :: ReadS String
- readLitChar :: ReadS Char
Documentation
The character type Char is an enumeration whose values represent Unicode (or equivalently ISO/IEC 10646) characters (see http://www.unicode.org/ for details). This set extends the ISO 8859-1 (Latin-1) character set (the first 256 characters), which is itself an extension of the ASCII character set (the first 128 characters). A character literal in Haskell has type Char.
To convert a Char to or from the corresponding Int value defined by Unicode, use fromEnum and toEnum from the Enum class respectively (or equivalently ord and chr).
Instances
Bounded Char Source # | Since: 2.1 |
Enum Char Source # | Since: 2.1 |
Eq Char | |
Data Char Source # | Since: 4.0.0.0 |
Ord Char | |
Read Char Source # | Since: 2.1 |
Show Char Source # | Since: 2.1 |
Ix Char Source # | Since: 2.1 |
Storable Char Source # | Since: 2.1 |
IsChar Char Source # | Since: 2.1 |
PrintfArg Char Source # | Since: 2.1 |
Generic1 k (URec k Char) Source # | |
Functor (URec * Char) Source # | |
Foldable (URec * Char) Source # | |
Traversable (URec * Char) Source # | |
Eq (URec k Char p) # | |
Ord (URec k Char p) # | |
Show (URec k Char p) Source # | |
Generic (URec k Char p) Source # | |
data URec k Char Source # | Used for marking occurrences of Char# Since: 4.9.0.0 |
type Rep1 k (URec k Char) Source # | |
type Rep (URec k Char p) Source # | |
Character classification
Unicode characters are divided into letters, numbers, marks, punctuation, symbols, separators (including spaces) and others (including control characters).
isControl :: Char -> Bool Source #
Selects control characters, which are the non-printing characters of the Latin-1 subset of Unicode.
isSpace :: Char -> Bool Source #
Returns True for any Unicode space character, and the control characters \t, \n, \r, \f, \v.
isUpper :: Char -> Bool Source #
Selects upper-case or title-case alphabetic Unicode characters (letters). Title case is used by a small number of letter ligatures like the single-character form of Lj.
isAlpha :: Char -> Bool Source #
Selects alphabetic Unicode characters (lower-case, upper-case and title-case letters, plus letters of caseless scripts and modifier letters). This function is equivalent to isLetter.
isAlphaNum :: Char -> Bool Source #
Selects alphabetic or numeric digit Unicode characters.
Note that numeric digits outside the ASCII range are selected by this function but not by isDigit. Such digits may be part of identifiers but are not used by the printer and reader to represent numbers.
isPrint :: Char -> Bool Source #
Selects printable Unicode characters (letters, numbers, marks, punctuation, symbols and spaces).
isOctDigit :: Char -> Bool Source #
Selects ASCII octal digits, i.e. '0'..'7'.
isHexDigit :: Char -> Bool Source #
Selects ASCII hexadecimal digits, i.e. '0'..'9', 'a'..'f', 'A'..'F'.
isLetter :: Char -> Bool Source #
Selects alphabetic Unicode characters (lower-case, upper-case and title-case letters, plus letters of caseless scripts and modifier letters). This function is equivalent to isAlpha.
This function returns True if its argument has one of the following GeneralCategory values, or False otherwise:
UppercaseLetter
LowercaseLetter
TitlecaseLetter
ModifierLetter
OtherLetter
These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Letter".
Examples
Basic usage:
>>>
isLetter 'a'
True
>>>
isLetter 'A'
True
>>>
isLetter '0'
False
>>>
isLetter '%'
False
>>>
isLetter '♥'
False
>>>
isLetter '\31'
False
Ensure that isLetter and isAlpha are equivalent.
>>>
let chars = [(chr 0)..]
>>>
let letters = map isLetter chars
>>>
let alphas = map isAlpha chars
>>>
letters == alphas
True
isMark :: Char -> Bool Source #
Selects Unicode mark characters, for example accents and the like, which combine with preceding characters.
This function returns True if its argument has one of the following GeneralCategory values, or False otherwise:
NonSpacingMark
SpacingCombiningMark
EnclosingMark
These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Mark".
Examples
Basic usage:
>>>
isMark 'a'
False
>>>
isMark '0'
False
Combining marks such as accent characters usually need to follow another character before they become printable:
>>>
map isMark "ò"
[False,True]
Puns are not necessarily supported:
>>>
isMark '✓'
False
isNumber :: Char -> Bool Source #
Selects Unicode numeric characters, including digits from various scripts, Roman numerals, et cetera.
This function returns True if its argument has one of the following GeneralCategory values, or False otherwise:
DecimalNumber
LetterNumber
OtherNumber
These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Number".
Examples
Basic usage:
>>>
isNumber 'a'
False
>>>
isNumber '%'
False
>>>
isNumber '3'
True
ASCII '0' through '9' are all numbers:
>>>
and $ map isNumber ['0'..'9']
True
Unicode Roman numerals are "numbers" as well:
>>>
isNumber 'Ⅸ'
True
isPunctuation :: Char -> Bool Source #
Selects Unicode punctuation characters, including various kinds of connectors, brackets and quotes.
This function returns True if its argument has one of the following GeneralCategory values, or False otherwise:
ConnectorPunctuation
DashPunctuation
OpenPunctuation
ClosePunctuation
InitialQuote
FinalQuote
OtherPunctuation
These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Punctuation".
Examples
Basic usage:
>>>
isPunctuation 'a'
False
>>>
isPunctuation '7'
False
>>>
isPunctuation '♥'
False
>>>
isPunctuation '"'
True
>>>
isPunctuation '?'
True
>>>
isPunctuation '—'
True
isSymbol :: Char -> Bool Source #
Selects Unicode symbol characters, including mathematical and currency symbols.
This function returns True if its argument has one of the following GeneralCategory values, or False otherwise:
MathSymbol
CurrencySymbol
ModifierSymbol
OtherSymbol
These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Symbol".
Examples
Basic usage:
>>>
isSymbol 'a'
False
>>>
isSymbol '6'
False
>>>
isSymbol '='
True
The definition of "math symbol" may be a little counter-intuitive depending on one's background:
>>>
isSymbol '+'
True
>>>
isSymbol '-'
False
isSeparator :: Char -> Bool Source #
Selects Unicode space and separator characters.
This function returns True if its argument has one of the following GeneralCategory values, or False otherwise:
Space
LineSeparator
ParagraphSeparator
These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Separator".
Examples
Basic usage:
>>>
isSeparator 'a'
False
>>>
isSeparator '6'
False
>>>
isSeparator ' '
True
Warning: newlines and tab characters are not considered separators.
>>>
isSeparator '\n'
False
>>>
isSeparator '\t'
False
But some more exotic characters are (like HTML's &nbsp;):
>>>
isSeparator '\160'
True
Subranges
isAscii :: Char -> Bool Source #
Selects the first 128 characters of the Unicode character set, corresponding to the ASCII character set.
isLatin1 :: Char -> Bool Source #
Selects the first 256 characters of the Unicode character set, corresponding to the ISO 8859-1 (Latin-1) character set.
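isAsciiUpper :: Char -> Bool Source #
Selects ASCII upper-case letters, i.e. characters satisfying both isAscii and isUpper.
isAsciiLower :: Char -> Bool Source #
Selects ASCII lower-case letters, i.e. characters satisfying both isAscii and isLower.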
Unicode general categories
data GeneralCategory Source #
Unicode General Categories (column 2 of the UnicodeData table) in the order they are listed in the Unicode standard (the Unicode Character Database, in particular).
Examples
Basic usage:
>>>
:t OtherLetter
OtherLetter :: GeneralCategory
Eq instance:
>>>
UppercaseLetter == UppercaseLetter
True
>>>
UppercaseLetter == LowercaseLetter
False
Ord instance:
>>>
NonSpacingMark <= MathSymbol
True
Enum instance:
>>>
enumFromTo ModifierLetter SpacingCombiningMark
[ModifierLetter,OtherLetter,NonSpacingMark,SpacingCombiningMark]
Read instance:
>>>
read "DashPunctuation" :: GeneralCategory
DashPunctuation
>>>
read "17" :: GeneralCategory
*** Exception: Prelude.read: no parse
Show instance:
>>>
show EnclosingMark
"EnclosingMark"
Bounded instance:
>>>
minBound :: GeneralCategory
UppercaseLetter
>>>
maxBound :: GeneralCategory
NotAssigned
Ix instance:
>>>
import Data.Ix ( index )
>>>
index (OtherLetter,Control) FinalQuote
12
>>>
index (OtherLetter,Control) Format
*** Exception: Error in array index
Constructors
UppercaseLetter | Lu: Letter, Uppercase |
LowercaseLetter | Ll: Letter, Lowercase |
TitlecaseLetter | Lt: Letter, Titlecase |
ModifierLetter | Lm: Letter, Modifier |
OtherLetter | Lo: Letter, Other |
NonSpacingMark | Mn: Mark, Non-Spacing |
SpacingCombiningMark | Mc: Mark, Spacing Combining |
EnclosingMark | Me: Mark, Enclosing |
DecimalNumber | Nd: Number, Decimal |
LetterNumber | Nl: Number, Letter |
OtherNumber | No: Number, Other |
ConnectorPunctuation | Pc: Punctuation, Connector |
DashPunctuation | Pd: Punctuation, Dash |
OpenPunctuation | Ps: Punctuation, Open |
ClosePunctuation | Pe: Punctuation, Close |
InitialQuote | Pi: Punctuation, Initial quote |
FinalQuote | Pf: Punctuation, Final quote |
OtherPunctuation | Po: Punctuation, Other |
MathSymbol | Sm: Symbol, Math |
CurrencySymbol | Sc: Symbol, Currency |
ModifierSymbol | Sk: Symbol, Modifier |
OtherSymbol | So: Symbol, Other |
Space | Zs: Separator, Space |
LineSeparator | Zl: Separator, Line |
ParagraphSeparator | Zp: Separator, Paragraph |
Control | Cc: Other, Control |
Format | Cf: Other, Format |
Surrogate | Cs: Other, Surrogate |
PrivateUse | Co: Other, Private Use |
NotAssigned | Cn: Other, Not Assigned |
generalCategory :: Char -> GeneralCategory Source #
The Unicode general category of the character. This relies on the Enum instance of GeneralCategory, which must remain in the same order as the categories are presented in the Unicode standard.
Examples
Basic usage:
>>>
generalCategory 'a'
LowercaseLetter
>>>
generalCategory 'A'
UppercaseLetter
>>>
generalCategory '0'
DecimalNumber
>>>
generalCategory '%'
OtherPunctuation
>>>
generalCategory '♥'
OtherSymbol
>>>
generalCategory '\31'
Control
>>>
generalCategory ' '
Space
Case conversion
toUpper :: Char -> Char Source #
Convert a letter to the corresponding upper-case letter, if any. Any other character is returned unchanged.
toLower :: Char -> Char Source #
Convert a letter to the corresponding lower-case letter, if any. Any other character is returned unchanged.
toTitle :: Char -> Char Source #
Convert a letter to the corresponding title-case or upper-case letter, if any. (Title case differs from upper case only for a small number of ligature letters.) Any other character is returned unchanged.
Single digit characters
digitToInt :: Char -> Int Source #
Convert a single digit Char to the corresponding Int. This function fails unless its argument satisfies isHexDigit, but recognises both upper- and lower-case hexadecimal digits (that is, '0'..'9', 'a'..'f', 'A'..'F').
Examples
Characters '0' through '9' are converted properly to 0..9:
>>>
map digitToInt ['0'..'9']
[0,1,2,3,4,5,6,7,8,9]
Both upper- and lower-case 'A' through 'F' are converted as well, to 10..15.
>>>
map digitToInt ['a'..'f']
[10,11,12,13,14,15]
>>>
map digitToInt ['A'..'F']
[10,11,12,13,14,15]
Anything else throws an exception:
>>>
digitToInt 'G'
*** Exception: Char.digitToInt: not a digit 'G'
>>>
digitToInt '♥'
*** Exception: Char.digitToInt: not a digit '\9829'
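intToDigit :: Int -> Char Source #
Convert an Int in the range 0..15 to the corresponding single digit Char. This function fails on other inputs, and generates lower-case hexadecimal digits.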
Numeric representations
String representations
showLitChar :: Char -> ShowS Source #
Convert a character to a string using only printable characters, using Haskell source-language escape conventions. For example:
showLitChar '\n' s = "\\n" ++ s
lexLitChar :: ReadS String Source #
Read a string representation of a character, using Haskell source-language escape conventions. For example:
lexLitChar "\\nHello" = [("\\n", "Hello")]
readLitChar :: ReadS Char Source #
Read a string representation of a character, using Haskell source-language escape conventions, and convert it to the character that it encodes. For example:
readLitChar "\\nHello" = [('\n', "Hello")]