Copyright	(c) 2010 Bryan O'Sullivan
License	BSD-style
Maintainer	bos@serpentine.com
Stability	experimental
Portability	GHC
Safe Haskell	None
Language	Haskell98

Data.Text.ICU.Collate

Contents

Unicode collation API
Functions
- Utility functions

Description

String collation functions for Unicode, implemented as bindings to the International Components for Unicode (ICU) libraries.

Synopsis

data MCollator
data Attribute
- = French Bool
- | AlternateHandling AlternateHandling
- | CaseFirst (Maybe CaseFirst)
- | CaseLevel Bool
- | NormalizationMode Bool
- | Strength Strength
- | HiraganaQuaternaryMode Bool
- | Numeric Bool
data AlternateHandling
- = NonIgnorable
- | Shifted
data CaseFirst
- = UpperFirst
- | LowerFirst
data Strength
- = Primary
- | Secondary
- | Tertiary
- | Quaternary
- | Identical
open :: LocaleName -> IO MCollator
collate :: MCollator -> Text -> Text -> IO Ordering
collateIter :: MCollator -> CharIterator -> CharIterator -> IO Ordering
getAttribute :: MCollator -> Attribute -> IO Attribute
setAttribute :: MCollator -> Attribute -> IO ()
sortKey :: MCollator -> Text -> IO ByteString
clone :: MCollator -> IO MCollator
freeze :: MCollator -> IO Collator

Unicode collation API

data MCollator Source #

String collator type.

data Attribute Source #

Constructors

French Bool	Direction of secondary weights, used in French. `True` results in secondary weights being considered backwards, while `False` treats secondary weights in the order in which they appear.
AlternateHandling AlternateHandling	For handling variable elements. `NonIgnorable` is default.
CaseFirst (Maybe CaseFirst)	Control the ordering of upper and lower case letters. `Nothing` (the default) orders upper and lower case letters in accordance with their tertiary weights.
CaseLevel Bool	Controls whether an extra case level (positioned before the third level) is generated or not. When `False` (default), case level is not generated; when `True`, the case level is generated. Contents of the case level are affected by the value of the `CaseFirst` attribute. A simple way to ignore accent differences in a string is to set the strength to `Primary` and enable case level.
NormalizationMode Bool	Controls whether the normalization check and necessary normalizations are performed. When `False` (default), no normalization check is performed. The correctness of the result is guaranteed only if the input data is in so-called `FCD` form (see the user's manual for more info). When `True`, an incremental check is performed to see whether the input data is in `FCD` form. If the data is not in `FCD` form, incremental `NFD` normalization is performed.
Strength Strength
HiraganaQuaternaryMode Bool	When turned on, this attribute positions Hiragana before all non-ignorables on the quaternary level. This is a sneaky way to produce JIS sort order.
Numeric Bool	When enabled, this attribute generates a collation key for the numeric value of substrings of digits. This is a way to get '100' to sort after '2'.

Instances

Instances details

Eq Attribute Source #
Instance details Defined in Data.Text.ICU.Collate Methods (==) :: Attribute -> Attribute -> Bool # (/=) :: Attribute -> Attribute -> Bool #
Show Attribute Source #
Instance details Defined in Data.Text.ICU.Collate Methods showsPrec :: Int -> Attribute -> ShowS # show :: Attribute -> String # showList :: [Attribute] -> ShowS #
NFData Attribute Source #
Instance details Defined in Data.Text.ICU.Collate Methods rnf :: Attribute -> () #

data AlternateHandling Source #

Control the handling of variable weight elements.

Constructors

NonIgnorable	Treat all codepoints with non-ignorable primary weights in the same way.
Shifted	Cause codepoints with primary weights that are equal to or below the variable top value to be ignored on the primary level and moved to the quaternary level.

Instances

Instances details

Bounded AlternateHandling Source #
Instance details Defined in Data.Text.ICU.Collate Methods minBound :: AlternateHandling # maxBound :: AlternateHandling #
Enum AlternateHandling Source #
Instance details Defined in Data.Text.ICU.Collate Methods succ :: AlternateHandling -> AlternateHandling # pred :: AlternateHandling -> AlternateHandling # toEnum :: Int -> AlternateHandling # fromEnum :: AlternateHandling -> Int # enumFrom :: AlternateHandling -> [AlternateHandling] # enumFromThen :: AlternateHandling -> AlternateHandling -> [AlternateHandling] # enumFromTo :: AlternateHandling -> AlternateHandling -> [AlternateHandling] # enumFromThenTo :: AlternateHandling -> AlternateHandling -> AlternateHandling -> [AlternateHandling] #
Eq AlternateHandling Source #
Instance details Defined in Data.Text.ICU.Collate Methods (==) :: AlternateHandling -> AlternateHandling -> Bool # (/=) :: AlternateHandling -> AlternateHandling -> Bool #
Show AlternateHandling Source #
Instance details Defined in Data.Text.ICU.Collate Methods showsPrec :: Int -> AlternateHandling -> ShowS # show :: AlternateHandling -> String # showList :: [AlternateHandling] -> ShowS #
NFData AlternateHandling Source #
Instance details Defined in Data.Text.ICU.Collate Methods rnf :: AlternateHandling -> () #

data CaseFirst Source #

Control the ordering of upper and lower case letters.

Constructors

UpperFirst	Force upper case letters to sort before lower case.
LowerFirst	Force lower case letters to sort before upper case.

Instances

Instances details

Bounded CaseFirst Source #
Instance details Defined in Data.Text.ICU.Collate Methods minBound :: CaseFirst # maxBound :: CaseFirst #
Enum CaseFirst Source #
Instance details Defined in Data.Text.ICU.Collate Methods succ :: CaseFirst -> CaseFirst # pred :: CaseFirst -> CaseFirst # toEnum :: Int -> CaseFirst # fromEnum :: CaseFirst -> Int # enumFrom :: CaseFirst -> [CaseFirst] # enumFromThen :: CaseFirst -> CaseFirst -> [CaseFirst] # enumFromTo :: CaseFirst -> CaseFirst -> [CaseFirst] # enumFromThenTo :: CaseFirst -> CaseFirst -> CaseFirst -> [CaseFirst] #
Eq CaseFirst Source #
Instance details Defined in Data.Text.ICU.Collate Methods (==) :: CaseFirst -> CaseFirst -> Bool # (/=) :: CaseFirst -> CaseFirst -> Bool #
Show CaseFirst Source #
Instance details Defined in Data.Text.ICU.Collate Methods showsPrec :: Int -> CaseFirst -> ShowS # show :: CaseFirst -> String # showList :: [CaseFirst] -> ShowS #
NFData CaseFirst Source #
Instance details Defined in Data.Text.ICU.Collate Methods rnf :: CaseFirst -> () #

data Strength Source #

The strength attribute. The usual strength for most locales (except Japanese) is tertiary. Quaternary strength is useful when combined with the shifted setting for the alternate handling attribute, and for JIS X 4061 collation, where it is used to distinguish between Katakana and Hiragana (this is achieved by setting HiraganaQuaternaryMode to True). Otherwise, the quaternary level is affected only by the number of non-ignorable code points in the string. Identical strength is rarely useful, as it amounts to comparing the codepoints of the NFD form of the string.

Constructors

Primary
Secondary
Tertiary
Quaternary
Identical

Instances

Instances details

Bounded Strength Source #
Instance details Defined in Data.Text.ICU.Collate Methods minBound :: Strength # maxBound :: Strength #
Enum Strength Source #
Instance details Defined in Data.Text.ICU.Collate Methods succ :: Strength -> Strength # pred :: Strength -> Strength # toEnum :: Int -> Strength # fromEnum :: Strength -> Int # enumFrom :: Strength -> [Strength] # enumFromThen :: Strength -> Strength -> [Strength] # enumFromTo :: Strength -> Strength -> [Strength] # enumFromThenTo :: Strength -> Strength -> Strength -> [Strength] #
Eq Strength Source #
Instance details Defined in Data.Text.ICU.Collate Methods (==) :: Strength -> Strength -> Bool # (/=) :: Strength -> Strength -> Bool #
Show Strength Source #
Instance details Defined in Data.Text.ICU.Collate Methods showsPrec :: Int -> Strength -> ShowS # show :: Strength -> String # showList :: [Strength] -> ShowS #
NFData Strength Source #
Instance details Defined in Data.Text.ICU.Collate Methods rnf :: Strength -> () #

Functions

open Source #

Arguments

:: LocaleName	The locale containing the required collation rules.
-> IO MCollator

Open a Collator for comparing strings.

collate :: MCollator -> Text -> Text -> IO Ordering Source #

Compare two strings.

collateIter :: MCollator -> CharIterator -> CharIterator -> IO Ordering Source #

Compare two CharIterators.

If either iterator was constructed from a ByteString, it does not need to be copied or converted internally, so this function can be quite cheap.

Utility functions

getAttribute :: MCollator -> Attribute -> IO Attribute Source #

Get the value of an MCollator attribute.

It is safe to provide a dummy argument to an Attribute constructor when using this function, so the following will work:

getAttribute mcol (NormalizationMode undefined)

setAttribute :: MCollator -> Attribute -> IO () Source #

Set the value of an MCollator attribute.

sortKey :: MCollator -> Text -> IO ByteString Source #

Create a key for sorting the Text using the given Collator. The result of comparing two ByteStrings that have been transformed with sortKey will be the same as the result of collate on the two untransformed Texts.

clone :: MCollator -> IO MCollator Source #

Make a copy of a mutable MCollator. Subsequent changes to the input MCollator will not affect the state of the returned MCollator.

freeze :: MCollator -> IO Collator Source #

Make a safe copy of a mutable MCollator for use in pure code. Subsequent changes to the MCollator will not affect the state of the returned Collator.