{-# LANGUAGE StrictData #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
module Citeproc.CaseTransform
( CaseTransformState(..)
, CaseTransformer(..)
, withUppercaseAll
, withLowercaseAll
, withCapitalizeWords
, withCapitalizeFirst
, withSentenceCase
, withTitleCase
)
where
import Data.Ord ()
import Data.Char (isUpper, isLower, isAscii)
import Data.Text (Text)
import qualified Data.Text as T
import Citeproc.Types (Lang(..))
import qualified Citeproc.Unicode as Unicode
newtype CaseTransformer =
CaseTransformer
{ CaseTransformer -> Maybe Lang -> CaseTransformState -> Text -> Text
unCaseTransformer :: Maybe Lang -> CaseTransformState -> Text -> Text }
data CaseTransformState =
Start
| StartSentence
| AfterWordEnd
| AfterWordChar
| AfterSentenceEndingPunctuation
| BeforeLastWord
deriving (Int -> CaseTransformState -> ShowS
[CaseTransformState] -> ShowS
CaseTransformState -> String
(Int -> CaseTransformState -> ShowS)
-> (CaseTransformState -> String)
-> ([CaseTransformState] -> ShowS)
-> Show CaseTransformState
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CaseTransformState] -> ShowS
$cshowList :: [CaseTransformState] -> ShowS
show :: CaseTransformState -> String
$cshow :: CaseTransformState -> String
showsPrec :: Int -> CaseTransformState -> ShowS
$cshowsPrec :: Int -> CaseTransformState -> ShowS
Show, CaseTransformState -> CaseTransformState -> Bool
(CaseTransformState -> CaseTransformState -> Bool)
-> (CaseTransformState -> CaseTransformState -> Bool)
-> Eq CaseTransformState
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CaseTransformState -> CaseTransformState -> Bool
$c/= :: CaseTransformState -> CaseTransformState -> Bool
== :: CaseTransformState -> CaseTransformState -> Bool
$c== :: CaseTransformState -> CaseTransformState -> Bool
Eq)
withUppercaseAll :: CaseTransformer
withUppercaseAll :: CaseTransformer
withUppercaseAll =
(Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer (\Maybe Lang
mblang CaseTransformState
_ -> Maybe Lang -> Text -> Text
Unicode.toUpper Maybe Lang
mblang)
withLowercaseAll :: CaseTransformer
withLowercaseAll :: CaseTransformer
withLowercaseAll =
(Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer (\Maybe Lang
mblang CaseTransformState
_ -> Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang)
withCapitalizeWords :: CaseTransformer
withCapitalizeWords :: CaseTransformer
withCapitalizeWords = (Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer Maybe Lang -> CaseTransformState -> Text -> Text
go
where
go :: Maybe Lang -> CaseTransformState -> Text -> Text
go Maybe Lang
mblang CaseTransformState
st Text
chunk
| Text -> Bool
isMixedCase Text
chunk = Text
chunk
| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
Start Bool -> Bool -> Bool
|| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
StartSentence Bool -> Bool -> Bool
|| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
AfterWordEnd Bool -> Bool -> Bool
||
CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
BeforeLastWord
= if (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isLower Text
chunk
then Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang Text
chunk
else Text
chunk
| Bool
otherwise = Text
chunk
withCapitalizeFirst :: CaseTransformer
withCapitalizeFirst :: CaseTransformer
withCapitalizeFirst = (Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer Maybe Lang -> CaseTransformState -> Text -> Text
go
where
go :: Maybe Lang -> CaseTransformState -> Text -> Text
go Maybe Lang
mblang CaseTransformState
st Text
chunk
| Text -> Bool
isMixedCase Text
chunk = Text
chunk
| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
Start
= if (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isLower Text
chunk
then Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang Text
chunk
else Text
chunk
| Bool
otherwise = Text
chunk
withSentenceCase :: CaseTransformer
withSentenceCase :: CaseTransformer
withSentenceCase = (Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer Maybe Lang -> CaseTransformState -> Text -> Text
go
where
go :: Maybe Lang -> CaseTransformState -> Text -> Text
go Maybe Lang
mblang CaseTransformState
st Text
chunk
| Text -> Bool
isCapitalized Text
chunk
, Bool -> Bool
not (CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
Start Bool -> Bool -> Bool
|| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
StartSentence)
= Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang Text
chunk
| Text -> Bool
isCapitalized Text
chunk Bool -> Bool -> Bool
|| (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isLower Text
chunk
, CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
Start Bool -> Bool -> Bool
|| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
StartSentence
= Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang Text
chunk
| Bool
otherwise = Text
chunk
withTitleCase :: CaseTransformer
withTitleCase :: CaseTransformer
withTitleCase = (Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer Maybe Lang -> CaseTransformState -> Text -> Text
go
where
go :: Maybe Lang -> CaseTransformState -> Text -> Text
go Maybe Lang
mblang CaseTransformState
st Text
chunk
| Text -> Bool
isMixedCase Text
chunk = Text
chunk
| (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isUpper Text
chunk = Text
chunk
| (Char -> Bool) -> Text -> Bool
T.any (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isAscii) Text
chunk = Text
chunk
| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
StartSentence Bool -> Bool -> Bool
|| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
Start =
Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang Text
chunk
| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
AfterWordEnd
, Bool -> Bool
not (Text -> Bool
isStopWord Text
chunk)
, Text -> Int -> Ordering
T.compareLength Text
chunk Int
1 Ordering -> Ordering -> Bool
forall a. Eq a => a -> a -> Bool
== Ordering
GT =
Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang Text
chunk
| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
BeforeLastWord
, Text -> Int -> Ordering
T.compareLength Text
chunk Int
1 Ordering -> Ordering -> Bool
forall a. Eq a => a -> a -> Bool
== Ordering
GT =
Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang Text
chunk
| Bool
otherwise = Text
chunk
isCapitalized :: Text -> Bool
isCapitalized :: Text -> Bool
isCapitalized Text
t =
case Text -> Maybe (Char, Text)
T.uncons Text
t of
Just (Char
c, Text
t') -> Char -> Bool
isUpper Char
c Bool -> Bool -> Bool
&& (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isLower Text
t'
Maybe (Char, Text)
_ -> Bool
False
isMixedCase :: Text -> Bool
isMixedCase :: Text -> Bool
isMixedCase Text
t = (Char -> Bool) -> Text -> Bool
T.any Char -> Bool
isUpper Text
t Bool -> Bool -> Bool
&& (Char -> Bool) -> Text -> Bool
T.any Char -> Bool
isLower Text
t
capitalizeText :: Maybe Lang -> Text -> Text
capitalizeText :: Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang Text
x =
case Text -> Maybe (Char, Text)
T.uncons Text
x of
Just (Char
c,Text
x') -> Maybe Lang -> Text -> Text
Unicode.toUpper Maybe Lang
mblang (Char -> Text
T.singleton Char
c) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
x'
Maybe (Char, Text)
Nothing -> Text
x
isStopWord :: Text -> Bool
isStopWord :: Text -> Bool
isStopWord Text
"a" = Bool
True
isStopWord Text
"an" = Bool
True
isStopWord Text
"and" = Bool
True
isStopWord Text
"as" = Bool
True
isStopWord Text
"at" = Bool
True
isStopWord Text
"but" = Bool
True
isStopWord Text
"by" = Bool
True
isStopWord Text
"down" = Bool
True
isStopWord Text
"for" = Bool
True
isStopWord Text
"from" = Bool
True
isStopWord Text
"in" = Bool
True
isStopWord Text
"into" = Bool
True
isStopWord Text
"nor" = Bool
True
isStopWord Text
"of" = Bool
True
isStopWord Text
"on" = Bool
True
isStopWord Text
"onto" = Bool
True
isStopWord Text
"or" = Bool
True
isStopWord Text
"over" = Bool
True
isStopWord Text
"so" = Bool
True
isStopWord Text
"the" = Bool
True
isStopWord Text
"till" = Bool
True
isStopWord Text
"to" = Bool
True
isStopWord Text
"up" = Bool
True
isStopWord Text
"via" = Bool
True
isStopWord Text
"with" = Bool
True
isStopWord Text
"yet" = Bool
True
isStopWord Text
"about" = Bool
True
isStopWord Text
"van" = Bool
True
isStopWord Text
"von" = Bool
True
isStopWord Text
"de" = Bool
True
isStopWord Text
"d" = Bool
True
isStopWord Text
"l" = Bool
True
isStopWord Text
_ = Bool
False