{-# LANGUAGE StrictData #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
-- | Provides functions that facilitate defining textcase transformations.
-- To see how these can be used used, see the definitions of @addTextCase@
-- in "Citeproc.Pandoc" and "Citproc.CslJson".
module Citeproc.CaseTransform
  ( CaseTransformState(..)
  , CaseTransformer(..)
  , withUppercaseAll
  , withLowercaseAll
  , withCapitalizeWords
  , withCapitalizeFirst
  , withSentenceCase
  , withTitleCase
  )
where

import Data.Ord ()
import Data.Char (isUpper, isLower, isLetter)
import Data.Text (Text)
import qualified Data.Text as T
import Citeproc.Types (Lang(..))
import qualified Citeproc.Unicode as Unicode

-- | Wraps a function used to define textcase transformations.
newtype CaseTransformer =
  CaseTransformer
  { CaseTransformer -> Maybe Lang -> CaseTransformState -> Text -> Text
unCaseTransformer :: Maybe Lang -> CaseTransformState -> Text -> Text }

-- | Tracks context in textcase transformations.
data CaseTransformState =
      Start
    | StartSentence
    | AfterWordEnd
    | AfterWordChar
    | AfterSentenceEndingPunctuation
    | BeforeLastWord
    deriving (Int -> CaseTransformState -> ShowS
[CaseTransformState] -> ShowS
CaseTransformState -> String
(Int -> CaseTransformState -> ShowS)
-> (CaseTransformState -> String)
-> ([CaseTransformState] -> ShowS)
-> Show CaseTransformState
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> CaseTransformState -> ShowS
showsPrec :: Int -> CaseTransformState -> ShowS
$cshow :: CaseTransformState -> String
show :: CaseTransformState -> String
$cshowList :: [CaseTransformState] -> ShowS
showList :: [CaseTransformState] -> ShowS
Show, CaseTransformState -> CaseTransformState -> Bool
(CaseTransformState -> CaseTransformState -> Bool)
-> (CaseTransformState -> CaseTransformState -> Bool)
-> Eq CaseTransformState
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: CaseTransformState -> CaseTransformState -> Bool
== :: CaseTransformState -> CaseTransformState -> Bool
$c/= :: CaseTransformState -> CaseTransformState -> Bool
/= :: CaseTransformState -> CaseTransformState -> Bool
Eq)

-- | Uppercase everything.
withUppercaseAll :: CaseTransformer
withUppercaseAll :: CaseTransformer
withUppercaseAll =
  (Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer (\Maybe Lang
mblang CaseTransformState
_ -> Maybe Lang -> Text -> Text
Unicode.toUpper Maybe Lang
mblang)

-- | Lowercase everything.
withLowercaseAll :: CaseTransformer
withLowercaseAll :: CaseTransformer
withLowercaseAll =
  (Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer (\Maybe Lang
mblang CaseTransformState
_ -> Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang)

-- | Capitalize all words.
withCapitalizeWords :: CaseTransformer
withCapitalizeWords :: CaseTransformer
withCapitalizeWords = (Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer Maybe Lang -> CaseTransformState -> Text -> Text
go
 where
  go :: Maybe Lang -> CaseTransformState -> Text -> Text
go Maybe Lang
mblang CaseTransformState
st Text
chunk
     | Text -> Bool
isMixedCase Text
chunk = Text
chunk
     | CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
Start Bool -> Bool -> Bool
|| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
StartSentence Bool -> Bool -> Bool
|| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
AfterWordEnd Bool -> Bool -> Bool
||
       CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
BeforeLastWord
       = if (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isLower Text
chunk
            then Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang Text
chunk
            else Text
chunk
     | Bool
otherwise = Text
chunk

-- | Capitalize first letter.
withCapitalizeFirst :: CaseTransformer
withCapitalizeFirst :: CaseTransformer
withCapitalizeFirst = (Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer Maybe Lang -> CaseTransformState -> Text -> Text
go
 where
  go :: Maybe Lang -> CaseTransformState -> Text -> Text
go Maybe Lang
mblang CaseTransformState
st Text
chunk
     | Text -> Bool
isMixedCase Text
chunk = Text
chunk
     | CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
Start
       = if (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isLower Text
chunk
            then Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang Text
chunk
            else Text
chunk
     | Bool
otherwise = Text
chunk

-- | Capitalize first letter of each sentence.
withSentenceCase :: CaseTransformer
withSentenceCase :: CaseTransformer
withSentenceCase = (Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer Maybe Lang -> CaseTransformState -> Text -> Text
go
 where
  go :: Maybe Lang -> CaseTransformState -> Text -> Text
go Maybe Lang
mblang CaseTransformState
st Text
chunk
     | Text -> Bool
isCapitalized Text
chunk
     , Bool -> Bool
not (CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
Start Bool -> Bool -> Bool
|| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
StartSentence)
       = Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang Text
chunk
     | Text -> Bool
isCapitalized Text
chunk Bool -> Bool -> Bool
|| (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isLower Text
chunk
     , CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
Start Bool -> Bool -> Bool
|| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
StartSentence
       = Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang Text
chunk
     | Bool
otherwise = Text
chunk

-- | Use title case.
withTitleCase :: CaseTransformer
withTitleCase :: CaseTransformer
withTitleCase = (Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer Maybe Lang -> CaseTransformState -> Text -> Text
go
 where
  go :: Maybe Lang -> CaseTransformState -> Text -> Text
go Maybe Lang
mblang CaseTransformState
st Text
chunk
     | Text -> Bool
isMixedCase Text
chunk = Text
chunk
     | (Char -> Bool) -> Text -> Bool
T.all (\Char
c -> Char -> Bool
isUpper Char
c Bool -> Bool -> Bool
|| Bool -> Bool
not (Char -> Bool
isLetter Char
c)) Text
chunk = Text
chunk
        -- spec doesn't say this but tests do: textcase_TitleCapitalization.txt
     | CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
StartSentence Bool -> Bool -> Bool
|| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
Start =
       Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang Text
chunk
     | CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
AfterWordEnd
     , Bool -> Bool
not (Text -> Bool
isStopWord Text
chunk)
     , Text -> Int -> Ordering
T.compareLength Text
chunk Int
1 Ordering -> Ordering -> Bool
forall a. Eq a => a -> a -> Bool
== Ordering
GT =
         Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang Text
chunk
     | CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
BeforeLastWord
     , Text -> Int -> Ordering
T.compareLength Text
chunk Int
1 Ordering -> Ordering -> Bool
forall a. Eq a => a -> a -> Bool
== Ordering
GT =
         Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang Text
chunk
     | Bool
otherwise = Text
chunk

isCapitalized :: Text -> Bool
isCapitalized :: Text -> Bool
isCapitalized Text
t =
  case Text -> Maybe (Char, Text)
T.uncons Text
t of
    Just (Char
c, Text
t') -> Char -> Bool
isUpper Char
c Bool -> Bool -> Bool
&& (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isLower Text
t'
    Maybe (Char, Text)
_ -> Bool
False

isMixedCase :: Text -> Bool
isMixedCase :: Text -> Bool
isMixedCase Text
t = (Char -> Bool) -> Text -> Bool
T.any Char -> Bool
isUpper Text
t Bool -> Bool -> Bool
&& (Char -> Bool) -> Text -> Bool
T.any Char -> Bool
isLower Text
t

capitalizeText :: Maybe Lang -> Text -> Text
capitalizeText :: Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang Text
x =
  case Text -> Maybe (Char, Text)
T.uncons Text
x of
    Just (Char
c,Text
x') -> Maybe Lang -> Text -> Text
Unicode.toUpper Maybe Lang
mblang (Char -> Text
T.singleton Char
c) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
x'
    Maybe (Char, Text)
Nothing     -> Text
x

isStopWord :: Text -> Bool
isStopWord :: Text -> Bool
isStopWord Text
"a" = Bool
True
isStopWord Text
"an" = Bool
True
isStopWord Text
"and" = Bool
True
isStopWord Text
"as" = Bool
True
isStopWord Text
"at" = Bool
True
isStopWord Text
"but" = Bool
True
isStopWord Text
"by" = Bool
True
isStopWord Text
"down" = Bool
True
isStopWord Text
"for" = Bool
True
isStopWord Text
"from" = Bool
True
isStopWord Text
"in" = Bool
True
isStopWord Text
"into" = Bool
True
isStopWord Text
"nor" = Bool
True
isStopWord Text
"of" = Bool
True
isStopWord Text
"on" = Bool
True
isStopWord Text
"onto" = Bool
True
isStopWord Text
"or" = Bool
True
isStopWord Text
"over" = Bool
True
isStopWord Text
"so" = Bool
True
isStopWord Text
"the" = Bool
True
isStopWord Text
"till" = Bool
True
isStopWord Text
"to" = Bool
True
isStopWord Text
"up" = Bool
True
isStopWord Text
"via" = Bool
True
isStopWord Text
"with" = Bool
True
isStopWord Text
"yet" = Bool
True
-- not in original list but seems required by test flipflop_Apostrophes
-- and textcase_SkipNameParticlesInTitleCase
isStopWord Text
"about" = Bool
True
isStopWord Text
"van" = Bool
True
isStopWord Text
"von" = Bool
True
isStopWord Text
"de" = Bool
True
isStopWord Text
"d" = Bool
True
isStopWord Text
"l" = Bool
True
isStopWord Text
_ = Bool
False