{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.BCP47
   Copyright   : Copyright (C) 2017-2021 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Functions for parsing and rendering BCP47 language identifiers.
-}
module Text.Pandoc.BCP47 (
                       getLang
                     , parseBCP47
                     , Lang(..)
                     , renderLang
                     )
where
import Control.Monad (guard)
import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.DocTemplates (FromContext(..))
import qualified Data.Text as T
import qualified Text.Parsec as P

-- | Represents BCP 47 language/country code.
data Lang = Lang{ Lang -> Text
langLanguage :: T.Text
                , Lang -> Text
langScript   :: T.Text
                , Lang -> Text
langRegion   :: T.Text
                , Lang -> [Text]
langVariants :: [T.Text] }
                deriving (Lang -> Lang -> Bool
(Lang -> Lang -> Bool) -> (Lang -> Lang -> Bool) -> Eq Lang
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Lang -> Lang -> Bool
$c/= :: Lang -> Lang -> Bool
== :: Lang -> Lang -> Bool
$c== :: Lang -> Lang -> Bool
Eq, Eq Lang
Eq Lang
-> (Lang -> Lang -> Ordering)
-> (Lang -> Lang -> Bool)
-> (Lang -> Lang -> Bool)
-> (Lang -> Lang -> Bool)
-> (Lang -> Lang -> Bool)
-> (Lang -> Lang -> Lang)
-> (Lang -> Lang -> Lang)
-> Ord Lang
Lang -> Lang -> Bool
Lang -> Lang -> Ordering
Lang -> Lang -> Lang
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Lang -> Lang -> Lang
$cmin :: Lang -> Lang -> Lang
max :: Lang -> Lang -> Lang
$cmax :: Lang -> Lang -> Lang
>= :: Lang -> Lang -> Bool
$c>= :: Lang -> Lang -> Bool
> :: Lang -> Lang -> Bool
$c> :: Lang -> Lang -> Bool
<= :: Lang -> Lang -> Bool
$c<= :: Lang -> Lang -> Bool
< :: Lang -> Lang -> Bool
$c< :: Lang -> Lang -> Bool
compare :: Lang -> Lang -> Ordering
$ccompare :: Lang -> Lang -> Ordering
$cp1Ord :: Eq Lang
Ord, Int -> Lang -> ShowS
[Lang] -> ShowS
Lang -> String
(Int -> Lang -> ShowS)
-> (Lang -> String) -> ([Lang] -> ShowS) -> Show Lang
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Lang] -> ShowS
$cshowList :: [Lang] -> ShowS
show :: Lang -> String
$cshow :: Lang -> String
showsPrec :: Int -> Lang -> ShowS
$cshowsPrec :: Int -> Lang -> ShowS
Show)

-- | Render a Lang as BCP 47.
renderLang :: Lang -> T.Text
renderLang :: Lang -> Text
renderLang Lang
lang = Text -> [Text] -> Text
T.intercalate Text
"-" (Lang -> Text
langLanguage Lang
lang Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null)
                    ([Lang -> Text
langScript Lang
lang, Lang -> Text
langRegion Lang
lang] [Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
++ Lang -> [Text]
langVariants Lang
lang))

-- | Get the contents of the `lang` metadata field or variable.
getLang :: WriterOptions -> Meta -> Maybe T.Text
getLang :: WriterOptions -> Meta -> Maybe Text
getLang WriterOptions
opts Meta
meta =
  case Text -> Context Text -> Maybe Text
forall a b. FromContext a b => Text -> Context a -> Maybe b
lookupContext Text
"lang" (WriterOptions -> Context Text
writerVariables WriterOptions
opts) of
        Just Text
s -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
s
        Maybe Text
_      ->
          case Text -> Meta -> Maybe MetaValue
lookupMeta Text
"lang" Meta
meta of
               Just (MetaBlocks [Para [Str Text
s]])  -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
s
               Just (MetaBlocks [Plain [Str Text
s]]) -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
s
               Just (MetaInlines [Str Text
s])        -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
s
               Just (MetaString Text
s)               -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
s
               Maybe MetaValue
_                                 -> Maybe Text
forall a. Maybe a
Nothing

-- | Parse a BCP 47 string as a Lang.  Currently we parse
-- extensions and private-use fields as "variants," even
-- though officially they aren't.
parseBCP47 :: T.Text -> Either T.Text Lang
parseBCP47 :: Text -> Either Text Lang
parseBCP47 Text
lang =
  case Parsec Text () Lang -> String -> Text -> Either ParseError Lang
forall s t a.
Stream s Identity t =>
Parsec s () a -> String -> s -> Either ParseError a
P.parse Parsec Text () Lang
forall u. ParsecT Text u Identity Lang
bcp47 String
"lang" Text
lang of
       Right Lang
r -> Lang -> Either Text Lang
forall a b. b -> Either a b
Right Lang
r
       Left ParseError
e  -> Text -> Either Text Lang
forall a b. a -> Either a b
Left (Text -> Either Text Lang) -> Text -> Either Text Lang
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ ParseError -> String
forall a. Show a => a -> String
show ParseError
e
  where bcp47 :: ParsecT Text u Identity Lang
bcp47 = do
          Text
language <- ParsecT Text u Identity Text
forall u. ParsecT Text u Identity Text
pLanguage
          Text
script <- Text
-> ParsecT Text u Identity Text -> ParsecT Text u Identity Text
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
P.option Text
"" ParsecT Text u Identity Text
forall u. ParsecT Text u Identity Text
pScript
          Text
region <- Text
-> ParsecT Text u Identity Text -> ParsecT Text u Identity Text
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
P.option Text
"" ParsecT Text u Identity Text
forall u. ParsecT Text u Identity Text
pRegion
          [Text]
variants <- ParsecT Text u Identity Text -> ParsecT Text u Identity [Text]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
P.many (ParsecT Text u Identity Text
forall u. ParsecT Text u Identity Text
pVariant ParsecT Text u Identity Text
-> ParsecT Text u Identity Text -> ParsecT Text u Identity Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
P.<|> ParsecT Text u Identity Text
forall u. ParsecT Text u Identity Text
pExtension ParsecT Text u Identity Text
-> ParsecT Text u Identity Text -> ParsecT Text u Identity Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
P.<|> ParsecT Text u Identity Text
forall u. ParsecT Text u Identity Text
pPrivateUse)
          ParsecT Text u Identity ()
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
P.eof
          Lang -> ParsecT Text u Identity Lang
forall (m :: * -> *) a. Monad m => a -> m a
return Lang :: Text -> Text -> Text -> [Text] -> Lang
Lang{   langLanguage :: Text
langLanguage = Text
language
                       , langScript :: Text
langScript = Text
script
                       , langRegion :: Text
langRegion = Text
region
                       , langVariants :: [Text]
langVariants = [Text]
variants }
        asciiLetter :: ParsecT Text u Identity Char
asciiLetter = (Char -> Bool) -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char -> Bool
isAscii Char
c Bool -> Bool -> Bool
&& Char -> Bool
isLetter Char
c)
        pLanguage :: ParsecT Text u Identity Text
pLanguage = do
          String
cs <- ParsecT Text u Identity Char -> ParsecT Text u Identity String
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
P.many1 ParsecT Text u Identity Char
forall u. ParsecT Text u Identity Char
asciiLetter
          let lcs :: Int
lcs = String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
cs
          Bool -> ParsecT Text u Identity ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> ParsecT Text u Identity ())
-> Bool -> ParsecT Text u Identity ()
forall a b. (a -> b) -> a -> b
$ Int
lcs Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
2 Bool -> Bool -> Bool
|| Int
lcs Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
3
          Text -> ParsecT Text u Identity Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> ParsecT Text u Identity Text)
-> Text -> ParsecT Text u Identity Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.toLower (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack String
cs
        pScript :: ParsecT Text u Identity Text
pScript = ParsecT Text u Identity Text -> ParsecT Text u Identity Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (ParsecT Text u Identity Text -> ParsecT Text u Identity Text)
-> ParsecT Text u Identity Text -> ParsecT Text u Identity Text
forall a b. (a -> b) -> a -> b
$ do
          Char -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'-'
          Char
x <- (Char -> Bool) -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char -> Bool
isAscii Char
c Bool -> Bool -> Bool
&& Char -> Bool
isLetter Char
c Bool -> Bool -> Bool
&& Char -> Bool
isUpper Char
c)
          String
xs <- Int
-> ParsecT Text u Identity Char -> ParsecT Text u Identity String
forall s (m :: * -> *) t u a.
Stream s m t =>
Int -> ParsecT s u m a -> ParsecT s u m [a]
P.count Int
3
                 ((Char -> Bool) -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char -> Bool
isAscii Char
c Bool -> Bool -> Bool
&& Char -> Bool
isLetter Char
c Bool -> Bool -> Bool
&& Char -> Bool
isLower Char
c))
          Text -> ParsecT Text u Identity Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> ParsecT Text u Identity Text)
-> Text -> ParsecT Text u Identity Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.toLower (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack (Char
xChar -> ShowS
forall a. a -> [a] -> [a]
:String
xs)
        pRegion :: ParsecT Text u Identity Text
pRegion = ParsecT Text u Identity Text -> ParsecT Text u Identity Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (ParsecT Text u Identity Text -> ParsecT Text u Identity Text)
-> ParsecT Text u Identity Text -> ParsecT Text u Identity Text
forall a b. (a -> b) -> a -> b
$ do
          Char -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'-'
          String
cs <- ParsecT Text u Identity Char -> ParsecT Text u Identity String
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
P.many1 ParsecT Text u Identity Char
forall u. ParsecT Text u Identity Char
asciiLetter
          let lcs :: Int
lcs = String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
cs
          Bool -> ParsecT Text u Identity ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> ParsecT Text u Identity ())
-> Bool -> ParsecT Text u Identity ()
forall a b. (a -> b) -> a -> b
$ Int
lcs Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
2 Bool -> Bool -> Bool
|| Int
lcs Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
3
          Text -> ParsecT Text u Identity Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> ParsecT Text u Identity Text)
-> Text -> ParsecT Text u Identity Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.toUpper (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack String
cs
        pVariant :: ParsecT Text u Identity Text
pVariant = ParsecT Text u Identity Text -> ParsecT Text u Identity Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (ParsecT Text u Identity Text -> ParsecT Text u Identity Text)
-> ParsecT Text u Identity Text -> ParsecT Text u Identity Text
forall a b. (a -> b) -> a -> b
$ do
          Char -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'-'
          String
ds <- String
-> ParsecT Text u Identity String -> ParsecT Text u Identity String
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
P.option String
"" (Int
-> ParsecT Text u Identity Char -> ParsecT Text u Identity String
forall s (m :: * -> *) t u a.
Stream s m t =>
Int -> ParsecT s u m a -> ParsecT s u m [a]
P.count Int
1 ParsecT Text u Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
P.digit)
          String
cs <- ParsecT Text u Identity Char -> ParsecT Text u Identity String
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
P.many1 ParsecT Text u Identity Char
forall u. ParsecT Text u Identity Char
asciiLetter
          let var :: String
var = String
ds String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
cs
              lv :: Int
lv = String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
var
          Bool -> ParsecT Text u Identity ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> ParsecT Text u Identity ())
-> Bool -> ParsecT Text u Identity ()
forall a b. (a -> b) -> a -> b
$ if String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
ds
                     then Int
lv Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
5 Bool -> Bool -> Bool
&& Int
lv Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
8
                     else Int
lv Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
4
          Text -> ParsecT Text u Identity Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> ParsecT Text u Identity Text)
-> Text -> ParsecT Text u Identity Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.toLower (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack String
var
        pExtension :: ParsecT Text u Identity Text
pExtension = ParsecT Text u Identity Text -> ParsecT Text u Identity Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (ParsecT Text u Identity Text -> ParsecT Text u Identity Text)
-> ParsecT Text u Identity Text -> ParsecT Text u Identity Text
forall a b. (a -> b) -> a -> b
$ do
          Char -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'-'
          String
cs <- ParsecT Text u Identity Char -> ParsecT Text u Identity String
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
P.many1 (ParsecT Text u Identity Char -> ParsecT Text u Identity String)
-> ParsecT Text u Identity Char -> ParsecT Text u Identity String
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char -> Bool
isAscii Char
c Bool -> Bool -> Bool
&& Char -> Bool
isAlphaNum Char
c)
          let lcs :: Int
lcs = String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
cs
          Bool -> ParsecT Text u Identity ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> ParsecT Text u Identity ())
-> Bool -> ParsecT Text u Identity ()
forall a b. (a -> b) -> a -> b
$ Int
lcs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
2 Bool -> Bool -> Bool
&& Int
lcs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
8
          Text -> ParsecT Text u Identity Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> ParsecT Text u Identity Text)
-> Text -> ParsecT Text u Identity Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.toLower (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack String
cs
        pPrivateUse :: ParsecT Text u Identity Text
pPrivateUse = ParsecT Text u Identity Text -> ParsecT Text u Identity Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (ParsecT Text u Identity Text -> ParsecT Text u Identity Text)
-> ParsecT Text u Identity Text -> ParsecT Text u Identity Text
forall a b. (a -> b) -> a -> b
$ do
          Char -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'-'
          Char -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'x'
          Char -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'-'
          String
cs <- ParsecT Text u Identity Char -> ParsecT Text u Identity String
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
P.many1 (ParsecT Text u Identity Char -> ParsecT Text u Identity String)
-> ParsecT Text u Identity Char -> ParsecT Text u Identity String
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char -> Bool
isAscii Char
c Bool -> Bool -> Bool
&& Char -> Bool
isAlphaNum Char
c)
          Bool -> ParsecT Text u Identity ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> ParsecT Text u Identity ())
-> Bool -> ParsecT Text u Identity ()
forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
cs) Bool -> Bool -> Bool
&& String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
cs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
8
          let var :: String
var = String
"x-" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
cs
          Text -> ParsecT Text u Identity Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> ParsecT Text u Identity Text)
-> Text -> ParsecT Text u Identity Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.toLower (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack String
var