{- |
Copyright : (c) 2024 Pierre Le Marre
Maintainer: dev@wismill.eu
Stability   : experimental

Miscellaneous bits common to various parsers
-}
module UCD.Parser.Common (
  readCodePoint,
  readCodePointM,
  UnicodeRange (..),
  parseRange,
  pattern Comma,
  pattern HashTag,
  pattern NewLine,
  pattern Period,
  pattern SemiColon,
  pattern Slash,
) where

import Data.ByteString qualified as B
import Data.ByteString.Char8 qualified as B8
import Data.Char (chr)
import Data.Word (Word8)

--------------------------------------------------------------------------------
-- Code point parser
--------------------------------------------------------------------------------

{- | Parse a code point formatted as hexadecimal

/Warning:/ raise an error on invalid input.

>>> readCodePoint "0061"
'a'

@since 0.1.0
-}
readCodePoint  B.ByteString  Char
readCodePoint :: ByteString -> Char
readCodePoint = Int -> Char
chr (Int -> Char) -> (ByteString -> Int) -> ByteString -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Int
forall a. Read a => String -> a
read (String -> Int) -> (ByteString -> String) -> ByteString -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> String
B8.unpack (ByteString -> String)
-> (ByteString -> ByteString) -> ByteString -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString
"0x" <>)

{- | Parse a code point formatted as hexadecimal, or return 'Nothing' on an
empty string.

/Warning:/ raise an error on invalid input.

>>> readCodePointM "0061"
Just 'a'
>>> readCodePointM ""
Nothing

See also: 'readCodePoint'.

@since 0.1.0
-}
readCodePointM  B.ByteString  Maybe Char
readCodePointM :: ByteString -> Maybe Char
readCodePointM ByteString
raw
  | ByteString -> Bool
B.null ByteString
raw = Maybe Char
forall a. Maybe a
Nothing
  | Bool
otherwise = Char -> Maybe Char
forall a. a -> Maybe a
Just (ByteString -> Char
readCodePoint ByteString
raw)

--------------------------------------------------------------------------------
-- Code point range parser
--------------------------------------------------------------------------------

{- | A Unicode code point range

@since 0.1.0
-}
data UnicodeRange a
  = SingleChar
      { forall a. UnicodeRange a -> Char
_first  !Char
      }
  | CharRange
      { _first  !Char
      , forall a. UnicodeRange a -> Char
_last  !Char
      , forall a. UnicodeRange a -> a
_rangeName  !a
      }
  deriving (UnicodeRange a -> UnicodeRange a -> Bool
(UnicodeRange a -> UnicodeRange a -> Bool)
-> (UnicodeRange a -> UnicodeRange a -> Bool)
-> Eq (UnicodeRange a)
forall a. Eq a => UnicodeRange a -> UnicodeRange a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall a. Eq a => UnicodeRange a -> UnicodeRange a -> Bool
== :: UnicodeRange a -> UnicodeRange a -> Bool
$c/= :: forall a. Eq a => UnicodeRange a -> UnicodeRange a -> Bool
/= :: UnicodeRange a -> UnicodeRange a -> Bool
Eq, Int -> UnicodeRange a -> ShowS
[UnicodeRange a] -> ShowS
UnicodeRange a -> String
(Int -> UnicodeRange a -> ShowS)
-> (UnicodeRange a -> String)
-> ([UnicodeRange a] -> ShowS)
-> Show (UnicodeRange a)
forall a. Show a => Int -> UnicodeRange a -> ShowS
forall a. Show a => [UnicodeRange a] -> ShowS
forall a. Show a => UnicodeRange a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall a. Show a => Int -> UnicodeRange a -> ShowS
showsPrec :: Int -> UnicodeRange a -> ShowS
$cshow :: forall a. Show a => UnicodeRange a -> String
show :: UnicodeRange a -> String
$cshowList :: forall a. Show a => [UnicodeRange a] -> ShowS
showList :: [UnicodeRange a] -> ShowS
Show)

{- | Parse @AAAA..BBBB@ range

@since 0.1.0
-}
parseRange  B.ByteString  UnicodeRange ()
parseRange :: ByteString -> UnicodeRange ()
parseRange ByteString
raw = case (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
B.span (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
Period) ByteString
raw of
  (ByteString -> Char
readCodePoint  Char
ch1, ByteString
rest)
    | ByteString -> Bool
B.null ByteString
rest  Char -> UnicodeRange ()
forall a. Char -> UnicodeRange a
SingleChar Char
ch1
    | Bool
otherwise  Char -> Char -> () -> UnicodeRange ()
forall a. Char -> Char -> a -> UnicodeRange a
CharRange Char
ch1 (ByteString -> Char
readCodePoint (Int -> ByteString -> ByteString
B.drop Int
2 ByteString
rest)) ()

--------------------------------------------------------------------------------
-- Char8 patterns
--------------------------------------------------------------------------------

-- | @'\\n'@
pattern NewLine  Word8
pattern $mNewLine :: forall {r}. Word8 -> ((# #) -> r) -> ((# #) -> r) -> r
$bNewLine :: Word8
NewLine = 0x0a

-- | @#@
pattern HashTag  Word8
pattern $mHashTag :: forall {r}. Word8 -> ((# #) -> r) -> ((# #) -> r) -> r
$bHashTag :: Word8
HashTag = 0x23

-- | @,@
pattern Comma  Word8
pattern $mComma :: forall {r}. Word8 -> ((# #) -> r) -> ((# #) -> r) -> r
$bComma :: Word8
Comma = 0x2c

-- | @.@
pattern Period  Word8
pattern $mPeriod :: forall {r}. Word8 -> ((# #) -> r) -> ((# #) -> r) -> r
$bPeriod :: Word8
Period = 0x2e

-- | @\/@
pattern Slash  Word8
pattern $mSlash :: forall {r}. Word8 -> ((# #) -> r) -> ((# #) -> r) -> r
$bSlash :: Word8
Slash = 0x2f

-- | @;@
pattern SemiColon  Word8
pattern $mSemiColon :: forall {r}. Word8 -> ((# #) -> r) -> ((# #) -> r) -> r
$bSemiColon :: Word8
SemiColon = 0x3b