module Duckling.Types where
import Control.DeepSeq
import Data.Aeson
import qualified Data.ByteString.Lazy as LB
import Data.GADT.Compare
import Data.Hashable
import Data.Maybe
import Data.String
import Data.Text (Text)
import qualified Data.Text.Encoding as Text
import Data.Typeable ((:~:)(Refl), Typeable)
import GHC.Generics
import Prelude
import qualified Text.Regex.Base as R
import qualified Text.Regex.PCRE as PCRE
import Duckling.Dimensions.Types
import Duckling.Resolve
data Token = forall a . (Resolve a, Eq a, Hashable a, Show a, NFData a) =>
Token (Dimension a) a
deriving instance Show Token
instance Eq Token where
Token d1 v1 == Token d2 v2 = case geq d1 d2 of
Just Refl -> v1 == v2
Nothing -> False
instance Hashable Token where
hashWithSalt s (Token dim v) = hashWithSalt s (dim, v)
instance NFData Token where
rnf (Token _ v) = rnf v
isDimension :: Dimension a -> Token -> Bool
isDimension dim (Token dim' _) = isJust $ geq dim dim'
data Node = Node
{ nodeRange :: Range
, token :: Token
, children :: [Node]
, rule :: Maybe Text
} deriving (Eq, Generic, Hashable, Show, NFData)
data ResolvedToken = Resolved
{ range :: Range
, node :: Node
, jsonValue :: Value
} deriving (Eq, Show)
instance Ord ResolvedToken where
compare (Resolved range1 _ json1) (Resolved range2 _ json2) =
case compare range1 range2 of
EQ -> compare (toJText json1) (toJText json2)
z -> z
data Candidate = Candidate ResolvedToken Double Bool
deriving (Eq, Show)
instance Ord Candidate where
compare (Candidate (Resolved{range = Range s1 e1, node = Node{token = Token d1 _}}) score1 t1)
(Candidate (Resolved{range = Range s2 e2, node = Node{token = tok2}}) score2 t2)
| isDimension d1 tok2 = case starts of
EQ -> case ends of
EQ -> compare score1 score2
z -> z
LT -> case ends of
LT -> EQ
_ -> GT
GT -> case ends of
GT -> EQ
_ -> LT
| t1 == t2 = compRange
| t1 && compRange == GT = GT
| t2 && compRange == LT = LT
| otherwise = EQ
where
starts = compare s1 s2
ends = compare e1 e2
compRange = case starts of
EQ -> ends
LT -> case ends of
LT -> EQ
_ -> GT
GT -> case ends of
GT -> EQ
_ -> LT
data Range = Range Int Int
deriving (Eq, Ord, Generic, Hashable, Show, NFData)
type Production = [Token] -> Maybe Token
type Predicate = Token -> Bool
data PatternItem = Regex PCRE.Regex | Predicate Predicate
type Pattern = [PatternItem]
data Rule = Rule
{ name :: Text
, pattern :: Pattern
, prod :: Production
}
instance Show Rule where
show (Rule name _ _) = show name
data Entity = Entity
{ dim :: Text
, body :: Text
, value :: Text
, start :: Int
, end :: Int
} deriving (Eq, Generic, Show, NFData)
instance ToJSON Entity where
toEncoding = genericToEncoding defaultOptions
toJText :: ToJSON x => x -> Text
toJText j = Text.decodeUtf8 $ LB.toStrict $ encode j
regex :: String -> PatternItem
regex = Regex . R.makeRegexOpts compOpts execOpts
where
compOpts = PCRE.defaultCompOpt + PCRE.compCaseless
execOpts = PCRE.defaultExecOpt
dimension :: Typeable a => Dimension a -> PatternItem
dimension value = Predicate $ isDimension value