{-# LANGUAGE GADTs #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE NoRebindableSyntax #-}
module Duckling.Engine
( parseAndResolve
, lookupRegexAnywhere
, runDuckling
) where
import Control.DeepSeq
import Control.Monad.Extra
import Data.Aeson (toJSON)
import Data.ByteString (ByteString)
import Data.Functor.Identity
import Data.Maybe
import Data.Text (Text)
import Prelude
import qualified Data.Array as Array
import qualified Data.Foldable as Foldable
import qualified Data.List as L
import qualified Text.Regex.PCRE as PCRE
import Duckling.Dimensions.Types
import Duckling.Regex.Types
import Duckling.Resolve
import Duckling.Types
import Duckling.Types.Document (Document)
import Duckling.Types.Stash (Stash)
import qualified Duckling.Engine.Regex as Regex
import qualified Duckling.Types.Document as Document
import qualified Duckling.Types.Stash as Stash
type Duckling a = Identity a
runDuckling :: Duckling a -> a
runDuckling :: Duckling a -> a
runDuckling Duckling a
ma = Duckling a -> a
forall a. Identity a -> a
runIdentity Duckling a
ma
parseAndResolve :: [Rule] -> Text -> Context -> Options -> [ResolvedToken]
parseAndResolve :: [Rule] -> Text -> Context -> Options -> [ResolvedToken]
parseAndResolve [Rule]
rules Text
input Context
context Options
options =
(Node -> Maybe ResolvedToken) -> [Node] -> [ResolvedToken]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (Context -> Options -> Node -> Maybe ResolvedToken
resolveNode Context
context Options
options) ([Node] -> [ResolvedToken])
-> ([Node] -> [Node]) -> [Node] -> [ResolvedToken]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Node] -> [Node]
forall a. NFData a => a -> a
force ([Node] -> [ResolvedToken]) -> [Node] -> [ResolvedToken]
forall a b. (a -> b) -> a -> b
$ Stash -> [Node]
Stash.toPosOrderedList (Stash -> [Node]) -> Stash -> [Node]
forall a b. (a -> b) -> a -> b
$
Duckling Stash -> Stash
forall a. Identity a -> a
runDuckling (Duckling Stash -> Stash) -> Duckling Stash -> Stash
forall a b. (a -> b) -> a -> b
$ [Rule] -> Document -> Duckling Stash
parseString [Rule]
rules (Text -> Document
Document.fromText Text
input)
produce :: Match -> Maybe Node
produce :: Match -> Maybe Node
produce (Rule
_, Int
_, []) = Maybe Node
forall a. Maybe a
Nothing
produce (Rule Text
name Pattern
_ Production
production, Int
_, etuor :: [Node]
etuor@(Node {nodeRange :: Node -> Range
nodeRange = Range Int
_ Int
e}:[Node]
_)) = do
let route :: [Node]
route = [Node] -> [Node]
forall a. [a] -> [a]
reverse [Node]
etuor
Token
token <- Maybe Token -> Maybe Token
forall a. NFData a => a -> a
force (Maybe Token -> Maybe Token) -> Maybe Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ Production
production Production -> Production
forall a b. (a -> b) -> a -> b
$ (Node -> Token) -> [Node] -> [Token]
forall a b. (a -> b) -> [a] -> [b]
map Node -> Token
token [Node]
route
case [Node]
route of
(Node {nodeRange :: Node -> Range
nodeRange = Range Int
p Int
_}:[Node]
_) -> Node -> Maybe Node
forall a. a -> Maybe a
Just Node :: Range -> Token -> [Node] -> Maybe Text -> Node
Node
{ nodeRange :: Range
nodeRange = Int -> Int -> Range
Range Int
p Int
e
, token :: Token
token = Token
token
, children :: [Node]
children = [Node]
route
, rule :: Maybe Text
rule = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
name
}
[] -> Maybe Node
forall a. Maybe a
Nothing
lookupRegex :: Document -> PCRE.Regex -> Int -> Duckling [Node]
lookupRegex :: Document -> Regex -> Int -> Duckling [Node]
lookupRegex Document
doc Regex
_regex Int
position | Int
position Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Document -> Int
Document.length Document
doc = [Node] -> Duckling [Node]
forall (m :: * -> *) a. Monad m => a -> m a
return []
lookupRegex Document
doc Regex
regex Int
position =
Document
-> Regex
-> Int
-> (Regex -> ByteString -> Maybe MatchArray)
-> Duckling [Node]
forall (t :: * -> *).
Foldable t =>
Document
-> Regex
-> Int
-> (Regex -> ByteString -> t MatchArray)
-> Duckling [Node]
lookupRegexCommon Document
doc Regex
regex Int
position Regex -> ByteString -> Maybe MatchArray
forall regex source.
RegexLike regex source =>
regex -> source -> Maybe MatchArray
Regex.matchOnce
lookupRegexAnywhere :: Document -> PCRE.Regex -> Duckling [Node]
lookupRegexAnywhere :: Document -> Regex -> Duckling [Node]
lookupRegexAnywhere Document
doc Regex
regex = Document
-> Regex
-> Int
-> (Regex -> ByteString -> [MatchArray])
-> Duckling [Node]
forall (t :: * -> *).
Foldable t =>
Document
-> Regex
-> Int
-> (Regex -> ByteString -> t MatchArray)
-> Duckling [Node]
lookupRegexCommon Document
doc Regex
regex Int
0 Regex -> ByteString -> [MatchArray]
forall regex source.
RegexLike regex source =>
regex -> source -> [MatchArray]
Regex.matchAll
{-# INLINE lookupRegexCommon #-}
lookupRegexCommon
:: Foldable t
=> Document
-> PCRE.Regex
-> Int
-> (PCRE.Regex -> ByteString -> t PCRE.MatchArray)
-> Duckling [Node]
lookupRegexCommon :: Document
-> Regex
-> Int
-> (Regex -> ByteString -> t MatchArray)
-> Duckling [Node]
lookupRegexCommon Document
doc Regex
regex Int
position Regex -> ByteString -> t MatchArray
matchFun = [Node] -> Duckling [Node]
forall (m :: * -> *) a. Monad m => a -> m a
return [Node]
nodes
where
(ByteString
substring, (Int, Int) -> Text
rangeToText, Int -> Int -> (Int, Int)
translateRange) =
Document
-> Int
-> (ByteString, (Int, Int) -> Text, Int -> Int -> (Int, Int))
Document.byteStringFromPos Document
doc Int
position
nodes :: [Node]
nodes = (MatchArray -> Maybe Node) -> [MatchArray] -> [Node]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe ([(Int, Int)] -> Maybe Node
f ([(Int, Int)] -> Maybe Node)
-> (MatchArray -> [(Int, Int)]) -> MatchArray -> Maybe Node
forall b c a. (b -> c) -> (a -> b) -> a -> c
. MatchArray -> [(Int, Int)]
forall i e. Array i e -> [e]
Array.elems)
([MatchArray] -> [Node]) -> [MatchArray] -> [Node]
forall a b. (a -> b) -> a -> b
$ t MatchArray -> [MatchArray]
forall (t :: * -> *) a. Foldable t => t a -> [a]
Foldable.toList
(t MatchArray -> [MatchArray]) -> t MatchArray -> [MatchArray]
forall a b. (a -> b) -> a -> b
$ Regex -> ByteString -> t MatchArray
matchFun Regex
regex ByteString
substring
f :: [(Int, Int)] -> Maybe Node
f :: [(Int, Int)] -> Maybe Node
f [] = Maybe Node
forall a. Maybe a
Nothing
f ((Int
0,Int
0):[(Int, Int)]
_) = Maybe Node
forall a. Maybe a
Nothing
f ((Int
bsStart, Int
bsLen):[(Int, Int)]
groups) =
if Document -> Int -> Int -> Bool
Document.isRangeValid Document
doc Int
start Int
end
then Node -> Maybe Node
forall a. a -> Maybe a
Just Node
node
else Maybe Node
forall a. Maybe a
Nothing
where
textGroups :: [Text]
textGroups = ((Int, Int) -> Text) -> [(Int, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Int, Int) -> Text
rangeToText [(Int, Int)]
groups
(Int
start, Int
end) = Int -> Int -> (Int, Int)
translateRange Int
bsStart Int
bsLen
node :: Node
node = Node :: Range -> Token -> [Node] -> Maybe Text -> Node
Node
{ nodeRange :: Range
nodeRange = Int -> Int -> Range
Range Int
start Int
end
, token :: Token
token = Dimension GroupMatch -> GroupMatch -> Token
forall a.
(Resolve a, Eq a, Hashable a, Show a, NFData a) =>
Dimension a -> a -> Token
Token Dimension GroupMatch
RegexMatch ([Text] -> GroupMatch
GroupMatch [Text]
textGroups)
, children :: [Node]
children = []
, rule :: Maybe Text
rule = Maybe Text
forall a. Maybe a
Nothing
}
lookupItem :: Document -> PatternItem -> Stash -> Int -> Duckling [Node]
lookupItem :: Document -> PatternItem -> Stash -> Int -> Duckling [Node]
lookupItem Document
doc (Regex Regex
re) Stash
_ Int
position =
(Node -> Bool) -> [Node] -> [Node]
forall a. (a -> Bool) -> [a] -> [a]
filter (Int -> Document -> Node -> Bool
isPositionValid Int
position Document
doc) ([Node] -> [Node]) -> Duckling [Node] -> Duckling [Node]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
Document -> Regex -> Int -> Duckling [Node]
lookupRegex Document
doc Regex
re Int
position
lookupItem Document
doc (Predicate Predicate
p) Stash
stash Int
position =
[Node] -> Duckling [Node]
forall (m :: * -> *) a. Monad m => a -> m a
return ([Node] -> Duckling [Node]) -> [Node] -> Duckling [Node]
forall a b. (a -> b) -> a -> b
$
(Node -> Bool) -> [Node] -> [Node]
forall a. (a -> Bool) -> [a] -> [a]
filter (Predicate
p Predicate -> (Node -> Token) -> Node -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Node -> Token
token) ([Node] -> [Node]) -> [Node] -> [Node]
forall a b. (a -> b) -> a -> b
$
(Node -> Bool) -> [Node] -> [Node]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Int -> Document -> Node -> Bool
isPositionValid Int
position Document
doc) ([Node] -> [Node]) -> [Node] -> [Node]
forall a b. (a -> b) -> a -> b
$
Stash -> Int -> [Node]
Stash.toPosOrderedListFrom Stash
stash Int
position
lookupItemAnywhere :: Document -> PatternItem -> Stash -> Duckling [Node]
lookupItemAnywhere :: Document -> PatternItem -> Stash -> Duckling [Node]
lookupItemAnywhere Document
doc (Regex Regex
re) Stash
_ = Document -> Regex -> Duckling [Node]
lookupRegexAnywhere Document
doc Regex
re
lookupItemAnywhere Document
_doc (Predicate Predicate
p) Stash
stash =
[Node] -> Duckling [Node]
forall (m :: * -> *) a. Monad m => a -> m a
return ([Node] -> Duckling [Node]) -> [Node] -> Duckling [Node]
forall a b. (a -> b) -> a -> b
$ (Node -> Bool) -> [Node] -> [Node]
forall a. (a -> Bool) -> [a] -> [a]
filter (Predicate
p Predicate -> (Node -> Token) -> Node -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Node -> Token
token) ([Node] -> [Node]) -> [Node] -> [Node]
forall a b. (a -> b) -> a -> b
$ Stash -> [Node]
Stash.toPosOrderedList Stash
stash
isPositionValid :: Int -> Document -> Node -> Bool
isPositionValid :: Int -> Document -> Node -> Bool
isPositionValid Int
position Document
sentence Node{nodeRange :: Node -> Range
nodeRange = Range Int
start Int
_} =
Document -> Int -> Int -> Bool
Document.isAdjacent Document
sentence Int
position Int
start
type Match = (Rule, Int, [Node])
matchAll :: Document -> Stash -> [Match] -> Duckling [Match]
matchAll :: Document -> Stash -> [Match] -> Duckling [Match]
matchAll Document
sentence Stash
stash [Match]
matches = (Match -> Duckling [Match]) -> [Match] -> Duckling [Match]
forall (m :: * -> *) a b. Monad m => (a -> m [b]) -> [a] -> m [b]
concatMapM Match -> Duckling [Match]
mkNextMatches [Match]
matches
where
mkNextMatches :: Match -> Duckling [Match]
mkNextMatches :: Match -> Duckling [Match]
mkNextMatches match :: Match
match@(Rule {pattern :: Rule -> Pattern
pattern = []}, Int
_, [Node]
_) = [Match] -> Duckling [Match]
forall (m :: * -> *) a. Monad m => a -> m a
return [ Match
match ]
mkNextMatches match :: Match
match@(Rule {pattern :: Rule -> Pattern
pattern = PatternItem
p:Pattern
_}, Int
_, [Node]
_) = do
[Match]
nextMatches <- Document -> Stash -> [Match] -> Duckling [Match]
matchAll Document
sentence Stash
stash ([Match] -> Duckling [Match])
-> Duckling [Match] -> Duckling [Match]
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Document -> Stash -> Match -> Duckling [Match]
matchFirst Document
sentence Stash
stash Match
match
[Match] -> Duckling [Match]
forall (m :: * -> *) a. Monad m => a -> m a
return ([Match] -> Duckling [Match]) -> [Match] -> Duckling [Match]
forall a b. (a -> b) -> a -> b
$ case PatternItem
p of
Regex Regex
_ -> [Match]
nextMatches
Predicate Predicate
_ -> Match
matchMatch -> [Match] -> [Match]
forall a. a -> [a] -> [a]
:[Match]
nextMatches
matchFirst :: Document -> Stash -> Match -> Duckling [Match]
matchFirst :: Document -> Stash -> Match -> Duckling [Match]
matchFirst Document
_ Stash
_ (Rule {pattern :: Rule -> Pattern
pattern = []}, Int
_, [Node]
_) = [Match] -> Duckling [Match]
forall (m :: * -> *) a. Monad m => a -> m a
return []
matchFirst Document
sentence Stash
stash (rule :: Rule
rule@Rule{pattern :: Rule -> Pattern
pattern = PatternItem
p : Pattern
ps}, Int
position, [Node]
route) =
(Node -> Match) -> [Node] -> [Match]
forall a b. (a -> b) -> [a] -> [b]
map ([Node] -> Rule -> Node -> Match
mkMatch [Node]
route Rule
newRule) ([Node] -> [Match]) -> Duckling [Node] -> Duckling [Match]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Document -> PatternItem -> Stash -> Int -> Duckling [Node]
lookupItem Document
sentence PatternItem
p Stash
stash Int
position
where
newRule :: Rule
newRule = Rule
rule { pattern :: Pattern
pattern = Pattern
ps }
matchFirstAnywhere :: Document -> Stash -> Rule -> Duckling [Match]
matchFirstAnywhere :: Document -> Stash -> Rule -> Duckling [Match]
matchFirstAnywhere Document
_sentence Stash
_stash Rule {pattern :: Rule -> Pattern
pattern = []} = [Match] -> Duckling [Match]
forall (m :: * -> *) a. Monad m => a -> m a
return []
matchFirstAnywhere Document
sentence Stash
stash rule :: Rule
rule@Rule{pattern :: Rule -> Pattern
pattern = PatternItem
p : Pattern
ps} =
(Node -> Match) -> [Node] -> [Match]
forall a b. (a -> b) -> [a] -> [b]
map ([Node] -> Rule -> Node -> Match
mkMatch [] Rule
newRule) ([Node] -> [Match]) -> Duckling [Node] -> Duckling [Match]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Document -> PatternItem -> Stash -> Duckling [Node]
lookupItemAnywhere Document
sentence PatternItem
p Stash
stash
where
newRule :: Rule
newRule = Rule
rule { pattern :: Pattern
pattern = Pattern
ps }
{-# INLINE mkMatch #-}
mkMatch :: [Node] -> Rule -> Node -> Match
mkMatch :: [Node] -> Rule -> Node -> Match
mkMatch [Node]
route Rule
newRule (node :: Node
node@Node {nodeRange :: Node -> Range
nodeRange = Range Int
_ Int
pos'}) =
[Node]
newRoute [Node] -> Match -> Match
`seq` (Rule
newRule, Int
pos', [Node]
newRoute)
where newRoute :: [Node]
newRoute = Node
nodeNode -> [Node] -> [Node]
forall a. a -> [a] -> [a]
:[Node]
route
parseString1
:: [Rule] -> Document -> Stash -> Stash -> [Match]
-> Duckling (Stash, [Match])
parseString1 :: [Rule]
-> Document
-> Stash
-> Stash
-> [Match]
-> Duckling (Stash, [Match])
parseString1 [Rule]
rules Document
sentence Stash
stash Stash
new [Match]
matches = do
[Match]
newPartial <- (Match -> Duckling [Match]) -> [Match] -> Duckling [Match]
forall (m :: * -> *) a b. Monad m => (a -> m [b]) -> [a] -> m [b]
concatMapM (Document -> Stash -> Match -> Duckling [Match]
matchFirst Document
sentence Stash
new) [Match]
matches
[Match]
newMatches <- (Rule -> Duckling [Match]) -> [Rule] -> Duckling [Match]
forall (m :: * -> *) a b. Monad m => (a -> m [b]) -> [a] -> m [b]
concatMapM (Document -> Stash -> Rule -> Duckling [Match]
matchFirstAnywhere Document
sentence Stash
new) [Rule]
rules
([Match]
full, [Match]
partial) <- (Match -> Bool) -> [Match] -> ([Match], [Match])
forall a. (a -> Bool) -> [a] -> ([a], [a])
L.partition (\(Rule {Pattern
pattern :: Pattern
pattern :: Rule -> Pattern
pattern}, Int
_, [Node]
_) -> Pattern -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null Pattern
pattern)
([Match] -> ([Match], [Match]))
-> Duckling [Match] -> Identity ([Match], [Match])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Document -> Stash -> [Match] -> Duckling [Match]
matchAll Document
sentence Stash
stash ([Match]
newPartial [Match] -> [Match] -> [Match]
forall a. [a] -> [a] -> [a]
++ [Match]
newMatches)
(Stash, [Match]) -> Duckling (Stash, [Match])
forall (m :: * -> *) a. Monad m => a -> m a
return ( [Node] -> Stash
Stash.fromList ([Node] -> Stash) -> [Node] -> Stash
forall a b. (a -> b) -> a -> b
$ (Match -> Maybe Node) -> [Match] -> [Node]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Match -> Maybe Node
produce [Match]
full
, [Match]
partial [Match] -> [Match] -> [Match]
forall a. [a] -> [a] -> [a]
++ [Match]
matches
)
saturateParseString
:: [Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash
saturateParseString :: [Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash
saturateParseString [Rule]
rules Document
sentence Stash
stash Stash
new [Match]
matches = do
(Stash
new', [Match]
matches') <- [Rule]
-> Document
-> Stash
-> Stash
-> [Match]
-> Duckling (Stash, [Match])
parseString1 [Rule]
rules Document
sentence Stash
stash Stash
new [Match]
matches
let stash' :: Stash
stash' = Stash -> Stash -> Stash
Stash.union Stash
stash Stash
new'
if Stash -> Bool
Stash.null Stash
new'
then Stash -> Duckling Stash
forall (m :: * -> *) a. Monad m => a -> m a
return Stash
stash
else [Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash
saturateParseString [Rule]
rules Document
sentence Stash
stash' Stash
new' [Match]
matches'
parseString :: [Rule] -> Document -> Duckling Stash
parseString :: [Rule] -> Document -> Duckling Stash
parseString [Rule]
rules Document
sentence = do
(Stash
new, [Match]
partialMatches) <-
[Rule]
-> Document
-> Stash
-> Stash
-> [Match]
-> Duckling (Stash, [Match])
parseString1 [Rule]
rules Document
sentence Stash
Stash.empty Stash
Stash.empty []
if Stash -> Bool
Stash.null Stash
new
then Stash -> Duckling Stash
forall (m :: * -> *) a. Monad m => a -> m a
return Stash
Stash.empty
else
[Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash
saturateParseString [Rule]
headPredicateRules Document
sentence Stash
new Stash
new [Match]
partialMatches
where
headPredicateRules :: [Rule]
headPredicateRules =
[ Rule
rule | rule :: Rule
rule@Rule{pattern :: Rule -> Pattern
pattern = (Predicate Predicate
_ : Pattern
_)} <- [Rule]
rules ]
resolveNode :: Context -> Options -> Node -> Maybe ResolvedToken
resolveNode :: Context -> Options -> Node -> Maybe ResolvedToken
resolveNode Context
context Options
options n :: Node
n@Node{token :: Node -> Token
token = (Token Dimension a
dim a
dd), nodeRange :: Node -> Range
nodeRange = Range
r}
= do
(ResolvedValue a
val, Bool
latent) <- Context -> Options -> a -> Maybe (ResolvedValue a, Bool)
forall a.
Resolve a =>
Context -> Options -> a -> Maybe (ResolvedValue a, Bool)
resolve Context
context Options
options a
dd
ResolvedToken -> Maybe ResolvedToken
forall a. a -> Maybe a
Just Resolved :: Range -> Node -> ResolvedVal -> Bool -> ResolvedToken
Resolved
{ range :: Range
range = Range
r
, node :: Node
node = Node
n
, rval :: ResolvedVal
rval = Dimension a -> ResolvedValue a -> ResolvedVal
forall a.
(Resolve a, Eq (ResolvedValue a), Show (ResolvedValue a),
ToJSON (ResolvedValue a)) =>
Dimension a -> ResolvedValue a -> ResolvedVal
RVal Dimension a
dim ResolvedValue a
val
, isLatent :: Bool
isLatent = Bool
latent
}