{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.KO.Rules
( rules
) where
import Data.HashMap.Strict (HashMap)
import Data.Maybe
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
-- | Lookup table from the words accepted by 'ruleIntegerForOrdinals' to the
-- integer each one stands for. Both "한" and "첫" map to 1.
integerForOrdinalsMap :: HashMap Text Integer
integerForOrdinalsMap = HashMap.fromList
  [ ( "한", 1 )
  , ( "첫", 1 )
  , ( "두", 2 )
  , ( "세", 3 )
  , ( "네", 4 )
  ]
-- | Matches one of 한, 첫, 두, 세, 네 and produces the integer stored for it
-- in 'integerForOrdinalsMap' (1 through 4).
--
-- The production yields 'Nothing' when the token list does not start with a
-- regex match or when the matched text is missing from the map.
ruleIntegerForOrdinals :: Rule
ruleIntegerForOrdinals = Rule
  { name = "integer (1..4) - for ordinals"
  , pattern =
    [ regex "(한|첫|두|세|네)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        HashMap.lookup match integerForOrdinalsMap >>= integer
      _ -> Nothing
  }
-- | Matches "몇" and always produces the integer 3, regardless of the
-- surrounding tokens.
ruleFew :: Rule
ruleFew = Rule
  { name = "few 몇"
  , pattern =
    [ regex "몇"
    ]
  , prod = \_ -> integer 3
  }
-- | Matches digit strings such as @1,234.56@: one or more digits, at least
-- one @,ddd@ group, then a dot and a fractional part.
--
-- The commas are removed from the matched text before it is handed to
-- 'parseDouble'; a failed parse makes the production return 'Nothing'.
ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule
  { name = "decimal with thousands separator"
  , pattern =
    [ regex "(\\d+(,\\d\\d\\d)+\\.\\d+)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        parseDouble (Text.replace "," Text.empty match) >>= double
      _ -> Nothing
  }
-- | Matches a plain decimal literal: optional integer digits, a dot, and at
-- least one fractional digit (for example @.5@ or @3.14@).
--
-- The matched text is passed unchanged to 'parseDecimal' with its flag set
-- to 'True'.
ruleDecimalNumeral :: Rule
ruleDecimalNumeral = Rule
  { name = "decimal number"
  , pattern =
    [ regex "(\\d*\\.\\d+)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) -> parseDecimal True match
      _ -> Nothing
  }
-- | Matches @\<numeral\> 분의 \<numeral\>@ (or 분에) and produces the quotient
-- of the two numerals.
--
-- The first numeral is the denominator and the second the numerator, so the
-- result is @second / first@. A zero denominator is rejected with 'Nothing'
-- rather than producing an infinite or NaN value.
ruleFraction :: Rule
ruleFraction = Rule
  { name = "fraction"
  , pattern =
    [ dimension Numeral
    , regex "분(의|에)"
    , dimension Numeral
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral NumeralData{TNumeral.value = v1}:
       _:
       Token Numeral NumeralData{TNumeral.value = v2}:
       _)
        -- Guard the division: x / 0 on Double is Infinity (or NaN for 0 / 0).
        | v1 /= 0 -> double $ v2 / v1
      _ -> Nothing
  }
-- | Matches a numeral preceded by @-@, 마이너스 or 마이나스 (the two words may
-- be followed by one whitespace character) and produces the numeral's value
-- multiplied by -1.
ruleNumeralsPrefixWithOr :: Rule
ruleNumeralsPrefixWithOr = Rule
  { name = "numbers prefix with -, 마이너스, or 마이나스"
  , pattern =
    [ regex "-|마이너스\\s?|마이나스\\s?"
    , dimension Numeral
    ]
  , prod = \tokens -> case tokens of
      (_:Token Numeral nd:_) -> double (TNumeral.value nd * (-1))
      _ -> Nothing
  }
-- | Matches "반" and always produces the value 0.5.
ruleHalf :: Rule
ruleHalf = Rule
  { name = "half - 반"
  , pattern =
    [ regex "반"
    ]
  , prod = \_ -> double 0.5
  }
-- | Matches any of 영, 공 or 빵 and always produces the integer 0.
ruleInteger :: Rule
ruleInteger = Rule
  { name = "integer 0"
  , pattern =
    [ regex "영|공|빵"
    ]
  , prod = \_ -> integer 0
  }
-- | Lookup table from the tens words accepted by
-- 'ruleIntegerTypeAndOrdinals' to their values, covering 10 through 90 in
-- steps of ten.
integerTypeAndOrdinalsMap :: HashMap Text Integer
integerTypeAndOrdinalsMap = HashMap.fromList
  [ ( "열", 10 )
  , ( "스물", 20 )
  , ( "서른", 30 )
  , ( "마흔", 40 )
  , ( "쉰", 50 )
  , ( "예순", 60 )
  , ( "일흔", 70 )
  , ( "여든", 80 )
  , ( "아흔", 90 )
  ]
-- | Matches a single tens word (열, 스물, ... 아흔) and produces the integer
-- stored for it in 'integerTypeAndOrdinalsMap'.
--
-- Note that the pattern also accepts 열 (10) even though the rule name
-- mentions only 20..90; the name is kept as is because it identifies the
-- rule at runtime.
ruleIntegerTypeAndOrdinals :: Rule
ruleIntegerTypeAndOrdinals = Rule
  { name = "integer (20..90) - TYPE 2 and ordinals"
  , pattern =
    [ regex "(열|스물|서른|마흔|쉰|예순|일흔|여든|아흔)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        HashMap.lookup match integerTypeAndOrdinalsMap >>= integer
      _ -> Nothing
  }
-- | Lookup table from the single-syllable digit words accepted by
-- 'ruleIntegerType1' (영, 일, ... 구) to the digits 0 through 9.
integerType1Map :: HashMap Text Integer
integerType1Map = HashMap.fromList
  [ ( "영", 0 )
  , ( "일", 1 )
  , ( "이", 2 )
  , ( "삼", 3 )
  , ( "사", 4 )
  , ( "오", 5 )
  , ( "육", 6 )
  , ( "칠", 7 )
  , ( "팔", 8 )
  , ( "구", 9 )
  ]
-- | Matches a single digit word (영, 일, ... 구) and produces the integer
-- stored for it in 'integerType1Map' (0 through 9).
ruleIntegerType1 :: Rule
ruleIntegerType1 = Rule
  { name = "integer - TYPE 1"
  , pattern =
    [ regex "(영|일|이|삼|사|오|육|칠|팔|구)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        HashMap.lookup match integerType1Map >>= integer
      _ -> Nothing
  }
-- | Lookup table for the power-of-ten words accepted by
-- 'ruleIntegerType1PowersOfTen'. Each entry pairs the numeric value with its
-- base-10 exponent, which the rule passes on as the grain.
integerType1PowersOfTenMap :: HashMap Text (Double, Int)
integerType1PowersOfTenMap = HashMap.fromList
  [ ( "십", (10, 1) )
  , ( "백", (1e2, 2) )
  , ( "천", (1e3, 3) )
  , ( "만", (1e4, 4) )
  , ( "억", (1e8, 8) )
  , ( "조", (1e12, 12) )
  ]
-- | Matches one power-of-ten word (십, 백, 천, 만, 억, 조).
--
-- The value and exponent are read from 'integerType1PowersOfTenMap'; the
-- resulting token is built with 'double', then tagged through 'withGrain'
-- (using the exponent) and 'withMultipliable'.
ruleIntegerType1PowersOfTen :: Rule
ruleIntegerType1PowersOfTen = Rule
  { name = "integer - TYPE 1: powers of ten"
  , pattern =
    [ regex "(십|백|천|만|억|조)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) -> do
        -- Local names avoid shadowing the NumeralData fields 'value'/'grain'.
        (v, g) <- HashMap.lookup match integerType1PowersOfTenMap
        double v >>= withGrain g >>= withMultipliable
      _ -> Nothing
  }
-- | Adds two adjacent numerals, e.g. a grained value followed by a smaller
-- remainder.
--
-- Pattern: the first token must satisfy 'hasGrain'; the second must satisfy
-- 'isPositive' and must not satisfy 'isMultipliable'.
--
-- The production only fires when the first numeral carries a grain @g@ and
-- the second value is strictly below @10 ** g@; it then yields the sum of
-- both values. Anything else gives 'Nothing'.
ruleSum :: Rule
ruleSum = Rule
  { name = "intersect 2 numbers"
  , pattern =
    [ Predicate hasGrain
    , Predicate $ \token -> not (isMultipliable token) && isPositive token
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral NumeralData{TNumeral.value = val1, TNumeral.grain = Just g}:
       Token Numeral NumeralData{TNumeral.value = val2}:
       _) | (10 ** fromIntegral g) > val2 -> double $ val1 + val2
      _ -> Nothing
  }
-- | Matches any numeral followed by a token satisfying 'isMultipliable' and
-- combines the two with 'multiply'.
ruleMultiply :: Rule
ruleMultiply = Rule
  { name = "compose by multiplication"
  , pattern =
    [ dimension Numeral
    , Predicate isMultipliable
    ]
  , prod = \tokens -> case tokens of
      (token1:token2:_) -> multiply token1 token2
      _ -> Nothing
  }
-- | Lookup table from the number words accepted by 'ruleIntegerType2'
-- (하나, 둘, ... 아홉) to the integers 1 through 9.
integerType2Map :: HashMap Text Integer
integerType2Map = HashMap.fromList
  [ ( "하나", 1 )
  , ( "둘", 2 )
  , ( "셋", 3 )
  , ( "넷", 4 )
  , ( "다섯", 5 )
  , ( "여섯", 6 )
  , ( "일곱", 7 )
  , ( "여덟", 8 )
  , ( "아홉", 9 )
  ]
-- | Matches one of 하나, 둘, ... 아홉 and produces the integer stored for it
-- in 'integerType2Map'.
--
-- The pattern covers 1 through 9 only; the rule name (which says 1..10) is
-- kept unchanged because it identifies the rule at runtime.
ruleIntegerType2 :: Rule
ruleIntegerType2 = Rule
  { name = "integer (1..10) - TYPE 2"
  , pattern =
    [ regex "(하나|둘|셋|넷|다섯|여섯|일곱|여덟|아홉)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        HashMap.lookup match integerType2Map >>= integer
      _ -> Nothing
  }
-- | Matches a numeral followed by 점 or 쩜 and a run of spoken digits
-- (영, 일, ... 구), e.g. 삼점사 for 3.4.
--
-- The second regex group holds the spoken digits. Each one is translated to
-- its ASCII digit, the resulting string is parsed with 'parseDouble', and
-- the fraction is added to the value of the leading numeral.
--
-- The fraction is scaled by the number of spoken digits, so leading zeros
-- keep their place value: 영오 contributes 5 / 10^2 = 0.05, not 0.5.
ruleNumeralDotNumeral :: Rule
ruleNumeralDotNumeral = Rule
  { name = "number dot number - 삼점사"
  , pattern =
    [ dimension Numeral
    , regex "(점|쩜)((영|일|이|삼|사|오|육|칠|팔|구)+)"
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral NumeralData{TNumeral.value = v1}:
       Token RegexMatch (GroupMatch (_:match:_)):
       _) -> do
        -- One ASCII digit per spoken digit, so the Text length below is the
        -- number of decimal places.
        let digits = Text.concat . mapMaybe getDigit $ Text.unpack match
        v2 <- parseDouble digits
        double $ v1 + v2 / 10 ^ Text.length digits
      _ -> Nothing
  }
  where
    -- Translates one spoken digit to its ASCII digit; any other character
    -- gives Nothing and is dropped by 'mapMaybe'.
    getDigit :: IsString a => Char -> Maybe a
    getDigit c = case c of
      '영' -> Just "0"
      '일' -> Just "1"
      '이' -> Just "2"
      '삼' -> Just "3"
      '사' -> Just "4"
      '오' -> Just "5"
      '육' -> Just "6"
      '칠' -> Just "7"
      '팔' -> Just "8"
      '구' -> Just "9"
      _ -> Nothing
-- | Combines a tens numeral with a units numeral by addition.
--
-- Pattern: a numeral selected by @oneOf [10, 20 .. 90]@ followed by one
-- selected by @oneOf [1 .. 9]@. The production yields the sum of the two
-- values.
ruleIntegerType3 :: Rule
ruleIntegerType3 = Rule
  { name = "integer (21..99) - TYPE 2"
  , pattern =
    [ oneOf [10, 20 .. 90]
    , oneOf [1 .. 9]
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral NumeralData{TNumeral.value = v1}:
       Token Numeral NumeralData{TNumeral.value = v2}:
       _) -> double $ v1 + v2
      _ -> Nothing
  }
-- | Matches integers written with comma thousands separators, such as
-- @1,234@ or @12,345,678@: one to three leading digits followed by one to
-- five @,ddd@ groups.
--
-- The commas are removed from the matched text before it is handed to
-- 'parseDouble'; a failed parse makes the production return 'Nothing'.
ruleIntegerWithThousandsSeparator :: Rule
ruleIntegerWithThousandsSeparator = Rule
  { name = "integer with thousands separator ,"
  , pattern =
    [ regex "(\\d{1,3}(,\\d\\d\\d){1,5})"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        parseDouble (Text.replace "," Text.empty match) >>= double
      _ -> Nothing
  }
-- | Every numeral rule defined in this module; this is the module's only
-- export.
rules :: [Rule]
rules =
  [ ruleDecimalNumeral
  , ruleDecimalWithThousandsSeparator
  , ruleFew
  , ruleFraction
  , ruleHalf
  , ruleInteger
  , ruleIntegerForOrdinals
  , ruleIntegerType1
  , ruleIntegerType1PowersOfTen
  , ruleSum
  , ruleMultiply
  , ruleIntegerType2
  , ruleIntegerType3
  , ruleIntegerTypeAndOrdinals
  , ruleIntegerWithThousandsSeparator
  , ruleNumeralDotNumeral
  , ruleNumeralsPrefixWithOr
  ]