module Duckling.Numeral.KO.Rules
( rules ) where
import Data.Maybe
import qualified Data.Text as Text
import Prelude
import Data.String
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import qualified Duckling.Numeral.Types as TNumeral
import Duckling.Regex.Types
import Duckling.Types
-- | Maps the single-syllable forms 한, 첫, 두, 세 and 네 to the integers
-- 1, 1, 2, 3 and 4 respectively.
ruleIntegerForOrdinals :: Rule
ruleIntegerForOrdinals = Rule
  { name = "integer (1..4) - for ordinals"
  , pattern = [regex "(한|첫|두|세|네)"]
  , prod = produce
  }
  where
    -- Resolve the first capture group through the table; a word that is not
    -- listed yields Nothing.
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (word:_)):_) =
      lookup word valueByWord >>= integer
    produce _ = Nothing

    valueByWord :: [(Text.Text, Integer)]
    valueByWord =
      [ ("한", 1)
      , ("첫", 1)
      , ("두", 2)
      , ("세", 3)
      , ("네", 4)
      ]
-- | Plain digit strings. The regex accepts between 1 and 18 consecutive
-- digits; the captured text is parsed with 'parseInt' and widened to
-- 'Integer' before being wrapped as a numeral token.
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
  { name = "integer (numeric)"
  , pattern = [regex "(\\d{1,18})"]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (digits:_)):_) =
      parseInt digits >>= integer . toInteger
    produce _ = Nothing
-- | The word 몇 ("few" in the rule name) always resolves to the fixed
-- integer 3, whatever tokens were matched.
ruleFew :: Rule
ruleFew = Rule
  { name = "few 몇"
  , pattern = [regex "몇"]
  , prod = const (integer 3)
  }
-- | Decimals written with comma-grouped thousands: digits, one or more
-- @,ddd@ groups, then a dot and at least one fractional digit. The commas
-- are dropped before the text is parsed as a double.
ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule
  { name = "decimal with thousands separator"
  , pattern = [regex "(\\d+(,\\d\\d\\d)+\\.\\d+)"]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (raw:_)):_) =
      double =<< parseDouble (Text.filter (/= ',') raw)
    produce _ = Nothing
-- | Decimals in digit form: optional integer digits, a dot, and at least one
-- fractional digit. Parsing is delegated to 'parseDecimal'.
ruleDecimalNumeral :: Rule
ruleDecimalNumeral = Rule
  { name = "decimal number"
  , pattern = [regex "(\\d*\\.\\d+)"]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    -- NOTE(review): the meaning of the 'True' flag is defined by
    -- 'parseDecimal' in the helpers module -- confirm there.
    produce (Token RegexMatch (GroupMatch (raw:_)):_) = parseDecimal True raw
    produce _ = Nothing
-- | Slash fractions: @\<numeral\> / \<numeral\>@ resolves to the first value
-- divided by the second. The divisor is not checked against zero.
ruleFraction2 :: Rule
ruleFraction2 = Rule
  { name = "fraction"
  , pattern =
    [ dimension Numeral
    , regex "/"
    , dimension Numeral
    ]
  , prod = divide
  }
  where
    divide :: [Token] -> Maybe Token
    -- The middle token is the slash itself and carries nothing useful.
    divide (Token Numeral numerator : _ : Token Numeral denominator : _) =
      double (TNumeral.value numerator / TNumeral.value denominator)
    divide _ = Nothing
-- | Negative numerals: a numeral preceded by a minus marker -- a literal
-- @-@, or the words 마이너스 / 마이나스 optionally followed by one whitespace
-- character -- resolves to the negated value of that numeral.
ruleNumeralsPrefixWithOr :: Rule
ruleNumeralsPrefixWithOr = Rule
  { name = "numbers prefix with -, 마이너스, or 마이나스"
  , pattern =
    [ regex "-|마이너스\\s?|마이나스\\s?"
    , dimension Numeral
    ]
  , prod = \tokens -> case tokens of
      -- The prefix token only signals the sign; the value comes from the
      -- numeral that follows it. The sign has to be flipped here: scaling by
      -- a factor of 1 would hand the numeral back unchanged.
      (_:Token Numeral nd:_) -> double (negate (TNumeral.value nd))
      _ -> Nothing
  }
-- | The word 반 ("half" in the rule name) always resolves to 0.5.
ruleHalf :: Rule
ruleHalf = Rule
  { name = "half - 반"
  , pattern = [regex "반"]
  , prod = const (double 0.5)
  }
-- | Any of the words 영, 공 or 빵 resolves to the integer 0.
ruleInteger :: Rule
ruleInteger = Rule
  { name = "integer 0"
  , pattern = [regex "영|공|빵"]
  , prod = const (integer 0)
  }
-- | Maps the tens words 열, 스물, 서른, 마흔, 쉰, 예순, 일흔, 여든 and 아흔 to
-- 10, 20, ..., 90. The table starts at 열 (10) even though the rule name
-- reads 20..90.
ruleIntegerTypeAndOrdinals :: Rule
ruleIntegerTypeAndOrdinals = Rule
  { name = "integer (20..90) - TYPE 2 and ordinals"
  , pattern = [regex "(열|스물|서른|마흔|쉰|예순|일흔|여든|아흔)"]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (word:_)):_) =
      lookup word tensByWord >>= integer
    produce _ = Nothing

    -- Words are listed in ascending order so they pair up with 10, 20 .. 90.
    tensByWord :: [(Text.Text, Integer)]
    tensByWord = zip
      ["열", "스물", "서른", "마흔", "쉰", "예순", "일흔", "여든", "아흔"]
      [10, 20 .. 90]
-- | Maps the digit words 영, 일, 이, 삼, 사, 오, 육, 칠, 팔 and 구 to the
-- integers 0 through 9.
ruleIntegerType1 :: Rule
ruleIntegerType1 = Rule
  { name = "integer - TYPE 1"
  , pattern = [regex "(영|일|이|삼|사|오|육|칠|팔|구)"]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (word:_)):_) =
      lookup word digitByWord >>= integer
    produce _ = Nothing

    -- Position in the list is the digit value, starting from 0.
    digitByWord :: [(Text.Text, Integer)]
    digitByWord = zip
      ["영", "일", "이", "삼", "사", "오", "육", "칠", "팔", "구"]
      [0 ..]
-- | Power-of-ten words: 십 (10), 백 (1e2), 천 (1e3), 만 (1e4), 억 (1e8) and
-- 조 (1e12). Each result is tagged with its grain (the exponent) and marked
-- multipliable.
ruleIntegerType1PowersOfTen :: Rule
ruleIntegerType1PowersOfTen = Rule
  { name = "integer - TYPE 1: powers of ten"
  , pattern = [regex "(십|백|천|만|억|조)"]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (word:_)):_) -> case word of
        "십" -> powerOfTen 10 1
        "백" -> powerOfTen 1e2 2
        "천" -> powerOfTen 1e3 3
        "만" -> powerOfTen 1e4 4
        "억" -> powerOfTen 1e8 8
        "조" -> powerOfTen 1e12 12
        _ -> Nothing
      _ -> Nothing
  }
  where
    -- Build the numeral, then attach the grain and the multipliable flag.
    powerOfTen magnitude exponent =
      double magnitude >>= withGrain exponent >>= withMultipliable
-- | Adds two adjacent numerals: the first must have a grain greater than 1,
-- the second must not be multipliable. The sum is produced only when the
-- second value is strictly below @10 ^ grain@ of the first.
ruleSum :: Rule
ruleSum = Rule
  { name = "intersect 2 numbers"
  , pattern =
    [ numberWith (fromMaybe 0 . TNumeral.grain) (> 1)
    , numberWith TNumeral.multipliable not
    ]
  , prod = addWhenSmaller
  }
  where
    addWhenSmaller :: [Token] -> Maybe Token
    addWhenSmaller (Token Numeral larger : Token Numeral smaller : _)
      -- A missing grain, or a second value that does not fit below the grain
      -- of the first, falls through to Nothing.
      | Just g <- TNumeral.grain larger
      , TNumeral.value smaller < 10 ** fromIntegral g =
          double (TNumeral.value larger + TNumeral.value smaller)
    addWhenSmaller _ = Nothing
-- | Combines any numeral with a following multipliable numeral by handing
-- both tokens to 'multiply'.
ruleMultiply :: Rule
ruleMultiply = Rule
  { name = "compose by multiplication"
  , pattern =
    [ dimension Numeral
    , numberWith TNumeral.multipliable id
    ]
  , prod = composeFirstTwo
  }
  where
    composeFirstTwo :: [Token] -> Maybe Token
    composeFirstTwo (factor : unit : _) = multiply factor unit
    composeFirstTwo _ = Nothing
-- | Maps the number words 하나, 둘, 셋, 넷, 다섯, 여섯, 일곱, 여덟 and 아홉 to
-- the integers 1 through 9.
ruleIntegerType2 :: Rule
ruleIntegerType2 = Rule
  { name = "integer (1..10) - TYPE 2"
  , pattern = [regex "(하나|둘|셋|넷|다섯|여섯|일곱|여덟|아홉)"]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (word:_)):_) =
      lookup word valueByWord >>= integer
    produce _ = Nothing

    -- Words are listed in ascending order and numbered from 1.
    valueByWord :: [(Text.Text, Integer)]
    valueByWord = zip
      ["하나", "둘", "셋", "넷", "다섯", "여섯", "일곱", "여덟", "아홉"]
      [1 ..]
-- | Spoken fractions of the form @\<numeral\> 분의 \<numeral\>@ (or 분에).
-- The first numeral is the divisor: the result is the second value divided
-- by the first. The divisor is not checked against zero.
ruleFraction :: Rule
ruleFraction = Rule
  { name = "fraction"
  , pattern =
    [ dimension Numeral
    , regex "분(의|에)"
    , dimension Numeral
    ]
  , prod = divide
  }
  where
    divide :: [Token] -> Maybe Token
    -- Operand order is reversed relative to the text: denominator comes first.
    divide (Token Numeral denominator : _ : Token Numeral numerator : _) =
      double (TNumeral.value numerator / TNumeral.value denominator)
    divide _ = Nothing
-- | Spoken decimals: a numeral followed by 점 or 쩜 and one or more digit
-- syllables (영 일 이 삼 사 오 육 칠 팔 구), as in the 삼점사 of the rule name.
ruleNumeralDotNumeral :: Rule
ruleNumeralDotNumeral = Rule
  { name = "number dot number - 삼점사"
  , pattern =
    [ dimension Numeral
    , regex "(점|쩜)((영|일|이|삼|사|오|육|칠|팔|구)+)"
    ]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    -- The second capture group holds the whole run of digit syllables.
    produce (Token Numeral whole : Token RegexMatch (GroupMatch (_:syllables:_)) : _) = do
      let asciiDigits = Text.concat (mapMaybe digitFor (Text.unpack syllables))
      fraction <- parseDouble asciiDigits
      -- NOTE(review): decimalsToDouble is defined elsewhere; presumably it
      -- turns the parsed digit run into the part after the decimal point --
      -- confirm in the helpers module.
      double (TNumeral.value whole + decimalsToDouble fraction)
    produce _ = Nothing

    -- One digit syllable to its ASCII digit; anything else is skipped by
    -- the caller's mapMaybe.
    digitFor :: Char -> Maybe Text.Text
    digitFor '영' = Just "0"
    digitFor '일' = Just "1"
    digitFor '이' = Just "2"
    digitFor '삼' = Just "3"
    digitFor '사' = Just "4"
    digitFor '오' = Just "5"
    digitFor '육' = Just "6"
    digitFor '칠' = Just "7"
    digitFor '팔' = Just "8"
    digitFor '구' = Just "9"
    digitFor _ = Nothing
-- | Adds a tens numeral (one of 10, 20, ..., 90) and a following units
-- numeral (one of 1 .. 9). The first pattern also admits 10, although the
-- rule name reads 21..99.
ruleIntegerType3 :: Rule
ruleIntegerType3 = Rule
  { name = "integer (21..99) - TYPE 2"
  , pattern =
    [ oneOf [10, 20 .. 90]
    , oneOf [1 .. 9]
    ]
  , prod = addUnits
  }
  where
    addUnits :: [Token] -> Maybe Token
    addUnits (Token Numeral tens : Token Numeral units : _) =
      double (TNumeral.value tens + TNumeral.value units)
    addUnits _ = Nothing
-- | Integers written with comma-grouped thousands: 1 to 3 leading digits
-- followed by 1 to 5 @,ddd@ groups. The commas are removed and the rest is
-- parsed as a double.
ruleIntegerWithThousandsSeparator :: Rule
ruleIntegerWithThousandsSeparator = Rule
  { name = "integer with thousands separator ,"
  , pattern = [regex "(\\d{1,3}(,\\d\\d\\d){1,5})"]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (raw:_)):_) =
      parseDouble (dropCommas raw) >>= double
    produce _ = Nothing

    dropCommas :: Text.Text -> Text.Text
    dropCommas = Text.replace (Text.singleton ',') Text.empty
-- | The list of numeral rules exported by this module. It names every rule
-- defined in the visible part of the file.
-- NOTE(review): whether the order of entries matters to the consumer of this
-- list cannot be told from here, so the order is left exactly as it was.
rules :: [Rule]
rules =
[ ruleDecimalNumeral
, ruleDecimalWithThousandsSeparator
, ruleFew
, ruleFraction
, ruleFraction2
, ruleHalf
, ruleInteger
, ruleIntegerForOrdinals
, ruleIntegerNumeric
, ruleIntegerType1
, ruleIntegerType1PowersOfTen
, ruleSum
, ruleMultiply
, ruleIntegerType2
, ruleIntegerType3
, ruleIntegerTypeAndOrdinals
, ruleIntegerWithThousandsSeparator
, ruleNumeralDotNumeral
, ruleNumeralsPrefixWithOr
]