module Text.PhonotacticLearner.PhonotacticConstraints (
FeatureState(..), SegRef(..),
FeatureTable(..), srBounds, ftlook,
segsToRefs, refsToSegs,
csvToFeatureTable, featureTableToCsv,
NaturalClass(..), classToSeglist,
GlobReps(..), ClassGlob(..), classesToLists,
cgMatchCounter
) where
import Text.PhonotacticLearner.Util.Ring
import Text.PhonotacticLearner.Util.Probability
import Text.PhonotacticLearner.MaxentGrammar
import Text.PhonotacticLearner.DFST
import Text.Read.CSV
import Data.Array.IArray
import Data.Maybe
import Data.Tuple
import Data.List
import Data.Char
import Data.Monoid
import Control.Monad
import Control.Applicative hiding (many, some)
import Control.DeepSeq
import Control.Arrow((***),(&&&),first,second)
import qualified Data.Text as T
import qualified Data.Map.Lazy as M
import Text.ParserCombinators.ReadP
import Text.Read(Read(..),lift,parens,readMaybe)
data FeatureState = FOff | FPlus | FMinus deriving (Enum, Eq, Ord, Read, Show)
instance NFData FeatureState where
rnf fs = fs `seq` ()
newtype SegRef = Seg Int deriving (Eq, Ord, Read, Show, Ix, NFData)
data FeatureTable sigma = FeatureTable { featTable :: Array (SegRef,Int) FeatureState
, featNames :: Array Int T.Text
, segNames :: Array SegRef sigma
, featLookup :: M.Map T.Text Int
, segLookup :: M.Map sigma SegRef } deriving (Show, Eq)
instance (Ord a, NFData a) => NFData (FeatureTable a) where
rnf (FeatureTable ft fn sn fl sl) = rnf ft `seq` rnf fn `seq` rnf sn `seq` rnf fl `seq` rnf sl
srBounds :: FeatureTable sigma -> (SegRef, SegRef)
srBounds ft = bounds (segNames ft)
ftlook :: FeatureTable sigma -> SegRef -> Int -> FeatureState
ftlook ft sr fi = featTable ft ! (sr,fi)
segsToRefs :: (Ord sigma) => FeatureTable sigma -> [sigma] -> [SegRef]
segsToRefs ft = mapMaybe (\x -> M.lookup x (segLookup ft))
refsToSegs :: FeatureTable sigma -> [SegRef] -> [sigma]
refsToSegs ft = fmap (segNames ft !) . filter (inRange (srBounds ft))
allFeatures :: FeatureTable sigma -> [T.Text]
allFeatures ft = elems (featNames ft)
lstrip :: String -> String
lstrip (' ':xs) = lstrip xs
lstrip ('\t':xs) = lstrip xs
lstrip xs = xs
csvToFeatureTable :: (Ord sigma) => (String -> sigma) -> String -> Maybe (FeatureTable sigma)
csvToFeatureTable readSeg rawcsv = do
parsedcsv <- readCSV rawcsv
((_:segcells) : rawfeatrecs) <- return (fmap (fmap lstrip) parsedcsv)
let numsegs = length segcells
guard (numsegs > 0)
let seglist = listArray (Seg 1, Seg numsegs) (fmap readSeg segcells)
featrecs = filter (\xs -> length xs == numsegs + 1) rawfeatrecs
numfeats = length featrecs
guard (numfeats > 0)
let featlist = listArray (1, numfeats) (fmap (T.pack . head) featrecs)
ft = array ((Seg 1,1), (Seg numsegs, numfeats)) $ do
(featidx, _:featdata) <- zip [1..] featrecs
(segidx, segfield) <- zip (fmap Seg [1..]) featdata
let fstate = case segfield of
"+" -> FPlus
"✓" -> FPlus
"√" -> FPlus
"-" -> FMinus
"−" -> FMinus
_ -> FOff
return ((segidx, featidx), fstate)
let segmap = M.fromList (fmap swap (assocs seglist))
featmap = M.fromList (fmap swap (assocs featlist))
guard (M.size segmap == rangeSize (bounds seglist))
return (FeatureTable ft featlist seglist featmap segmap)
fschar FPlus = "+"
fschar FMinus = "-"
fschar FOff = "0"
featureTableToCsv :: (sigma -> String) -> FeatureTable sigma -> String
featureTableToCsv writeSeg ft = writeCSV (header : body) where
header = "" : fmap writeSeg (elems (segNames ft))
body = [T.unpack fn : [fschar (ftlook ft s f) | s <- indices (segNames ft)] | (f,fn) <- assocs (featNames ft)]
readSrLexicon :: FeatureTable String -> (String -> [String]) -> String -> [([SegRef],Int)]
readSrLexicon ft seg text = do
line <- lines text
let (wt@(_:_),wf') = break (== '\t') line
n <- case (words wf') of
[] -> [1]
[wf] -> maybeToList $ readMaybe wf
_ -> []
return (segsToRefs ft (seg wt), n)
data NaturalClass = NClass { isInverted :: !Bool
, featureList :: [(FeatureState, T.Text)]
} deriving (Eq, Ord)
instance NFData NaturalClass where
rnf (NClass b fs) = b `seq` rnf fs
instance Show NaturalClass where
show (NClass isNegated feats) = (if isNegated then "[¬ " else "[") ++ unwords (fmap showfeat feats) ++ "]"
where showfeat (fs, fn) = (case fs of
FPlus -> "+"
FMinus -> "−"
FOff -> "0")
++ T.unpack fn
isPrintNelem :: String -> Char -> Bool
isPrintNelem s c = isPrint c && not (isSpace c) && c `notElem` s
featP :: ReadP (FeatureState, T.Text)
featP = do
skipSpaces
state <- choice [FPlus <$ char '+', FPlus <$ char '✓', FPlus <$ char '√', FMinus <$ char '-', FMinus <$ char '−', FOff <$ char '0'] <++ return FPlus
feat <- (:) <$> satisfy (isPrintNelem "0123456789+-√−✓(){}[]¬^") <*> munch (isPrintNelem "(){}[]¬^")
return (state, T.pack feat)
classP = between (char '[') (char ']') (NClass
<$> ((True <$ char '¬') +++ (True <$ char '^') +++ return False)
<*> many featP)
instance Read NaturalClass where
readPrec = parens (lift (skipSpaces >> classP))
xor :: Bool -> Bool -> Bool
xor False p = p
xor True p = not p
classToSeglist :: FeatureTable sigma -> NaturalClass -> SegSet SegRef
classToSeglist ft (NClass isNegated cls) = force $ fnArray (srBounds ft) (\c -> isNegated `xor` and [ftlook ft c fi == fs | (fs,fi) <- icls])
where icls = do
(s,fn) <- cls
Just fi <- return (M.lookup fn (featLookup ft))
return (s,fi)
data ClassGlob = ClassGlob !Bool !Bool [(GlobReps, NaturalClass)] deriving (Eq, Ord)
instance NFData ClassGlob where
rnf (ClassGlob isinit isfin gparts) = isinit `seq` isfin `seq` rnf gparts
instance Show ClassGlob where
show (ClassGlob isinit isfin parts) = (guard isinit >> "#") ++ (showGP =<< parts) ++ (guard isfin >> "#") where
showGP (GStar, NClass False []) = "…"
showGP (rep, NClass False [(FPlus,"syllabic")]) = "V" ++ suf rep
showGP (rep, NClass False [(FMinus,"syllabic")]) = "C" ++ suf rep
showGP (rep, c) = show c ++ suf rep
suf GSingle = ""
suf GPlus = "₁"
suf GStar = "₀"
globRepsP :: ReadP GlobReps
globRepsP = choice [GPlus <$ char '+', GPlus <$ char '₁', GStar <$ char '*', GStar <$ char '₀', return GSingle]
classGlobP :: ReadP ClassGlob
classGlobP = do
isinit <- (True <$ char '#') +++ return False
gparts <- many1 $ ((GStar, NClass False []) <$ char '…') +++ do
cls <- classP +++ (NClass False [(FPlus,"syllabic")] <$ char 'V') +++ (NClass False [(FMinus,"syllabic")] <$ char 'C')
rep <- globRepsP
return (rep,cls)
isfin <- (True <$ char '#') +++ return False
return (ClassGlob isinit isfin gparts)
instance Read ClassGlob where
readPrec = parens (lift (skipSpaces >> classGlobP))
classesToLists :: FeatureTable sigma -> ClassGlob -> ListGlob SegRef
classesToLists ft (ClassGlob isinit isfin gparts) = ListGlob isinit isfin (fmap (second (classToSeglist ft)) gparts)
cgMatchCounter :: FeatureTable sigma -> ClassGlob -> ShortDFST SegRef
cgMatchCounter ft = matchCounter . classesToLists ft