Safe Haskell | None |
---|---|
Language | Haskell98 |
Baseline word-segmentation functions.
Synopsis
- data PathTyp
- pickPath :: Word b => PathTyp -> DAG a b -> DAG a b
- findPath :: Word b => PathTyp -> DAG a b -> Set EdgeID
- computeFreqs :: Word w => [Sent w t] -> Map Text (Int, Int)
- data FreqConf = FreqConf {
- pickFreqMap :: Map Text (Int, Int)
- smoothingParam :: Double
- computeAmbiStats :: Word w => AmbiCfg -> [Sent w t] -> AmbiStats
- data AmbiCfg = AmbiCfg {
- onlyChosen :: Bool
- data AmbiStats = AmbiStats {}
Documentation
pickPath :: Word b => PathTyp -> DAG a b -> DAG a b Source #
Select the shortest path (or the longest, depending on `PathTyp`) in the given DAG and remove all the edges which are not on this path.
Frequencies
computeFreqs :: Word w => [Sent w t] -> Map Text (Int, Int) Source #
Compute chosen/not-chosen counts of the individual orthographic forms in the DAGs. Only the ambiguous segments are taken into account.
Configuration related to frequency-based path picking.
FreqConf | |
|
Ambiguity-related stats
computeAmbiStats :: Word w => AmbiCfg -> [Sent w t] -> AmbiStats Source #
Compute:

* the number of tokens participating in ambiguities
* the total number of tokens
Numbers of tokens.
AmbiCfg | |
|