{-# LANGUAGE OverloadedStrings #-}

-- | Small helpers for the PPM codec: parsing tab-separated input lines
-- and producing (reversed) sliding windows over a list.
module Codec.Compression.PPM.Utils
  ( lineToInstance
  , revWindows
  ) where

import qualified Data.Text.Lazy as T
import qualified Data.Sequence as Seq
import Data.Sequence ((|>))
import Data.Foldable (toList)

--classify :: [

-- | Calculates micro F-Score.
--
-- Not implemented yet: evaluating the result always raises an 'error'.
microFScore :: [a] -> [a] -> Double
microFScore _guess _gold = error "unimp"

-- | Calculates macro F-Score.
--
-- Not implemented yet: evaluating the result always raises an 'error'.
macroFScore :: [a] -> [a] -> Double
macroFScore _guess _gold = error "unimp"

-- | Splits a line of format @ID\<TAB\>LABEL\<TAB\>TEXT@ into a
-- (label, document) tuple of (Text, [Char]).
--
-- The ID field is discarded. Only the first two tabs act as separators,
-- so any further tabs stay inside the document text. A line with fewer
-- than two tabs yields empty strings for the missing fields.
lineToInstance :: T.Text -> (T.Text, [Char])
lineToInstance l = (label, T.unpack (T.drop 1 text))
  where
    -- 'T.breakOn' leaves the separator at the head of the remainder,
    -- hence the @T.drop 1@ before each subsequent use.
    (_, rest) = T.breakOn "\t" l
    (label, text) = T.breakOn "\t" (T.drop 1 rest)

-- | Returns all contiguous windows of (at most) a given length, one per
-- input element, each ending at that element.
-- Includes initial shorter sequences while the window is still filling.
-- For a non-positive length every window is empty.
windows :: Int -> [a] -> [[a]]
windows n0 = go 0 Seq.empty
  where
    go _ _ [] = []
    go n s (a : as)
      -- Still filling: keep every element seen so far.
      | n < n0 = toList grown : go (n + 1) grown as
      -- Full: append the new element and drop the oldest one.
      | otherwise = toList slid : go n slid as
      where
        grown = s |> a
        slid = Seq.drop 1 grown

-- | Reverse-order windows of given length from input sequence.
-- Includes shorter initial windows. Each window lists the most recent
-- element first.
revWindows :: Int -> [i] -> [[i]]
revWindows n = map reverse . windows n