module Haskell_ML.Util
( Iris(..), Attributes(..), Sample
, readIrisData, attributeToVector, irisTypeToVector
, classificationAccuracy, printVector, printVecPair, mkSmplsUniform
, asciiPlot, calcMeanList
, for
) where
import Control.Applicative
import Control.Arrow
import Data.List
import qualified Data.Text as T
import Data.Attoparsec.Text hiding (take)
import Data.Singletons.TypeLits
import Numeric.LinearAlgebra.Data (maxIndex, toList)
import Numeric.LinearAlgebra.Static
import Text.Printf
data Iris = Setosa
| Versicolor
| Virginica
deriving (Show, Read, Eq, Ord, Enum)
data Attributes = Attributes
{ sepLen :: Double
, sepWidth :: Double
, pedLen :: Double
, pedWidth :: Double
} deriving (Show, Read, Eq, Ord)
type Sample = (Attributes, Iris)
readIrisData :: String -> IO [Sample]
readIrisData fname = do
ls <- T.lines . T.pack <$> readFile fname
return $ f <$> ls
where
f l = case parseOnly sampleParser l of
Left msg -> error msg
Right x -> x
mkSmplsUniform :: [Sample] -> [Sample]
mkSmplsUniform samps = map (first $ scaleAtt . offsetAtt) samps
where scaleAtt :: Attributes -> Attributes
scaleAtt Attributes{..} = Attributes (sls * sepLen) (sws * sepWidth) (pls * pedLen) (pws * pedWidth)
offsetAtt :: Attributes -> Attributes
offsetAtt Attributes{..} = Attributes (sepLen slo) (sepWidth swo) (pedLen plo) (pedWidth pwo)
slo = minFldVal sepLen samps
swo = minFldVal sepWidth samps
plo = minFldVal pedLen samps
pwo = minFldVal pedWidth samps
sls = 1.0 / (maxFldVal sepLen samps slo)
sws = 1.0 / (maxFldVal sepWidth samps swo)
pls = 1.0 / (maxFldVal pedLen samps plo)
pws = 1.0 / (maxFldVal pedWidth samps pwo)
minFldVal :: (Attributes -> Double) -> [Sample] -> Double
minFldVal = overSamps minimum
maxFldVal :: (Attributes -> Double) -> [Sample] -> Double
maxFldVal = overSamps maximum
overSamps :: ([Double] -> Double) -> (Attributes -> Double) -> [Sample] -> Double
overSamps f fldAcc = f . fldFromSamps fldAcc
fldFromSamps :: (Attributes -> Double) -> [Sample] -> [Double]
fldFromSamps fldAcc = map (fldAcc . fst)
attributeToVector :: Attributes -> R 4
attributeToVector Attributes{..} = vector [sepLen, sepWidth, pedLen, pedWidth]
irisTypeToVector :: Iris -> R 3
irisTypeToVector = \case
Setosa -> vector [1,0,0]
Versicolor -> vector [0,1,0]
Virginica -> vector [0,0,1]
classificationAccuracy :: (KnownNat n) => [R n] -> [R n] -> Double
classificationAccuracy us vs = calcMeanList $ cmpr us vs
where cmpr :: (KnownNat n) => [R n] -> [R n] -> [Double]
cmpr xs ys = for (zipWith maxComp xs ys) $ \case
True -> 1.0
False -> 0.0
maxComp :: (KnownNat n) => R n -> R n -> Bool
maxComp u v = maxIndex (extract u) == maxIndex (extract v)
calcMeanList :: (Fractional a) => [a] -> a
calcMeanList = uncurry (/) . foldr (\e (s,c) -> (e+s,c+1)) (0,0)
printVector :: (KnownNat n) => R n -> String
printVector v = foldl' (\ s x -> s ++ printf "%+6.4f " x) "[ " ((toList . extract) v) ++ " ]"
printVecPair :: (KnownNat m, KnownNat n) => (R m, R n) -> String
printVecPair (u, v) = "( " ++ printVector u ++ ", " ++ printVector v ++ " )"
asciiPlot :: [Double] -> String
asciiPlot xs = unlines $
zipWith (++)
[ " "
, printf " %6.4f " x_max
, " "
, " "
, " "
, " "
, " "
, " "
, " "
, " "
, " "
, printf " %6.4f " x_min
, " "
] $
(:) "^" $ transpose (
(:) "|||||||||||" $
for (take 60 xs) $ \x ->
valToStr $ (x x_min) * 10 / x_range
) ++ ["|" ++ replicate 60 '_' ++ ">"]
where valToStr :: Double -> String
valToStr x = let i = round (10 x)
in replicate i ' ' ++ "*" ++ replicate (10 i) ' '
x_min = minimum xs
x_max = maximum xs
x_range = x_max x_min
sampleParser :: Parser Sample
sampleParser = f <$> (double <* char ',')
<*> (double <* char ',')
<*> (double <* char ',')
<*> (double <* char ',')
<*> irisParser
where
f sl sw pl pw i = (Attributes sl sw pl pw, i)
irisParser :: Parser Iris
irisParser = string "Iris-setosa" *> return Setosa
<|> string "Iris-versicolor" *> return Versicolor
<|> string "Iris-virginica" *> return Virginica
for :: [a] -> (a -> b) -> [b]
for = flip map