module HNum.CSV where
import HNum.Vector
import Data.List ( intercalate )
import Control.Parallel
import Control.Concurrent.ParallelIO
import qualified Control.Concurrent.ParallelIO.Local
as Local
type Header = [String]
data DataFrame a = DataFrame { header :: Header
, dat :: Matrix a
} deriving (Show, Eq)
dataframe :: Header -> Matrix a -> DataFrame a
dataframe h m | length h == row m = DataFrame h m
| otherwise = error "Length of Header != Length of Data"
where n = length m `div` length h
fromVectors :: Header -> [Vector a] -> DataFrame a
fromVectors h vs = dataframe h (matrix vs') where vs' = map toList vs
(#) :: Eq a => DataFrame a -> String -> Vector a
df # hd | hd `notElem` header df = error "No specific header in dataFrame"
| otherwise = i `par` bd `pseq` vec $ bd !! i
where
i = hd `fd` header df
bd = matForm $ dat df
instance Functor DataFrame where
fmap f df = df { dat = fmap f (dat df) }
instance Convertable DataFrame where
readInt df = read <$> df :: DataFrame Int
readInteger df = read <$> df :: DataFrame Integer
readDouble df = read <$> df :: DataFrame Double
class Functor f => Writable f where
toString :: Show a => f a -> String
writeCSV :: (Eq a, Show a) => String -> f a -> IO ()
instance Writable Vector where
toString v = intercalate "\n" (toList $ show <$> v)
writeCSV title v = writeFile title (toString v)
instance Writable Matrix where
toString m = m1 `seq` m2 `seq` intercalate "\n" m2
where m1 = matForm (show <$> m)
m2 = map (intercalate ",") m1
writeCSV title m = writeFile title (toString m)
instance Writable DataFrame where
toString = undefined
writeCSV parentPath df =
let childPath = header df
vecValue = [df # heads | heads <- childPath]
strValue = [toString vecs | vecs <- vecValue]
candidate = [heads ++ "\n" ++ strs | heads <- childPath, strs <- strValue]
commands = [writeFile (parentPath ++ "/" ++ heads ++ ".csv") cds | heads <- childPath, cds <- candidate]
in childPath
`pseq` vecValue
`pseq` strValue
`pseq` candidate
`pseq` commands
`pseq` do
Local.withPool 4 $ \pool -> Local.parallel_ pool commands
readCSV :: String -> IO (DataFrame String)
readCSV filepath = do
r <- readFile filepath
let dat = splitWith '\n' (rmQuot r)
hd = splitWith ',' (head dat)
bd = map (splitWith ',') (tail dat)
return DataFrame {header = hd, dat = matrix bd}
splitWith :: Char -> String -> [String]
splitWith _ [] = []
splitWith sep str
| null temp'' = [temp]
| otherwise = temp `par` temp'' `pseq` temp' `pseq` temp : splitWith sep temp'
where
temp = takeWhile (/= sep) str
temp'' = dropWhile (/= sep) str
temp' = tail temp''
rmQuot :: String -> String
rmQuot [] = []
rmQuot x
| null temp = clean
| otherwise = clean `par` temp `pseq` clean' `pseq` clean ++ rmQuot clean'
where
clean = takeWhile (/= '"') x
temp = dropWhile (/= '"') x
clean' = tail temp