module ProjectM36.Relation.Parse.CSV where
import Data.Csv.Parser
import qualified Data.Vector as V
import Data.Char (ord)
import qualified Data.ByteString.Lazy as BS
import ProjectM36.Base
import ProjectM36.Relation
import ProjectM36.Error
import Data.Text.Encoding (decodeUtf8)
import qualified ProjectM36.Attribute as A
import qualified Data.Set as S
import Data.HashMap.Lazy as HM
import qualified Data.List as L
import qualified Data.Text as T
import Data.Attoparsec.ByteString.Lazy
import ProjectM36.Atom
data CsvImportError = CsvParseError String |
AttributeMappingError RelationalError |
HeaderAttributeMismatchError (S.Set AttributeName)
deriving (Show)
csvDecodeOptions :: DecodeOptions
csvDecodeOptions = DecodeOptions {decDelimiter = fromIntegral (ord ',')}
makeAtomFromCSVText :: AttributeName -> AtomType -> T.Text -> Either RelationalError Atom
makeAtomFromCSVText attrName aType textIn = makeAtomFromText attrName aType $ if aType == TextAtomType then
("\"" `T.append` textIn `T.append` "\"")
else
textIn
csvAsRelation :: BS.ByteString -> Attributes -> Either CsvImportError Relation
csvAsRelation inString attrs = case parse (csvWithHeader csvDecodeOptions) inString of
Fail _ _ err -> Left (CsvParseError err)
Done _ (headerRaw,vecMapsRaw) -> do
let strHeader = V.map decodeUtf8 headerRaw
strMapRecords = V.map convertMap vecMapsRaw
convertMap hmap = HM.fromList $ L.map (\(k,v) -> (decodeUtf8 k, (T.unpack . decodeUtf8) v)) (HM.toList hmap)
attrNames = V.map A.attributeName attrs
attrNameSet = S.fromList (V.toList attrNames)
headerSet = S.fromList (V.toList strHeader)
makeTupleList :: HM.HashMap AttributeName String -> [Either CsvImportError Atom]
makeTupleList tupMap = V.toList $ V.map (\attr -> either (Left . AttributeMappingError) Right $ makeAtomFromCSVText (A.attributeName attr) (A.atomType attr) (T.pack $ tupMap HM.! (A.attributeName attr))) attrs
case attrNameSet == headerSet of
False -> Left $ HeaderAttributeMismatchError (S.difference attrNameSet headerSet)
True -> do
tupleList <- mapM sequence $ V.toList (V.map makeTupleList strMapRecords)
case mkRelationFromList attrs tupleList of
Left err -> Left (AttributeMappingError err)
Right rel -> Right rel