module Text.SSV (
SSVFormat(..),
SSVFormatQuote(..),
readSSV,
showSSV,
hPutSSV,
writeSSVFile,
readCSV,
showCSV,
hPutCSV,
writeCSVFile,
toNL,
fromNL,
SSVReadException(..),
SSVShowException(..),
csvFormat,
pwfFormat )
where
import Control.Exception
import Data.Char
import Data.List
import Data.Maybe
import qualified Data.Set as Set
import Data.Typeable
import System.IO
data SSVFormatQuote = SSVFormatQuote {
ssvFormatQuoteEscape :: Maybe Char,
ssvFormatQuoteLeft :: Char,
ssvFormatQuoteRight :: Char
}
data SSVFormat = SSVFormat {
ssvFormatName :: String,
ssvFormatTerminator :: Char,
ssvFormatSeparator :: Char,
ssvFormatEscape :: Maybe Char,
ssvFormatStripWhite :: Bool,
ssvFormatQuote :: Maybe SSVFormatQuote,
ssvFormatWhiteChars :: String }
csvFormat :: SSVFormat
csvFormat = SSVFormat {
ssvFormatName = "CSV",
ssvFormatTerminator = '\n',
ssvFormatSeparator = ',',
ssvFormatEscape = Nothing,
ssvFormatStripWhite = True,
ssvFormatQuote = Just $ SSVFormatQuote {
ssvFormatQuoteEscape = Just '"',
ssvFormatQuoteLeft = '"',
ssvFormatQuoteRight = '"' },
ssvFormatWhiteChars = " \t" }
pwfFormat :: SSVFormat
pwfFormat = SSVFormat {
ssvFormatName = "Colon-separated values",
ssvFormatTerminator = '\n',
ssvFormatSeparator = ':',
ssvFormatEscape = Nothing,
ssvFormatStripWhite = False,
ssvFormatQuote = Nothing,
ssvFormatWhiteChars = "" }
data SSVReadException = SSVReadException String (Int, Int) String
| SSVEOFException String String
deriving Typeable
data SSVShowException = SSVShowException String String String
deriving Typeable
instance Show SSVReadException where
show (SSVReadException fmt (line, col) msg) =
fmt ++ ":" ++ show line ++ ":" ++ show col ++ ": " ++
"read error: " ++ msg
show (SSVEOFException fmt msg) =
fmt ++ ": read error at end of file: " ++ msg
instance Show SSVShowException where
show (SSVShowException fmt s msg) =
fmt ++ ": field " ++ show s ++ ": show error: " ++ msg
instance Exception SSVReadException
instance Exception SSVShowException
throwRE :: SSVFormat -> (Int, Int) -> String -> a
throwRE fmt pos msg =
throw $ SSVReadException (ssvFormatName fmt) pos msg
throwSE :: SSVFormat -> String -> String -> a
throwSE fmt s msg =
throw $ SSVShowException (ssvFormatName fmt) s msg
toNL :: String -> String
toNL =
foldr clean1 []
where
clean1 :: Char -> String -> String
clean1 '\r' cs@('\n' : _) = cs
clean1 '\r' cs = '\n' : cs
clean1 c cs = c : cs
fromNL :: String -> String
fromNL =
foldr dirty1 []
where
dirty1 :: Char -> String -> String
dirty1 '\n' cs = '\r' : '\n' : cs
dirty1 c cs = c : cs
readSSV :: SSVFormat -> String -> [[String]]
readSSV fmt =
nextsw (1, 1)
where
nextsw p cs
| ssvFormatStripWhite fmt = nextSW p cs
| otherwise = nextSX p cs
rs = ssvFormatTerminator fmt
fs = ssvFormatSeparator fmt
efmt = ssvFormatEscape fmt
e = isJust efmt
ec = fromJust efmt
qfmt = ssvFormatQuote fmt
q = isJust qfmt
lq = ssvFormatQuoteLeft $ fromJust qfmt
rq = ssvFormatQuoteRight $ fromJust qfmt
qesc = ssvFormatQuoteEscape $ fromJust qfmt
qe = isJust qesc
eq = fromJust qesc
incp (line, _) '\n' = (line + 1, 1)
incp (line, col) '\t' = (line, tcol)
where tcol = col + 8 ((col + 7) `mod` 8)
incp (line, _) '\r' = (line, 1)
incp (line, col) _ = (line, col + 1)
nextSW p (' ' : cs) = nextSW (incp p ' ') cs
nextSW p ('\t' : cs) = nextSW (incp p '\t') cs
nextSW p (c : cs)
| c == rs = mkCRS $ nextsw (incp p c) cs
| c == fs = mkCFS $ nextsw (incp p c) cs
| e && c == ec = nextSE (incp p c) cs
| q && c == lq = nextSQ (incp p c) cs
| otherwise = mkCX c $ nextSX (incp p c) cs
nextSW _ [] = []
nextSX p (c : cs)
| c == rs = mkCRS $ nextsw (incp p c) cs
| c == fs = mkCFS $ nextsw (incp p c) cs
| e && c == ec = nextSE (incp p c) cs
| q && c == lq = throwRE fmt p "illegal quote"
| otherwise = mkCX c $ nextSX (incp p c) cs
nextSX _ [] = []
nextSQ p (c : cs)
| c == rs = mkCX c $ nextSQ (incp p c) cs
| q && qe && c == eq = nextSZ (incp p c) cs
| q && c == rq = nextSD (incp p c) cs
| otherwise = mkCX c $ nextSQ (incp p c) cs
nextSQ _ [] = throw $ SSVEOFException
(ssvFormatName fmt) "unclosed quote"
nextSE p (c : cs) = mkCX c $ nextSX (incp p c) cs
nextSE _ [] = []
nextSZ p (' ' : cs) = nextSD (incp p ' ') cs
nextSZ p ('\t' : cs) = nextSD (incp p '\t') cs
nextSZ p (c : cs)
| c == rs = mkCRS $ nextsw (incp p c) cs
| c == fs = mkCFS $ nextsw (incp p c) cs
| q && qe && c == eq = mkCX c $ nextSQ (incp p c) cs
| q && c == rq = mkCX c $ nextSQ (incp p c) cs
| q && c == lq = mkCX c $ nextSQ (incp p c) cs
| otherwise = throwRE fmt p "illegal escape"
nextSZ _ [] = []
nextSD p (' ' : cs) = nextSD (incp p ' ') cs
nextSD p ('\t' : cs) = nextSD (incp p '\t') cs
nextSD p (c : cs)
| c == fs = mkCFS $ nextsw (incp p c) cs
| c == rs = mkCRS $ nextsw (incp p c) cs
| otherwise = throwRE fmt p "junk after quoted field"
nextSD _ [] = []
mkCX x [] = [[[x]]]
mkCX x ([]:rss) = [[x]]:rss
mkCX x ((w:wss):rss) = ((x:w):wss):rss
mkCFS [] = [["",""]]
mkCFS (r:rss) = ("":r):rss
mkCRS rss = [""]:rss
readCSV :: String -> [[String]]
readCSV = readSSV csvFormat . toNL
showSSV :: SSVFormat -> [[String]] -> String
showSSV fmt =
concatMap showRow
where
showRow =
(++ "\n") . intercalate [ssvFormatSeparator fmt] . map showField
where
showField s
| any needsQuoteChar s || endIsWhite s =
case ssvFormatQuote fmt of
Just qfmt ->
if isJust (ssvFormatQuoteEscape qfmt) ||
not (elem (ssvFormatQuoteRight qfmt) s)
then quote qfmt s
else case ssvFormatEscape fmt of
Just ch -> escape ch s
Nothing -> throwSE fmt s "unquotable character in field"
Nothing ->
case ssvFormatEscape fmt of
Just ch -> escape ch s
Nothing -> throwSE fmt s "unquotable character in field"
| otherwise = s
where
needsQuoteChar c
| Set.member c quotableChars = True
| isPrint c = False
| otherwise = True
where
quotableChars =
Set.fromList $ concat $ catMaybes [
Just [ssvFormatTerminator fmt],
Just [ssvFormatSeparator fmt],
fmap (:[]) $ ssvFormatEscape fmt,
fmap ((:[]) . ssvFormatQuoteLeft) $
ssvFormatQuote fmt ]
endIsWhite _ | not (ssvFormatStripWhite fmt) = False
endIsWhite "" = False
endIsWhite s' =
let firstChar = head s'
lastChar = last s'
in
firstChar `elem` ssvFormatWhiteChars fmt ||
lastChar `elem` ssvFormatWhiteChars fmt
quote qfmt s' = [ssvFormatQuoteLeft qfmt] ++
qescape qfmt s' ++
[ssvFormatQuoteRight qfmt]
escape esc s' =
foldr escape1 "" s'
where
escape1 c cs
| needsQuoteChar c = esc : c : cs
| otherwise = c : cs
qescape qfmt s' =
case ssvFormatQuoteEscape qfmt of
Just qesc -> foldr (qescape1 qesc) "" s'
Nothing -> s'
where
qescape1 qesc c cs
| c == qesc || c == ssvFormatQuoteRight qfmt =
qesc : c : cs
| otherwise =
c : cs
showCSV :: [[String]] -> String
showCSV = showSSV csvFormat
hPutSSV :: SSVFormat -> Handle -> [[String]] -> IO ()
hPutSSV fmt h csv = do
hSetEncoding h utf8
let nlm = NewlineMode { inputNL = nativeNewline, outputNL = CRLF }
hSetNewlineMode h nlm
hPutStr h $ showSSV fmt csv
hPutCSV :: Handle -> [[String]] -> IO ()
hPutCSV = hPutSSV csvFormat
writeSSVFile :: SSVFormat -> String -> [[String]] -> IO ()
writeSSVFile fmt path csv = do
h <- openFile path WriteMode
hPutSSV fmt h csv
hClose h
writeCSVFile :: String -> [[String]] -> IO ()
writeCSVFile = writeSSVFile csvFormat