module Data.IDX (
IDXData
, IDXContentType(..)
, idxType
, idxDimensions
, isIDXReal
, isIDXIntegral
, idxDoubleContent
, idxIntContent
, encodeIDX
, decodeIDX
, encodeIDXFile
, decodeIDXFile
)where
import Control.Monad
import Data.Binary
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as BL
import Data.Int
import Data.Traversable
import qualified Data.Vector.Unboxed as V
import Data.Vector.Unboxed ((!))
import Data.Word
import Debug.Trace
data IDXContentType where
IDXUnsignedByte :: IDXContentType
IDXSignedByte :: IDXContentType
IDXShort :: IDXContentType
IDXInt :: IDXContentType
IDXFloat :: IDXContentType
IDXDouble :: IDXContentType
deriving Show
instance Binary IDXContentType where
get = do
w <- getWord8
case w of
0x08 -> return IDXUnsignedByte
0x09 -> return IDXSignedByte
0x0B -> return IDXShort
0x0C -> return IDXInt
0x0D -> return IDXFloat
0x0E -> return IDXDouble
_ -> fail $ "Unrecognized IDX content type: " ++ (show w)
put IDXUnsignedByte = putWord8 0x08
put IDXSignedByte = putWord8 0x09
put IDXShort = putWord8 0x0B
put IDXInt = putWord8 0x0C
put IDXFloat = putWord8 0x0D
put IDXDouble = putWord8 0x0E
data IDXData = IDXInts IDXContentType (V.Vector Int) (V.Vector Int )
| IDXDoubles IDXContentType (V.Vector Int) (V.Vector Double)
deriving Show
idxType :: IDXData -> IDXContentType
idxType (IDXInts t _ _) = t
idxType (IDXDoubles t _ _) = t
idxDimensions :: IDXData -> V.Vector Int
idxDimensions (IDXInts _ ds _) = ds
idxDimensions (IDXDoubles _ ds _) = ds
isIDXIntegral :: IDXData -> Bool
isIDXIntegral (IDXInts _ _ _) = True
isIDXIntegral (_ ) = False
isIDXReal :: IDXData -> Bool
isIDXReal (IDXDoubles _ _ _) = True
isIDXReal (_ ) = False
idxIntContent :: IDXData -> V.Vector Int
idxIntContent (IDXInts _ _ v) = v
idxIntContent (IDXDoubles _ _ v) =
V.fromList $ [round $ (v ! i) | i <- [0.. V.length v]]
idxDoubleContent :: IDXData -> V.Vector Double
idxDoubleContent (IDXDoubles _ _ v) = v
idxDoubleContent (IDXInts _ _ v) =
V.fromList $ [fromIntegral $ (v ! i) | i <- [0.. V.length v]]
instance Binary IDXData where
get = do
getWord8
getWord8
idxType <- get :: Get IDXContentType
nDimensions <- fromIntegral <$> getWord8
dimensionSizes <- replicateM nDimensions (fromIntegral <$> getInt32)
let nEntries = fromIntegral $ product dimensionSizes
dimV = V.fromList dimensionSizes
case idxType of
t@IDXUnsignedByte -> buildIntResult nEntries t dimV getWord8
t@IDXSignedByte -> buildIntResult nEntries t dimV getInt8
t@IDXShort -> buildIntResult nEntries t dimV getInt16
t@IDXInt -> buildIntResult nEntries t dimV getInt32
t@IDXFloat -> buildDoubleResult nEntries t dimV getFloat
t@IDXDouble -> buildDoubleResult nEntries t dimV getDouble
put d = do
putWord8 0
putWord8 0
put $ idxType d
let dimensions = idxDimensions d
put $ (fromIntegral $ V.length dimensions :: Word8)
V.forM_ dimensions $ (\x -> put $! (fromIntegral x :: Int32))
case d of
IDXDoubles t _ content -> V.forM_ content $ putReal t
IDXInts t _ content -> V.forM_ content $ putIntegral t
buildIntResult :: Integral a
=> Int
-> IDXContentType
-> V.Vector Int
-> Get a
-> Get IDXData
buildIntResult nEntries typ dimV getContent = do
content <- readContent readEntries 500 nEntries
return $ IDXInts typ dimV content
where
readEntries n = V.replicateM n $ fromIntegral <$> getContent >>= (return $!)
buildDoubleResult :: Real a
=> Int
-> IDXContentType
-> V.Vector Int
-> Get a
-> Get IDXData
buildDoubleResult nEntries typ dimV getContent = do
content <- readContent readEntries 500 nEntries
return $ IDXDoubles typ dimV content
where
readEntries n = V.replicateM n $ realToFrac <$> getContent >>= (return $!)
readContent :: (V.Unbox a)
=> (Int -> Get (V.Vector a))
-> Int
-> Int
-> Get (V.Vector a)
readContent readEntries chunkSize n =
if n > chunkSize
then do
headChunk <- readEntries (n `mod` chunkSize)
let nChunks = n `div` chunkSize
chunkList <- replicateM nChunks (readContent readEntries chunkSize chunkSize)
return $! V.concat $ headChunk:chunkList
else do
rest <- readEntries n
return $! rest
putIntegral :: IDXContentType -> Int -> Put
putIntegral IDXUnsignedByte n = put $! (fromIntegral n :: Word8)
putIntegral IDXSignedByte n = put $! (fromIntegral n :: Int8 )
putIntegral IDXShort n = put $! (fromIntegral n :: Int16)
putIntegral IDXInt n = put $! (fromIntegral n :: Int32)
putIntegral t _ = error $ "IDX.putIntegral " ++ show t
putReal :: IDXContentType -> Double -> Put
putReal IDXDouble n = put n
putReal IDXFloat n = put $! (realToFrac n :: Float )
getInt8 :: Get Int8
getInt8 = get
getInt16 :: Get Int16
getInt16 = get
getInt32 :: Get Int32
getInt32 = get
getFloat :: Get Float
getFloat = get
getDouble :: Get Double
getDouble = get
decodeIDXFile :: FilePath -> IO (Maybe IDXData)
decodeIDXFile path = BL.readFile path >>= return . decodeIDX
decodeIDX :: BL.ByteString -> Maybe IDXData
decodeIDX content = case decodeOrFail content of
Right (_,_,result) -> Just result
Left _ -> Nothing
encodeIDXFile :: IDXData -> FilePath -> IO ()
encodeIDXFile idx path = encodeFile path idx
encodeIDX :: IDXData -> BL.ByteString
encodeIDX = encode