module Data.IDX (
IDXData
, IDXLabels
, IDXContentType(..)
, idxType
, idxDimensions
, isIDXReal
, isIDXIntegral
, idxDoubleContent
, idxIntContent
, labeledIntData
, labeledDoubleData
, encodeIDXLabels
, decodeIDXLabels
, encodeIDXLabelsFile
, decodeIDXLabelsFile
, encodeIDX
, decodeIDX
, encodeIDXFile
, decodeIDXFile
)where
import Control.Applicative ((<$>))
import Control.Monad
import Data.Binary
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as BL
import Data.IDX.Internal
import Data.Int
import Data.Traversable
import qualified Data.Vector.Unboxed as V
import Data.Vector.Unboxed ((!))
import Data.Word
import Debug.Trace
instance Binary IDXContentType where
get = do
w <- getWord8
case w of
0x08 -> return IDXUnsignedByte
0x09 -> return IDXSignedByte
0x0B -> return IDXShort
0x0C -> return IDXInt
0x0D -> return IDXFloat
0x0E -> return IDXDouble
_ -> fail $ "Unrecognized IDX content type: " ++ (show w)
put IDXUnsignedByte = putWord8 0x08
put IDXSignedByte = putWord8 0x09
put IDXShort = putWord8 0x0B
put IDXInt = putWord8 0x0C
put IDXFloat = putWord8 0x0D
put IDXDouble = putWord8 0x0E
instance Binary IDXLabels where
get = do
getInt32
nItems <- fromIntegral <$> getInt32
let readEntries n = V.replicateM n $ fromIntegral <$> getWord8 >>= (return $!)
v <- readContent readEntries 500 nItems
return $ IDXLabels v
put (IDXLabels v) = do
put (0 :: Int32)
let len = V.length v
put (fromIntegral len :: Int32)
V.forM_ v (\x -> put $! (fromIntegral x :: Word8))
labeledIntData:: IDXLabels -> IDXData -> Maybe [(Int, V.Vector Int)]
labeledIntData (IDXLabels v) dat =
if (V.length v) == dim0
then Just $ do
i <- [0 .. dim0 1]
let lab = v ! i
return $ (lab,V.slice (i*entrySize) entrySize content)
else Nothing
where
dim0 = (idxDimensions dat) ! 0
content = idxIntContent dat
entrySize = (V.product $ idxDimensions dat) `div` dim0
labeledDoubleData:: IDXLabels -> IDXData -> Maybe [(Int, V.Vector Double)]
labeledDoubleData (IDXLabels v) dat =
if (V.length v) == dim0
then Just $ do
i <- [0 .. dim0 1]
let lab = v ! i
return $ (lab,V.slice (i*entrySize) entrySize content)
else Nothing
where
dim0 = (idxDimensions dat) ! 0
content = idxDoubleContent dat
entrySize = (V.product $ idxDimensions dat) `div` dim0
instance Binary IDXData where
get = do
getWord8
getWord8
idxType <- get :: Get IDXContentType
nDimensions <- fromIntegral <$> getWord8
dimensionSizes <- replicateM nDimensions (fromIntegral <$> getInt32)
let nEntries = fromIntegral $ product dimensionSizes
dimV = V.fromList dimensionSizes
case idxType of
t@IDXUnsignedByte -> buildIntResult nEntries t dimV getWord8
t@IDXSignedByte -> buildIntResult nEntries t dimV getInt8
t@IDXShort -> buildIntResult nEntries t dimV getInt16
t@IDXInt -> buildIntResult nEntries t dimV getInt32
t@IDXFloat -> buildDoubleResult nEntries t dimV getFloat
t@IDXDouble -> buildDoubleResult nEntries t dimV getDouble
put d = do
putWord8 0
putWord8 0
put $ idxType d
let dimensions = idxDimensions d
put $ (fromIntegral $ V.length dimensions :: Word8)
V.forM_ dimensions $ (\x -> put $! (fromIntegral x :: Int32))
case d of
IDXDoubles t _ content -> V.forM_ content $ putReal t
IDXInts t _ content -> V.forM_ content $ putIntegral t
buildIntResult :: Integral a
=> Int
-> IDXContentType
-> V.Vector Int
-> Get a
-> Get IDXData
buildIntResult nEntries typ dimV getContent = do
content <- readContent readEntries 500 nEntries
return $ IDXInts typ dimV content
where
readEntries n = V.replicateM n $ fromIntegral <$> getContent >>= (return $!)
buildDoubleResult :: Real a
=> Int
-> IDXContentType
-> V.Vector Int
-> Get a
-> Get IDXData
buildDoubleResult nEntries typ dimV getContent = do
content <- readContent readEntries 500 nEntries
return $ IDXDoubles typ dimV content
where
readEntries n = V.replicateM n $ realToFrac <$> getContent >>= (return $!)
putIntegral :: IDXContentType -> Int -> Put
putIntegral IDXUnsignedByte n = put $! (fromIntegral n :: Word8)
putIntegral IDXSignedByte n = put $! (fromIntegral n :: Int8 )
putIntegral IDXShort n = put $! (fromIntegral n :: Int16)
putIntegral IDXInt n = put $! (fromIntegral n :: Int32)
putIntegral t _ = error $ "IDX.putIntegral " ++ show t
putReal :: IDXContentType -> Double -> Put
putReal IDXDouble n = put n
putReal IDXFloat n = put $! (realToFrac n :: Float )
getInt8 :: Get Int8
getInt8 = get
getInt16 :: Get Int16
getInt16 = get
getInt32 :: Get Int32
getInt32 = get
getFloat :: Get Float
getFloat = get
getDouble :: Get Double
getDouble = get
decodeIDXLabelsFile :: FilePath -> IO (Maybe IDXLabels)
decodeIDXLabelsFile path = BL.readFile path >>= return . decodeIDXLabels
decodeIDXLabels :: BL.ByteString -> Maybe IDXLabels
decodeIDXLabels content = case decodeOrFail content of
Right (_,_,result) -> Just result
Left _ -> Nothing
encodeIDXLabelsFile :: IDXLabels -> FilePath -> IO ()
encodeIDXLabelsFile labs path = encodeFile path labs
encodeIDXLabels :: IDXLabels -> BL.ByteString
encodeIDXLabels = encode
decodeIDXFile :: FilePath -> IO (Maybe IDXData)
decodeIDXFile path = BL.readFile path >>= return . decodeIDX
decodeIDX :: BL.ByteString -> Maybe IDXData
decodeIDX content = case decodeOrFail content of
Right (_,_,result) -> Just result
Left _ -> Nothing
encodeIDXFile :: IDXData -> FilePath -> IO ()
encodeIDXFile idx path = encodeFile path idx
encodeIDX :: IDXData -> BL.ByteString
encodeIDX = encode