module Foundation.String.ModifiedUTF8
( fromModified
) where
import GHC.ST (runST, ST)
import GHC.Prim (Addr#)
import Control.Monad (mapM_)
import Foundation.Internal.Base
import Foundation.Primitive.Types.OffsetSize
import qualified Foundation.Array.Unboxed as Vec
import Foundation.Array.Unboxed (UArray)
import Foundation.Numerical
import Foundation.Primitive.FinalPtr
import Foundation.String.UTF8Table
-- | Read one encoded sequence through the given byte reader.
--
-- The byte found at @offset@ is handed to 'getNbBytes'; the sequence returned
-- is that leading byte followed by that many further bytes.
--
-- Returns the bytes of the sequence, in order, together with the offset of
-- the first byte after it (i.e. where the next sequence starts).
--
-- No validation is performed here: every byte is fetched with @getAtIdx@ as is.
accessBytes :: Offset Word8 -> (Offset Word8 -> Word8) -> ([Word8], Offset Word8)
accessBytes offset getAtIdx = (collect offset, next)
  where
    -- leading byte of the sequence; it alone decides the sequence length
    lead = getAtIdx offset

    -- how many bytes follow the leading byte
    trailing :: Size Word8
    trailing = Size (getNbBytes lead)

    -- one past the last byte of the sequence: leading byte + trailing bytes
    next :: Offset Word8
    next = 1 + (offset `offsetPlusE` trailing)

    -- gather every byte from the current position up to (excluding) 'next'
    collect :: Offset Word8 -> [Word8]
    collect cur
        | cur == next = []
        | otherwise   = getAtIdx cur : collect (cur + 1)
-- | Present a raw address as an unboxed array of bytes.
--
-- The address is wrapped in a final pointer whose finalizer does nothing, and
-- the resulting array starts at offset 0 with a declared size of 100000.
--
-- NOTE(review): the size is a hard-coded constant, not something derived from
-- the data behind the address; presumably it is only a placeholder bound
-- because the real length is unknown at this point -- TODO confirm.
buildByteArray :: Addr# -> ST st (UArray Word8)
buildByteArray addr = fmap asArray (toFinalPtr (Ptr addr) noFinalizer)
  where
    -- view the final pointer as an array starting at offset 0
    asArray = Vec.UVecAddr (Offset 0) (Size 100000)

    -- nothing is released when the pointer is finalized
    noFinalizer _ = return ()
-- | Copy the byte sequences found at the given address into a fresh array,
-- rewriting the two-byte sequence @0xC0 0x80@ into a single @0x00@ byte.
--
-- Sequences are read one at a time, starting at offset 0, with 'accessBytes':
--
-- * a sequence made of the single byte @0x00@ ends the copy (it is not
--   appended to the result);
--
-- * the sequence @0xC0 0x80@ is appended as one @0x00@ byte;
--
-- * any other sequence is appended unchanged.
--
-- No validation of the input is done, and the loop only stops on the
-- terminating @0x00@ sequence, so the caller is presumably expected to pass a
-- well-formed, null-terminated buffer -- TODO confirm against callers.
fromModified :: Addr# -> UArray Word8
fromModified addr = runST $ do
    source <- buildByteArray addr
    Vec.unsafeIndexer source decodeAll
  where
    -- run the copy loop inside a builder, starting from the first byte
    decodeAll readByte = Vec.builderBuild 64 (go (Offset 0))
      where
        go pos =
            case chunk of
                -- 'accessBytes' is not expected to return an empty sequence
                []           -> internalError "ModifiedUTF8.fromModified"
                -- terminator: stop without emitting anything
                [0x00]       -> return ()
                -- two-byte encoding of the null character: emit a plain zero
                [0xC0, 0x80] -> Vec.builderAppend 0x00 >> continue
                -- everything else is copied byte for byte
                _            -> mapM_ Vec.builderAppend chunk >> continue
          where
            (chunk, following) = accessBytes pos readByte
            continue = go following