{-# LANGUAGE CPP, MagicHash, BangPatterns, ScopedTypeVariables #-} {-| Module : GHC.Packing.Type Copyright : (c) Jost Berthold, 2010-2015, License : BSD3 Maintainer : Jost Berthold <jost.berthold@gmail.com> Stability : experimental Portability : no (depends on GHC internals) = Serialized type for the packman library, instances and helpers The data type @'Serialized' a@ includes a phantom type @a@ to ensure type safety within one and the same program run. Type @a@ can be polymorphic (at compile time, that is) when @'Serialized' a@ is not used apart from being argument to @'deserialize'@. The @Show@, @Read@, and @Binary@ instances of @Serialized a@ require an additional @Typeable@ context (which requires @a@ to be monomorphic) in order to implement dynamic type checks when parsing and deserialising data from external sources. -} module GHC.Packing.Type -- ( Serialized(..) -- TOOD assemble export list with structure and headings/text blocks -- , ... ) where import GHC.Prim -- ByteArray# import GHC.Exts ( Int(..)) -- I# -- Read and Show instances import Text.Printf ( printf ) import Text.ParserCombinators.ReadP (sepBy1, many1, ReadP, munch, munch1, pfail, readP_to_S, satisfy, skipSpaces, string ) import Data.Char ( isDigit ) -- Binary instance import Data.Binary ( Get, Binary(..), encode, decode, encodeFile, decodeFile ) -- we use UArrays of machine word size (TargetWord) import Data.Word( Word, Word64, Word32 ) import Data.Array.Base ( UArray(..), elems, listArray ) import Foreign.Storable ( sizeOf ) -- for dynamic type checks when parsing import Data.Typeable (Typeable(..), typeOf) #if MIN_VERSION_base(4,8,0) import Data.Typeable (typeRepFingerprint) #else import Data.Typeable.Internal (TypeRep(..)) #endif import qualified GHC.Fingerprint -- for a hash of the executable. Using GHC.Fingerprint.getFileHash import GHC.Fingerprint(getFileHash) import System.Environment import System.IO.Unsafe -- for control flow and exceptions import Control.Monad(when) import Control.Exception(throw) import GHC.Packing.PackException -- | The type of Serialized data. Phantom type 'a' ensures that we -- unpack data as the expected type. data Serialized a = Serialized { packetData :: ByteArray# } {- $ShowReadBinary The power of evaluation-orthogonal serialisation is that one can /externalise/ partially evaluated data (containing thunks), for instance write it to disk or send it over a network. Therefore, the module defines a 'Binary' instance for 'Serialized a', as well as instances for 'Read' and 'Show'@ which satisfy > read . show == id :: 'Serialized' a -> 'Serialized' a The phantom type is enough to ensure type-correctness when serialised data remain in one single program run. However, when data from previous runs are read from an external source, their type needs to be checked at runtime. Type information must be stored together with the (binary) serialisation data. The serialised data contain pointers to static data in the generating program (top-level functions and constants) and very likely to additional library code. Therefore, the /exact same binary/ must be used when reading in serialised data from an external source. A hash of the executable is included in the representation to ensure this. -} -- | prints packet as Word array in 4 columns (/Word/ meaning the -- machine word size), and additionally includes Fingerprint hash -- values for executable binary and type. instance Typeable a => Show (Serialized a) where show p = unlines [ "Serialization Packet, size " ++ show size, ", program " ++ show prgHash, ", type fingerprint" ++ show t, showWArray (UArray 0 (size-1) size dat) ] where size = case sizeofByteArray# dat of sz# -> (I# sz# ) `div` sizeOf(undefined::TargetWord) t = typeFP ( undefined :: a ) dat = packetData p -- | Helper to show a serialized structure as a packet (Word Array) showWArray :: UArray Int TargetWord -> String showWArray arr = unlines [ show i ++ ":" ++ unwords (map showH row) | (i,row) <- zip [0,4..] elRows ] where showH w = -- "\t0x" ++ showHex w " " printf ('\t':hexWordFmt) w elRows = takeEach4 (elems arr) takeEach4 :: [a] -> [[a]] takeEach4 [] = [] takeEach4 xs = first:takeEach4 rest where (first,rest) = splitAt 4 xs ----------------------------------------------- -- | Reads the format generated by the 'Show' instance, checks -- hash values for executable and type and parses exactly as much as -- the included data size announces. instance Typeable a => Read (Serialized a) -- using ReadP parser (base-4.x) where readsPrec _ input = case parseP input of [] -> throw P_ParseError -- no parse [((sz,tp,dat),r)] -> let !(UArray _ _ _ arr# ) = listArray (0,sz-1) dat t = typeFP (undefined::a) in if t == tp then [(Serialized arr# , r)] else throw P_TypeMismatch other-> throw P_ParseError -- ambiguous parse for packet -- | Packet Parser, reads the format generated by the @Read@ instance. -- Could also consume other formats of the array (not implemented). -- Returns: (data size in words, type fingerprint, array values) parseP :: ReadS (Int, FP, [TargetWord]) parseP = readP_to_S $ -- read header with size and type, then iterate over array values, -- reading several hex words in one row, separated by -- tab and space. Packet size needed to avoid returning a prefix. do string "Serialization Packet, size " sz_str <- munch1 isDigit let sz = read sz_str::Int string ", program " h <- munch1 (not . (== '\n')) when (read h /= prgHash) (throw P_BinaryMismatch) -- executables do not match. No ambiguous parses here, -- so just throw; otherwise we would only pfail. newline string ", type " tp <- munch1 (not . (== '\n')) newline let startRow = do { many1 digit; colon; tabSpace } row = do { startRow; sepBy1 hexNum tabSpace } valss <- sepBy1 row newline skipSpaces -- eat remaining spaces let vals = concat valss l = length vals -- filter out wrong lengths: if (sz /= length vals) then pfail else return (sz, read tp, vals) digit = satisfy isDigit colon = satisfy (==':') tabSpace = munch1 ( \x -> x `elem` " \t" ) newline = munch1 (\x -> x `elem` " \n") hexNum :: ReadP TargetWord hexNum = do string "0x" ds <- munch hexDigit return (read ("0x" ++ ds)) where hexDigit = (\x -> x `elem` "0123456789abcdefABCDEF") ------------------------------------------------------------------ -- | The binary format of @'Serialized' a@ data includes FingerPrint -- hash values for type and executable binary, which are checked -- when reading Serialized data back in using @get@. instance Typeable a => Binary (Serialized a) where -- We make our life simple and construct/deconstruct Word -- (U)Arrays, quite as we did in the Show/Read instances. put (Serialized bArr#) = do put prgHash put (typeFP (undefined :: a)) let arr = UArray 0 (sz-1) sz bArr# :: UArray Int TargetWord sz = case sizeofByteArray# bArr# of sz# -> (I# sz# ) `div` sizeOf(undefined::TargetWord) put arr get = do hash <- get :: Get FP when (hash /= prgHash) (throw P_BinaryMismatch) -- executables do not match tp <- get :: Get FP when (tp /= typeFP (undefined :: a)) (throw P_TypeMismatch) -- Type error during packet parse uarr <- get :: Get (UArray Int TargetWord) let !(UArray _ _ sz bArr#) = uarr return ( Serialized bArr# ) ------------------------------------------------------------------ -- $ComparingTypes ----------------------------------------------- -- Helper functions to compare types at runtime: -- We use type "fingerprints" defined in 'GHC.Fingerprint.Type' -- This should ensure (as of GHC.7.8) that types with the same name -- but different definition get different hashes. (however, we also -- require the executable to be exactly the same, so this is not -- strictly necessary anyway). -- Typeable context for dynamic type checks. -- | The module uses a custom GHC fingerprint type with its two Word64 -- fields, to be able to /read/ fingerprints data FP = FP Word64 Word64 deriving (Read, Show, Eq) -- | checks whether the type of the given expression matches the given Fingerprint matches :: Typeable a => a -> FP -> Bool matches x (FP c1 c2) = f1 == c1 && f2 == c2 where (GHC.Fingerprint.Fingerprint f1 f2) = typeRepFingerprint (typeOf x) #if ! MIN_VERSION_base(4,8,0) -- typeRepFingerprint is provided since base-4.8.0.0 typeRepFingerprint typeRep = ghcFP where TypeRep ghcFP _ _ = typeRep #endif -- | creates an 'FP' from a GHC 'Fingerprint' toFP :: GHC.Fingerprint.Fingerprint -> FP toFP (GHC.Fingerprint.Fingerprint f1 f2) = FP f1 f2 -- | returns the type fingerprint of an expression typeFP :: Typeable a => a -> FP typeFP = toFP . typeRepFingerprint . typeOf -- | Binary instance for fingerprint data (encoding TypeRep and -- executable in binary-encoded @Serialized a@) instance Binary FP where put (FP f1 f2) = do put f1 put f2 get = do f1 <- get :: Get Word64 f2 <- get :: Get Word64 return (FP f1 f2) ----------------------------------------------- -- | To check that the program (executable) is identical when packing -- and unpacking, the fingerprint type from above is used (Read/Show -- instances required). An 'FP' fingerprint of the executable is -- computed once, by unsafePerformIO inside this CAF (safe to inline, -- just inefficient). {-# NOINLINE prgHash #-} prgHash :: FP prgHash = unsafePerformIO $ getExecutablePath >>= getFileHash >>= return . toFP ----------------------------------------------- -- | The target word size is the size of a machine word on the -- platform we run on. -- -- This type is only used in Binary, Read and Show instances, where -- packets are stored as 'UArrays' of 'TargetWord'. -- -- Actually, GHC uses machine word size (as Haskell 2010 spec. does -- not fix it) so we could just use Word. See -- <http://www.haskell.org/ghc/docs/7.8.3/html/users_guide/bugs-and-infelicities.html#haskell-98-2010-undefined> -- We'd rather just import 'GHC.Constants.TargetWord' but it was -- removed. This code here is a cheap and incomplete hack, as the -- package would otherwise need a configure script. #if x86_64_BUILD_ARCH type TargetWord = Word64 hexWordFmt = "0x%016x" #elif i386_BUILD_ARCH type TargetWord = Word32 hexWordFmt = "0x%08x" #elif powerpc_BUILD_ARCH #error Don't know word size of your Power-PC model #else #warning Don't know the word size on your machine. type TargetWord = Word #endif