{-# LANGUAGE DeriveFunctor, DeriveFoldable, DeriveTraversable #-} {-# LANGUAGE FlexibleContexts, TemplateHaskell #-} -- | This module defines a function to perform unification of LLVM types. -- -- The unification step is intended to eliminate duplicate struct -- definitions wherever possible. Duplicate structs have .NNN -- suffixes after their names and are constructed by the linker when -- it cannot prove that types are equivalent. -- -- In theory, llvm-link should do this (and it does try). However, -- the implementation in llvm-link is poor and gives up on even -- trivial cases. This implementation is a bit more robust and -- sensitive to some frontend quirks. -- -- This is a low-level implementation that is run at IR translation -- time. The result of 'unifyTypes' is a map from CType pointers to -- Type objects. This map can be used at IR construction time to map each -- C type to a canonical high-level representation if at all possible. -- -- * Opaque types are eliminated if there is a core of unifiable -- definitions of types with the same name -- -- * A canonical representative is chosen for each base type name -- -- Moving this analysis to very early in the translation process has -- two major benefits: 1) all higher-level analyses can do type-level -- comparisons without ad-hoc normalization steps, and 2) the knot -- tying process in the IR translation is simpler because types are -- already fully resolved. -- -- TODO: This could be made more configurable for more aggressive -- unification (e.g., in the presence of {}* fields - this elimination is not -- exactly safe) module LLVM.Internal.TypeUnification ( unifyTypes ) where import Control.Applicative import Control.Monad ( filterM, foldM, forM_, forM ) import Control.Monad.Trans.Class import Control.Monad.Error.Class import Control.Monad.EitherK import Control.Unification import Control.Unification.IntVar import Data.Foldable ( Foldable ) import Data.Traversable ( Traversable ) import Data.Set ( Set ) import qualified Data.Set as S import Data.HashMap.Strict ( HashMap ) import qualified Data.HashMap.Strict as HM import Data.HashTable.IO ( BasicHashTable ) import qualified Data.HashTable.IO as HT import Data.Map ( Map ) import qualified Data.Map as M import Data.Maybe ( fromMaybe ) import Data.Monoid import qualified Data.Text as T import Data.Word ( Word64 ) import Debug.Trace.LocationTH import Foreign.Ptr import System.IO.Unsafe ( unsafePerformIO ) import Data.LLVM.Types import LLVM.Internal.Interop -- This is adapted from one of the tests in unification-fd -- | A type of Type terms data T a = T String [a] deriving (Show, Eq, Ord, Functor, Foldable, Traversable) instance Unifiable T where zipMatch (T a xs) (T b ys) = case a == b of True -> case pair xs ys of Nothing -> Nothing Just p -> Just (T a (map Right p)) False -> Nothing type TTerm = UTerm T IntVar -- | Build a term from a name and a list of term arguments. typeTerm :: String -> [TTerm] -> TTerm typeTerm = (UTerm .) . T unifyTypes :: [TypePtr] -> IO (BasicHashTable Word64 Type, Map Type Int) unifyTypes tptrs = do tmap <- HT.new smap <- HT.new -- Translate all non-struct types directly into the hash table. The -- fold only accumulates struct types (including unions and classes) into -- a HashMap from name to ptr. -- -- This accumulated map is the one that will be unified over. structsByName <- foldM (translateAndGroup tmap smap) mempty tptrs -- Now, for each name in structsByName, attempt to unify all of the -- types in the group. For each type that does unify, make an entry -- in tmap for its pointer to the single representative Type. For -- the rest, do a basic translation. mapM_ (unifyStructTypes tmap smap) (HM.toList structsByName) typeSizes <- HT.foldM (htToMap tmap) mempty smap return (tmap, typeSizes) htToMap :: BasicHashTable Word64 Type -> Map Type Int -> (Word64, Int) -> IO (Map Type Int) htToMap tmap a (ip, v) = do Just t <- HT.lookup tmap ip return $ M.insert t v a unifyStructTypes :: BasicHashTable Word64 Type -> BasicHashTable Word64 Int -> (String, [(TypePtr, Int)]) -> IO () unifyStructTypes tmap smap (name, allPtrs) = do sizedPtrs <- filterM (isNotOpaque . fst) allPtrs let ptrs = map fst sizedPtrs -- All of the types reachable from the list of input types @ptrs@ retainedTypes <- retainedTypeSearch ptrs -- The struct types that are reachable from the input types, grouped -- by their base names. namedStructs <- filterM isStruct (S.toList retainedTypes) let dependencyTypes = filter (`notElem` ptrs) namedStructs groupedDependencyTypes <- groupByBaseName dependencyTypes (unifyResult, _) <- runIntBindingT $ do -- Assign a logic variable to each struct type reachable from -- the input types (including the input types) varMap <- assignVars (HM.insert name ptrs groupedDependencyTypes) -- Convert everything to terms, then start unifying groups. -- The final unification gives us the answer we want. primaryTerms <- mapM (toTerm varMap) ptrs let dterms = map snd $ HM.toList groupedDependencyTypes depTermGroups <- forM dterms $ \dptrs -> do -- If a dependency is opaque, don't bother trying to unify it -- since it won't have the right arity. nonOpaqueDeps <- lift $ filterM isNotOpaque dptrs dts <- mapM (toTerm varMap) nonOpaqueDeps return dts runEitherKT $ do mapM_ unifies depTermGroups unifies primaryTerms case unifyResult of Left _ -> -- Make an entry in tmap for each input forM_ allPtrs $ \(p, sz) -> do Just name' <- cTypeName p isPacked <- cTypeIsPacked p mptrs <- cTypeList p let t = TypeStruct (Right (T.pack name')) (map (delayedLookup tmap) mptrs) isPacked ip = fromIntegral (ptrToIntPtr p) HT.insert tmap ip t HT.insert smap ip sz Right _ -> case sizedPtrs of [] -> do -- Make a single opaque entry for allPtrs let t = TypeStruct (Right (T.pack name)) [] False forM_ allPtrs $ \(p, sz) -> do let ip = fromIntegral (ptrToIntPtr p) HT.insert tmap ip t HT.insert smap ip sz (p0, _) : _ -> do -- Make one type to represent all of these types that were unified -- and use it as the entry for each of the input pointers. mptrs <- cTypeList p0 isPacked <- cTypeIsPacked p0 let t = TypeStruct (Right (T.pack name)) (map (delayedLookup tmap) mptrs) isPacked forM_ allPtrs $ \(p, sz) -> do let ip = fromIntegral (ptrToIntPtr p) HT.insert tmap ip t HT.insert smap ip sz translateAndGroup :: BasicHashTable Word64 Type -> BasicHashTable Word64 Int -> HashMap String [(TypePtr, Int)] -> TypePtr -> IO (HashMap String [(TypePtr, Int)]) translateAndGroup tmap sizeMap structs tptr = do let ip = fromIntegral $ ptrToIntPtr tptr tag <- cTypeTag tptr byteSize <- cTypeSizeInBytes tptr HT.insert sizeMap ip byteSize case tag of TYPE_STRUCT -> do name <- cTypeName tptr case name of Just n -> return $ HM.insertWith (++) (structBaseName n) [(tptr, byteSize)] structs -- Anonymous struct Nothing -> do isPacked <- cTypeIsPacked tptr ptrs <- cTypeList tptr let ts = map (delayedLookup tmap) ptrs stype = TypeStruct (Left ip) ts isPacked HT.insert tmap ip stype return structs _ -> do t' <- case tag of TYPE_VOID -> return TypeVoid TYPE_FLOAT -> return TypeFloat TYPE_DOUBLE -> return TypeDouble TYPE_X86_FP80 -> return TypeX86FP80 TYPE_FP128 -> return TypeFP128 TYPE_PPC_FP128 -> return TypePPCFP128 TYPE_LABEL -> return TypeLabel TYPE_METADATA -> return TypeMetadata TYPE_X86_MMX -> return TypeX86MMX TYPE_INTEGER -> do sz <- cTypeSize tptr return $ TypeInteger sz TYPE_FUNCTION -> do isVa <- cTypeIsVarArg tptr rtp <- cTypeInner tptr argTypePtrs <- cTypeList tptr let rt = delayedLookup tmap rtp argTs = map (delayedLookup tmap) argTypePtrs return $ TypeFunction rt argTs isVa TYPE_ARRAY -> do sz <- cTypeSize tptr itp <- cTypeInner tptr let innerType = delayedLookup tmap itp return $ TypeArray sz innerType TYPE_VECTOR -> do sz <- cTypeSize tptr itp <- cTypeInner tptr let innerType = delayedLookup tmap itp return $ TypeVector sz innerType TYPE_POINTER -> do itp <- cTypeInner tptr addrSpc <- cTypeAddrSpace tptr let innerType = delayedLookup tmap itp return $ TypePointer innerType addrSpc TYPE_STRUCT -> $failure "Impossible, cannot have a struct here" HT.insert tmap ip t' return structs -- | Look up the Type associated with a TypePtr lazily and impurely. -- This relies on no entries in the hash table being inspected until -- everything is unified and all entries are made. -- -- This is simpler, faster, and more space efficient than using real -- knot tying. It is also safe here since there are never any lookups -- in the hash table in this algorithm - only the IR translator -- actually performs lookups. delayedLookup :: BasicHashTable Word64 Type -> TypePtr -> Type delayedLookup tmap ptr = unsafePerformIO $ do res <- HT.lookup tmap $ fromIntegral (ptrToIntPtr ptr) return $ fromMaybe errMsg res where errMsg = $failure ("Type lookup failure for " ++ show ptr) -- | Perform a breadth-first search to discover all types referenced -- by the given input type list retainedTypeSearch :: [TypePtr] -> IO (Set TypePtr) retainedTypeSearch = go mempty . S.fromList where go visited q = do let vals = q `S.difference` visited case S.null vals of True -> return visited False -> do let visited' = visited `S.union` vals q' <- foldM addValuesFrom mempty (S.toList vals) go visited' q' addValuesFrom q tp = do tag <- cTypeTag tp case tag of TYPE_STRUCT -> do ptrs <- cTypeList tp return $ q `S.union` S.fromList ptrs TYPE_FUNCTION -> do rtp <- cTypeInner tp atps <- cTypeList tp return $ q `S.union` S.fromList (rtp : atps) TYPE_ARRAY -> do itp <- cTypeInner tp return $ S.insert itp q TYPE_VECTOR -> do itp <- cTypeInner tp return $ S.insert itp q TYPE_POINTER -> do itp <- cTypeInner tp return $ S.insert itp q _ -> return $ S.insert tp q -- | Top level term creator; this only works for named structs. toTerm :: Map TypePtr TTerm -> TypePtr -> IntBindingT T IO TTerm toTerm varMap tp = do let Just svar = M.lookup tp varMap Just name <- lift $ cTypeName tp ptrs <- lift $ cTypeList tp innerTerms <- mapM (toTerm' varMap) ptrs return $ typeTerm (structBaseName name) (svar : innerTerms) -- | Inner term creator; for structs, just return the variable -- assigned to that Type. toTerm' :: Map TypePtr TTerm -> TypePtr -> IntBindingT T IO TTerm toTerm' varMap tp = do tag <- lift $ cTypeTag tp case tag of TYPE_VOID -> return $ typeTerm "%primitive.void" [] TYPE_FLOAT -> return $ typeTerm "%primitive.float" [] TYPE_DOUBLE -> return $ typeTerm "%primtive.double" [] TYPE_X86_FP80 -> return $ typeTerm "%primitive.x86_fp80" [] TYPE_FP128 -> return $ typeTerm "%primitive.fp128" [] TYPE_PPC_FP128 -> return $ typeTerm "%primitive.ppc_fp128" [] TYPE_LABEL -> return $ typeTerm "%primitive.label" [] TYPE_METADATA -> return $ typeTerm "%primitive.metadata" [] TYPE_X86_MMX -> return $ typeTerm "%primitive.x86_mmx" [] TYPE_INTEGER -> do sz <- lift $ cTypeSize tp return $ typeTerm ("%primitive.i" ++ show sz) [] TYPE_FUNCTION -> do rtp <- lift $ cTypeInner tp atps <- lift $ cTypeList tp innerTerms <- mapM (toTerm' varMap) (rtp : atps) return $ typeTerm "%primitive.function" innerTerms TYPE_ARRAY -> do sz <- lift $ cTypeSize tp itp <- lift $ cTypeInner tp ity <- toTerm' varMap itp return $ typeTerm ("%primitive.array." ++ show sz) [ity] TYPE_VECTOR -> do sz <- lift $ cTypeSize tp itp <- lift $ cTypeInner tp ity <- toTerm' varMap itp return $ typeTerm ("%primitive.vector." ++ show sz) [ity] TYPE_POINTER -> do -- If the struct is an opaque literal ptr {}*, assign it a fresh -- variable (we will allow such an opaque ref to be anything -- since the frontend seems to put them in strange places). itp <- lift $ cTypeInner tp innerTag <- lift $ cTypeTag itp case innerTag of TYPE_STRUCT -> do mptrs <- lift $ cTypeList itp case null mptrs of True -> UVar <$> freeVar False -> do ity <- toTerm' varMap itp return $ typeTerm "*" [ity] _ -> do ity <- toTerm' varMap itp return $ typeTerm "*" [ity] TYPE_STRUCT -> case M.lookup tp varMap of Nothing -> do mptrs <- lift $ cTypeList tp innerTerms <- mapM (toTerm' varMap) mptrs return $ typeTerm "%anon.struct" innerTerms Just v -> return v -- | Assign fresh unification variables to every struct type in the -- input map. assignVars :: HashMap String [TypePtr] -> IntBindingT T IO (Map TypePtr TTerm) assignVars = foldM assignVarL mempty . HM.toList where assignVarL acc (_, ts) = foldM assignVar acc ts assignVar acc ty = do x <- freeVar return $! M.insert ty (UVar x) acc -- | Take a list of types and group them by their base names (that is, -- the part of the name up to the trailing .NNN). groupByBaseName :: [TypePtr] -> IO (HashMap String [TypePtr]) groupByBaseName = foldM addToMap mempty where addToMap m tp = do -- All struct types that make it here are named. Just name <- cTypeName tp return $ HM.insertWith (++) (structBaseName name) [tp] m -- | Unify a list of terms unifies :: (Functor (e m), BindingMonad t v m, MonadTrans e, MonadError (UnificationFailure t v) (e m)) => [UTerm t v] -> e m () unifies [] = return () unifies [_] = return () unifies (y:ys) = go ys y where go [] _ = return () go (x:xs) z = unify x z >>= go xs -- Helpers -- | Like zip, but only returns a result if the two input lists are -- the same length pair :: [a] -> [b] -> Maybe [(a, b)] pair xs ys = case length xs == length ys of False -> Nothing True -> Just (zip xs ys) isStruct :: TypePtr -> IO Bool isStruct tp = do tag <- cTypeTag tp case tag of TYPE_STRUCT -> do n <- cTypeName tp case n of Nothing -> return False Just _ -> return True _ -> return False isNotOpaque :: TypePtr -> IO Bool isNotOpaque tp = do ptrs <- cTypeList tp return (not (null ptrs))