module LLVM.Extra.Extension.X86 (
X86.maxss, X86.minss, X86.maxps, X86.minps,
X86.maxsd, X86.minsd, X86.maxpd, X86.minpd,
cmpss, cmpps, cmpsd, cmppd, cmpps256, cmppd256,
pcmpgtb, pcmpgtw, pcmpgtd, pcmpgtq,
pcmpugtb, pcmpugtw, pcmpugtd, pcmpugtq,
pminsb, pminsw, pminsd,
pmaxsb, pmaxsw, pmaxsd,
pminub, pminuw, pminud,
pmaxub, pmaxuw, pmaxud,
pabsb, pabsw, pabsd,
pmuludq, pmuldq,
pmulld,
cvtps2dq, cvtpd2dq,
cvtdq2ps, cvtdq2pd,
ldmxcsr, stmxcsr, withMXCSR,
X86.haddps, X86.haddpd, X86.dpps, X86.dppd,
roundss, X86.roundps, roundsd, X86.roundpd,
absss, abssd, absps, abspd,
) where
import qualified LLVM.Extra.Extension.X86Auto as X86
import qualified LLVM.Extra.Extension as Ext
import LLVM.Extra.Extension.X86Auto (
V2Double, V4Float,
V2Int64, V2Word64,
V4Int32, V4Word32,
V8Int16, V8Word16,
V16Int8, V16Word8,
)
import LLVM.Extra.ExtensionCheck.X86
(sse1, sse2, sse41, sse42, )
import qualified LLVM.Extra.Monad as M
import qualified LLVM.Extra.ArithmeticPrivate as A
import qualified LLVM.Core as LLVM
import LLVM.Core
(Value, Vector, valueOf, constOf, vector,
CodeGenFunction, FPPredicate, )
import qualified Type.Data.Num.Decimal as TypeNum
import qualified Data.NonEmpty.Class as NonEmptyC
import qualified Data.Empty as Empty
import Data.NonEmpty ((!:), )
import Data.Bits (clearBit, complement, )
import Data.Word (Word8, Word32, Word64, )
import Control.Monad.HT ((<=<), )
import Control.Applicative (pure, )
import Foreign.Ptr (Ptr, )
-- | Turn a compare primitive that takes an immediate comparison code
-- into one that is selected by an LLVM 'FPPredicate'.
--
-- The primitive @g@ receives both operands and a 'Word8' code.
-- The codes used here follow the SSE @CMPPS@ predicate encoding
-- (0 EQ, 1 LT, 2 LE, 3 UNORD, 4 NEQ, 5 NLT, 6 NLE, 7 ORD).
-- The result of @g@ is bitcast to an integer vector of the same size.
--
-- The constant predicates are answered without calling @g@:
-- 'LLVM.FPFalse' yields the all-zero vector and
-- 'LLVM.FPT' yields a vector with all bits set (@-1@ in every element),
-- which is the same mask convention the compare results use.
switchFPPred ::
   (Num i, LLVM.IsConst i, LLVM.IsInteger i, LLVM.IsPrimitive i,
    LLVM.IsFirstClass v,
    TypeNum.Positive n,
    LLVM.IsSized v, LLVM.IsSized (Vector n i),
    LLVM.SizeOf v ~ LLVM.SizeOf (Vector n i)) =>
   (Value v -> Value v -> Value Word8 -> CodeGenFunction r (Value v)) ->
   FPPredicate -> Value v -> Value v -> CodeGenFunction r (Value (Vector n i))
switchFPPred g p x y =
   let -- run the primitive with code @i@ and reinterpret the result as integers
       f i x0 y0 = LLVM.bitcast =<< g x0 y0 (valueOf i)
   in  case p of
          LLVM.FPFalse -> return (LLVM.value LLVM.zero)
          LLVM.FPOEQ -> f 0 x y
          -- there is no GT/GE code: x > y is evaluated as y < x
          LLVM.FPOGT -> f 1 y x
          LLVM.FPOGE -> f 2 y x
          LLVM.FPOLT -> f 1 x y
          LLVM.FPOLE -> f 2 x y
          -- ordered and not equal
          LLVM.FPONE -> M.liftR2 A.and (f 7 x y) (f 4 x y)
          LLVM.FPORD -> f 7 x y
          LLVM.FPUNO -> f 3 x y
          -- unordered or equal
          LLVM.FPUEQ -> M.liftR2 A.or (f 3 x y) (f 0 x y)
          -- unordered-or-greater is the negation of less-or-equal, and so on
          LLVM.FPUGT -> f 6 x y
          LLVM.FPUGE -> f 5 x y
          LLVM.FPULT -> f 6 y x
          LLVM.FPULE -> f 5 y x
          LLVM.FPUNE -> f 4 x y
          -- "always true" must be an all-ones mask, not the number 1
          LLVM.FPT -> return (valueOf $ pure (-1))
-- | 'X86.cmpss' with its immediate comparison code chosen from an
-- 'FPPredicate' by 'switchFPPred'; the compare result is bitcast to 'V4Int32'.
cmpss :: Ext.T (FPPredicate -> V4Float -> V4Float -> CodeGenFunction r V4Int32)
cmpss = fmap switchFPPred X86.cmpss
-- | 'X86.cmpps' with its immediate comparison code chosen from an
-- 'FPPredicate' by 'switchFPPred'; the compare result is bitcast to 'V4Int32'.
cmpps :: Ext.T (FPPredicate -> V4Float -> V4Float -> CodeGenFunction r V4Int32)
cmpps = fmap switchFPPred X86.cmpps
-- | 'X86.cmpsd' with its immediate comparison code chosen from an
-- 'FPPredicate' by 'switchFPPred'; the compare result is bitcast to 'V2Int64'.
cmpsd :: Ext.T (FPPredicate -> V2Double -> V2Double -> CodeGenFunction r V2Int64)
cmpsd = fmap switchFPPred X86.cmpsd
-- | 'X86.cmppd' with its immediate comparison code chosen from an
-- 'FPPredicate' by 'switchFPPred'; the compare result is bitcast to 'V2Int64'.
cmppd :: Ext.T (FPPredicate -> V2Double -> V2Double -> CodeGenFunction r V2Int64)
cmppd = fmap switchFPPred X86.cmppd
-- | 'X86.cmpps256' with its immediate comparison code chosen from an
-- 'FPPredicate' by 'switchFPPred'; the compare result is bitcast to
-- 'X86.V8Int32'.
cmpps256 :: Ext.T (FPPredicate -> X86.V8Float -> X86.V8Float -> CodeGenFunction r X86.V8Int32)
cmpps256 = fmap switchFPPred X86.cmpps256
-- | 'X86.cmppd256' with its immediate comparison code chosen from an
-- 'FPPredicate' by 'switchFPPred'; the compare result is bitcast to
-- 'X86.V4Int64'.
cmppd256 :: Ext.T (FPPredicate -> X86.V4Double -> X86.V4Double -> CodeGenFunction r X86.V4Int64)
cmppd256 = fmap switchFPPred X86.cmppd256
-- | The intrinsic named @pcmpgt.b@, requested through 'Ext.intrinsic'
-- together with the 'sse2' check.
-- NOTE(review): presumably the packed signed greater-than compare on bytes,
-- giving an all-ones or all-zero element per lane - confirm against the
-- LLVM intrinsic.
pcmpgtb :: Ext.T (V16Int8 -> V16Int8 -> CodeGenFunction r V16Int8)
pcmpgtb = Ext.intrinsic sse2 "pcmpgt.b"
-- | The intrinsic named @pcmpgt.w@, requested through 'Ext.intrinsic'
-- together with the 'sse2' check.
-- NOTE(review): presumably the packed signed greater-than compare on
-- 16-bit elements - confirm against the LLVM intrinsic.
pcmpgtw :: Ext.T (V8Int16 -> V8Int16 -> CodeGenFunction r V8Int16)
pcmpgtw = Ext.intrinsic sse2 "pcmpgt.w"
-- | The intrinsic named @pcmpgt.d@, requested through 'Ext.intrinsic'
-- together with the 'sse2' check.
-- NOTE(review): presumably the packed signed greater-than compare on
-- 32-bit elements - confirm against the LLVM intrinsic.
pcmpgtd :: Ext.T (V4Int32 -> V4Int32 -> CodeGenFunction r V4Int32)
pcmpgtd = Ext.intrinsic sse2 "pcmpgt.d"
-- | The intrinsic named @pcmpgtq@, requested through 'Ext.intrinsic'
-- together with the 'sse42' check.
-- NOTE(review): presumably the packed signed greater-than compare on
-- 64-bit elements - confirm against the LLVM intrinsic.
pcmpgtq :: Ext.T (V2Int64 -> V2Int64 -> CodeGenFunction r V2Int64)
pcmpgtq = Ext.intrinsic sse42 "pcmpgtq"
-- | Derive a comparison on unsigned element vectors from a comparison on
-- the equally sized signed element vectors.
--
-- Both operands have @1 + div maxBound 2@ (half of the unsigned range)
-- subtracted from every element and are then bitcast to the signed vector
-- type. The given comparison runs on these shifted operands and its result
-- is bitcast back to the unsigned vector type.
pcmpuFromPcmp ::
   (TypeNum.Positive n,
    LLVM.IsPrimitive s,
    LLVM.IsPrimitive u, LLVM.IsArithmetic u, LLVM.IsConst u,
    Bounded u, Integral u,
    LLVM.IsSized (Vector n s), LLVM.IsSized (Vector n u),
    LLVM.SizeOf (Vector n s) ~ LLVM.SizeOf (Vector n u)) =>
   Ext.T (Value (Vector n s) -> Value (Vector n s) -> CodeGenFunction r (Value (Vector n s))) ->
   Ext.T (Value (Vector n u) -> Value (Vector n u) -> CodeGenFunction r (Value (Vector n u)))
pcmpuFromPcmp signedCmp =
   Ext.with signedCmp $ \cmp a b -> do
      -- half of the unsigned range, replicated into every lane
      let bias = valueOf (pure (div maxBound 2 + 1))
      aShifted <- A.sub a bias >>= LLVM.bitcast
      bShifted <- A.sub b bias >>= LLVM.bitcast
      cmp aShifted bShifted >>= LLVM.bitcast
-- | Comparison on unsigned byte vectors, built from 'pcmpgtb'
-- by 'pcmpuFromPcmp'.
pcmpugtb :: Ext.T (V16Word8 -> V16Word8 -> CodeGenFunction r V16Word8)
pcmpugtb = pcmpuFromPcmp pcmpgtb
-- | Comparison on unsigned 16-bit element vectors, built from 'pcmpgtw'
-- by 'pcmpuFromPcmp'.
pcmpugtw :: Ext.T (V8Word16 -> V8Word16 -> CodeGenFunction r V8Word16)
pcmpugtw = pcmpuFromPcmp pcmpgtw
-- | Comparison on unsigned 32-bit element vectors, built from 'pcmpgtd'
-- by 'pcmpuFromPcmp'.
pcmpugtd :: Ext.T (V4Word32 -> V4Word32 -> CodeGenFunction r V4Word32)
pcmpugtd = pcmpuFromPcmp pcmpgtd
-- | Comparison on unsigned 64-bit element vectors, built from 'pcmpgtq'
-- by 'pcmpuFromPcmp'.
pcmpugtq :: Ext.T (V2Word64 -> V2Word64 -> CodeGenFunction r V2Word64)
pcmpugtq = pcmpuFromPcmp pcmpgtq
-- | Short names for 'X86.pminsb128' and 'X86.pmaxsb128'.
-- NOTE(review): presumably element-wise signed minimum and maximum on
-- bytes - confirm against the X86Auto module.
pminsb, pmaxsb :: Ext.T (V16Int8 -> V16Int8 -> CodeGenFunction r V16Int8)
pminsb = X86.pminsb128
pmaxsb = X86.pmaxsb128
-- | Short names for 'X86.pminsw128' and 'X86.pmaxsw128'.
-- NOTE(review): presumably element-wise signed minimum and maximum on
-- 16-bit elements - confirm against the X86Auto module.
pminsw, pmaxsw :: Ext.T (V8Int16 -> V8Int16 -> CodeGenFunction r V8Int16)
pminsw = X86.pminsw128
pmaxsw = X86.pmaxsw128
-- | Short names for 'X86.pminsd128' and 'X86.pmaxsd128'.
-- NOTE(review): presumably element-wise signed minimum and maximum on
-- 32-bit elements - confirm against the X86Auto module.
pminsd, pmaxsd :: Ext.T (V4Int32 -> V4Int32 -> CodeGenFunction r V4Int32)
pminsd = X86.pminsd128
pmaxsd = X86.pmaxsd128
-- | Short names for 'X86.pminub128' and 'X86.pmaxub128'.
-- NOTE(review): presumably element-wise unsigned minimum and maximum on
-- bytes - confirm against the X86Auto module.
pminub, pmaxub :: Ext.T (V16Word8 -> V16Word8 -> CodeGenFunction r V16Word8)
pminub = X86.pminub128
pmaxub = X86.pmaxub128
-- | Short names for 'X86.pminuw128' and 'X86.pmaxuw128'.
-- NOTE(review): presumably element-wise unsigned minimum and maximum on
-- 16-bit elements - confirm against the X86Auto module.
pminuw, pmaxuw :: Ext.T (V8Word16 -> V8Word16 -> CodeGenFunction r V8Word16)
pminuw = X86.pminuw128
pmaxuw = X86.pmaxuw128
-- | Short names for 'X86.pminud128' and 'X86.pmaxud128'.
-- NOTE(review): presumably element-wise unsigned minimum and maximum on
-- 32-bit elements - confirm against the X86Auto module.
pminud, pmaxud :: Ext.T (V4Word32 -> V4Word32 -> CodeGenFunction r V4Word32)
pminud = X86.pminud128
pmaxud = X86.pmaxud128
-- | Short name for 'X86.pabsb128'.
-- NOTE(review): presumably the element-wise absolute value on bytes -
-- confirm against the X86Auto module.
pabsb :: Ext.T (V16Int8 -> CodeGenFunction r V16Int8)
pabsb = X86.pabsb128
-- | Short name for 'X86.pabsw128'.
-- NOTE(review): presumably the element-wise absolute value on 16-bit
-- elements - confirm against the X86Auto module.
pabsw :: Ext.T (V8Int16 -> CodeGenFunction r V8Int16)
pabsw = X86.pabsw128
-- | Short name for 'X86.pabsd128'.
-- NOTE(review): presumably the element-wise absolute value on 32-bit
-- elements - confirm against the X86Auto module.
pabsd :: Ext.T (V4Int32 -> CodeGenFunction r V4Int32)
pabsd = X86.pabsd128
-- | Short name for 'X86.pmuludq128': takes two 'V4Word32' operands and
-- produces a 'V2Word64'.
-- NOTE(review): presumably a widening unsigned multiply of selected
-- 32-bit lanes - confirm which lanes against the instruction reference.
pmuludq :: Ext.T (V4Word32 -> V4Word32 -> CodeGenFunction r V2Word64)
pmuludq = X86.pmuludq128
-- | Short name for 'X86.pmuldq128': takes two 'V4Int32' operands and
-- produces a 'V2Int64'.
-- NOTE(review): presumably a widening signed multiply of selected
-- 32-bit lanes - confirm which lanes against the instruction reference.
pmuldq :: Ext.T (V4Int32 -> V4Int32 -> CodeGenFunction r V2Int64)
pmuldq = X86.pmuldq128
-- | Multiplication of 'V4Word32' vectors. No intrinsic is called:
-- the generic 'LLVM.mul' is wrapped with the 'sse41' check.
pmulld :: Ext.T (V4Word32 -> V4Word32 -> CodeGenFunction r V4Word32)
pmulld = Ext.wrap sse41 LLVM.mul
-- | Re-export of 'X86.cvtps2dq' at the type 'V4Float' to 'V4Int32'.
-- NOTE(review): the rounding behaviour of the conversion is not visible
-- here - confirm against the instruction reference.
cvtps2dq :: Ext.T (V4Float -> CodeGenFunction r V4Int32)
cvtps2dq = X86.cvtps2dq
-- | Re-export of 'X86.cvtpd2dq' at the type 'V2Double' to 'V4Int32'.
-- NOTE(review): the result has more lanes than the input; the content of
-- the extra lanes is not visible here - confirm against the instruction
-- reference.
cvtpd2dq :: Ext.T (V2Double -> CodeGenFunction r V4Int32)
cvtpd2dq = X86.cvtpd2dq
-- | Re-export of 'X86.cvtdq2ps' at the type 'V4Int32' to 'V4Float'.
cvtdq2ps :: Ext.T (V4Int32 -> CodeGenFunction r V4Float)
cvtdq2ps = X86.cvtdq2ps
-- | Re-export of 'X86.cvtdq2pd' at the type 'V4Int32' to 'V2Double'.
-- NOTE(review): the input has more lanes than the result; which lanes are
-- converted is not visible here - confirm against the instruction reference.
cvtdq2pd :: Ext.T (V4Int32 -> CodeGenFunction r V2Double)
cvtdq2pd = X86.cvtdq2pd
-- | Replace an LLVM value of unit type by Haskell's @()@.
-- The argument is never inspected.
valueUnit :: Value () -> ()
valueUnit = const ()
-- | The intrinsic named @ldmxcsr@, requested with an empty attribute list
-- and the 'sse1' check. It is called with a pointer to a 'Word32';
-- its unit-typed LLVM result is mapped to @()@ by 'valueUnit'.
ldmxcsr :: Ext.T (Value (Ptr Word32) -> CodeGenFunction r ())
ldmxcsr =
   fmap
      (\call ptr -> fmap valueUnit (call ptr))
      (Ext.intrinsicAttr [] sse1 "ldmxcsr")
-- | The intrinsic named @stmxcsr@, requested with an empty attribute list
-- and the 'sse1' check. It is called with a pointer to a 'Word32';
-- its unit-typed LLVM result is mapped to @()@ by 'valueUnit'.
stmxcsr :: Ext.T (Value (Ptr Word32) -> CodeGenFunction r ())
stmxcsr =
   fmap
      (\call ptr -> fmap valueUnit (call ptr))
      (Ext.intrinsicAttr [] sse1 "stmxcsr")
-- | Emit the given code between a switch to the MXCSR value @mxcsr@
-- and a switch back to the previous value.
--
-- Generated sequence: 'stmxcsr' into a fresh stack slot, store @mxcsr@
-- into a second slot and 'ldmxcsr' from it, emit the wrapped code,
-- then 'ldmxcsr' from the first slot. The result of the wrapped code
-- is returned.
withMXCSR :: Word32 -> Ext.T (CodeGenFunction r a -> CodeGenFunction r a)
withMXCSR mxcsr =
   Ext.with2 ldmxcsr stmxcsr $ \load save body -> do
      -- remember the current register content
      savedSlot <- LLVM.alloca
      save savedSlot
      -- the register can only be loaded from memory, so spill the constant
      requestedSlot <- LLVM.alloca
      LLVM.store (valueOf mxcsr) requestedSlot
      load requestedSlot
      result <- body
      -- switch back to the remembered content
      load savedSlot
      return result
-- | 'X86.roundss' with its first argument fixed to an undefined value,
-- leaving the vector operand and the 'Word32' operand to the caller.
roundss :: Ext.T (V4Float -> Value Word32 -> CodeGenFunction r V4Float)
roundss =
   fmap ($ LLVM.value LLVM.undef) X86.roundss
-- | 'X86.roundsd' with its first argument fixed to an undefined value,
-- leaving the vector operand and the 'Word32' operand to the caller.
roundsd :: Ext.T (V2Double -> Value Word32 -> CodeGenFunction r V2Double)
roundsd =
   fmap ($ LLVM.value LLVM.undef) X86.roundsd
-- | Clear bit 31 of the first element of a 'V4Float' and leave the
-- remaining three elements unchanged.
--
-- The vector is bitcast to 'V4Word32', ANDed with a constant mask and
-- bitcast back. The operation is wrapped with the 'sse1' check.
absss :: Ext.T (V4Float -> CodeGenFunction r V4Float)
absss =
   Ext.wrap sse1 $ \x -> do
      bits <- LLVM.bitcast x
      cleared <- A.and firstLaneMask bits
      LLVM.bitcast cleared
   where
      -- all bits set, except bit 31 of the first element
      firstLaneMask :: V4Word32
      firstLaneMask =
         LLVM.valueOf $ vector $
            clearBit (complement 0) 31 !: NonEmptyC.repeat (complement 0)
-- | Clear bit 63 of the first element of a 'V2Double' and leave the
-- second element unchanged.
--
-- The vector is bitcast to 'V2Word64', ANDed with a constant mask and
-- bitcast back. The operation is wrapped with the 'sse2' check.
abssd :: Ext.T (V2Double -> CodeGenFunction r V2Double)
abssd =
   Ext.wrap sse2 $ \x -> do
      bits <- LLVM.bitcast x
      cleared <- A.and firstLaneMask bits
      LLVM.bitcast cleared
   where
      -- all bits set, except bit 63 of the first element
      firstLaneMask :: V2Word64
      firstLaneMask =
         LLVM.valueOf $ vector $
            clearBit (complement 0) 63 !: complement 0 !: Empty.Cons
-- | Bitwise AND of every element of a vector with the same constant.
mask ::
   (TypeNum.Positive n, LLVM.IsConst w, LLVM.IsPrimitive w, LLVM.IsInteger w) =>
   w -> Value (Vector n w) -> CodeGenFunction r (Value (Vector n w))
mask bits vec =
   A.and (LLVM.valueOf (pure bits)) vec
-- | Clear bit 31 of every element of a 'Float' vector of any size.
--
-- The elements are reinterpreted as 'Word32', masked with 'mask' and
-- reinterpreted as 'Float' again. The operation is wrapped with the
-- 'sse1' check.
absps ::
   (TypeNum.Positive n) =>
   Ext.T (Value (Vector n Float) -> CodeGenFunction r (Value (Vector n Float)))
absps =
   Ext.wrap sse1 $ \x ->
      LLVM.bitcastElements x
         >>= mask (clearBit (complement 0) 31 :: Word32)
         >>= LLVM.bitcastElements
-- | Clear bit 63 of every element of a 'Double' vector of any size.
--
-- The elements are reinterpreted as 'Word64', masked with 'mask' and
-- reinterpreted as 'Double' again. The operation is wrapped with the
-- 'sse2' check.
abspd ::
   (TypeNum.Positive n) =>
   Ext.T (Value (Vector n Double) -> CodeGenFunction r (Value (Vector n Double)))
abspd =
   Ext.wrap sse2 $ \x ->
      LLVM.bitcastElements x
         >>= mask (clearBit (complement 0) 63 :: Word64)
         >>= LLVM.bitcastElements
-- | Combine a 'V4Float' with the result of 'X86.haddps' by two shuffles
-- and one addition.
--
-- NOTE(review): assuming 'X86.haddps' puts the pairwise sums of its first
-- operand into the first two lanes, this computes the running sum
-- @(a,b,c,d) -> (a, a+b, a+b+c, a+b+c+d)@ - confirm against the
-- instruction reference.
_cumulate1s :: Ext.T (V4Float -> CodeGenFunction r V4Float)
_cumulate1s = Ext.with X86.haddps $ \hadd v -> do
   pairSums <- hadd v (LLVM.value LLVM.undef)
   -- lanes 0 and 2 come from the input, lanes 1 and 3 from the pair sums
   interleaved <-
      LLVM.shufflevector v pairSums
         (constOf (vector (0 !: 4 !: 2 !: 5 !: Empty.Cons)))
   -- zero for lanes 0 and 1, first pair sum for lanes 2 and 3
   carry <-
      LLVM.shufflevector pairSums (LLVM.value LLVM.zero)
         (constOf (vector (4 !: 5 !: 0 !: 0 !: Empty.Cons)))
   A.add interleaved carry