{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TupleSections #-}
module Futhark.CodeGen.Backends.COpenCL
( compileProg,
GC.CParts (..),
GC.asLibrary,
GC.asExecutable,
GC.asServer,
)
where
import Control.Monad hiding (mapM)
import Data.List (intercalate)
import Futhark.CodeGen.Backends.COpenCL.Boilerplate
import qualified Futhark.CodeGen.Backends.GenericC as GC
import Futhark.CodeGen.Backends.GenericC.Options
import Futhark.CodeGen.ImpCode.OpenCL
import qualified Futhark.CodeGen.ImpGen.OpenCL as ImpGen
import Futhark.IR.KernelsMem hiding
( CmpSizeLe,
GetSize,
GetSizeMax,
)
import Futhark.MonadFreshNames
import Futhark.Util.Pretty (prettyOneLine)
import qualified Language.C.Quote.OpenCL as C
import qualified Language.C.Syntax as C
compileProg :: MonadFreshNames m => Prog KernelsMem -> m (ImpGen.Warnings, GC.CParts)
compileProg :: Prog KernelsMem -> m (Warnings, CParts)
compileProg Prog KernelsMem
prog = do
( Warnings
ws,
Program
String
opencl_code
String
opencl_prelude
Map KernelName KernelSafety
kernels
[PrimType]
types
Map KernelName SizeClass
sizes
[FailureMsg]
failures
Definitions OpenCL
prog'
) <-
Prog KernelsMem -> m (Warnings, Program)
forall (m :: * -> *).
MonadFreshNames m =>
Prog KernelsMem -> m (Warnings, Program)
ImpGen.compileProg Prog KernelsMem
prog
let cost_centres :: [KernelName]
cost_centres =
[ KernelName
copyDevToDev,
KernelName
copyDevToHost,
KernelName
copyHostToDev,
KernelName
copyScalarToDev,
KernelName
copyScalarFromDev
]
(Warnings
ws,)
(CParts -> (Warnings, CParts)) -> m CParts -> m (Warnings, CParts)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String
-> Operations OpenCL ()
-> CompilerM OpenCL () ()
-> String
-> [Space]
-> [Option]
-> Definitions OpenCL
-> m CParts
forall (m :: * -> *) op.
MonadFreshNames m =>
String
-> Operations op ()
-> CompilerM op () ()
-> String
-> [Space]
-> [Option]
-> Definitions op
-> m CParts
GC.compileProg
String
"opencl"
Operations OpenCL ()
operations
( String
-> String
-> [KernelName]
-> Map KernelName KernelSafety
-> [PrimType]
-> Map KernelName SizeClass
-> [FailureMsg]
-> CompilerM OpenCL () ()
generateBoilerplate
String
opencl_code
String
opencl_prelude
[KernelName]
cost_centres
Map KernelName KernelSafety
kernels
[PrimType]
types
Map KernelName SizeClass
sizes
[FailureMsg]
failures
)
String
include_opencl_h
[String -> Space
Space String
"device", Space
DefaultSpace]
[Option]
cliOptions
Definitions OpenCL
prog'
where
operations :: GC.Operations OpenCL ()
operations :: Operations OpenCL ()
operations =
Operations OpenCL ()
forall op s. Operations op s
GC.defaultOperations
{ opsCompiler :: OpCompiler OpenCL ()
GC.opsCompiler = OpCompiler OpenCL ()
callKernel,
opsWriteScalar :: WriteScalar OpenCL ()
GC.opsWriteScalar = WriteScalar OpenCL ()
writeOpenCLScalar,
opsReadScalar :: ReadScalar OpenCL ()
GC.opsReadScalar = ReadScalar OpenCL ()
readOpenCLScalar,
opsAllocate :: Allocate OpenCL ()
GC.opsAllocate = Allocate OpenCL ()
allocateOpenCLBuffer,
opsDeallocate :: Deallocate OpenCL ()
GC.opsDeallocate = Deallocate OpenCL ()
deallocateOpenCLBuffer,
opsCopy :: Copy OpenCL ()
GC.opsCopy = Copy OpenCL ()
copyOpenCLMemory,
opsStaticArray :: StaticArray OpenCL ()
GC.opsStaticArray = StaticArray OpenCL ()
staticOpenCLArray,
opsMemoryType :: MemoryType OpenCL ()
GC.opsMemoryType = MemoryType OpenCL ()
openclMemoryType,
opsFatMemory :: Bool
GC.opsFatMemory = Bool
True
}
include_opencl_h :: String
include_opencl_h =
[String] -> String
unlines
[ String
"#define CL_TARGET_OPENCL_VERSION 120",
String
"#define CL_USE_DEPRECATED_OPENCL_1_2_APIS",
String
"#ifdef __APPLE__",
String
"#define CL_SILENCE_DEPRECATION",
String
"#include <OpenCL/cl.h>",
String
"#else",
String
"#include <CL/cl.h>",
String
"#endif"
]
cliOptions :: [Option]
cliOptions :: [Option]
cliOptions =
[Option]
commonOptions
[Option] -> [Option] -> [Option]
forall a. [a] -> [a] -> [a]
++ [ Option :: String -> Maybe Char -> OptionArgument -> String -> Stm -> Option
Option
{ optionLongName :: String
optionLongName = String
"platform",
optionShortName :: Maybe Char
optionShortName = Char -> Maybe Char
forall a. a -> Maybe a
Just Char
'p',
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"NAME",
optionDescription :: String
optionDescription = String
"Use the first OpenCL platform whose name contains the given string.",
optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_platform(cfg, optarg);|]
},
Option :: String -> Maybe Char -> OptionArgument -> String -> Stm -> Option
Option
{ optionLongName :: String
optionLongName = String
"dump-opencl",
optionShortName :: Maybe Char
optionShortName = Maybe Char
forall a. Maybe a
Nothing,
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"FILE",
optionDescription :: String
optionDescription = String
"Dump the embedded OpenCL program to the indicated file.",
optionAction :: Stm
optionAction =
[C.cstm|{futhark_context_config_dump_program_to(cfg, optarg);
entry_point = NULL;}|]
},
Option :: String -> Maybe Char -> OptionArgument -> String -> Stm -> Option
Option
{ optionLongName :: String
optionLongName = String
"load-opencl",
optionShortName :: Maybe Char
optionShortName = Maybe Char
forall a. Maybe a
Nothing,
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"FILE",
optionDescription :: String
optionDescription = String
"Instead of using the embedded OpenCL program, load it from the indicated file.",
optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_load_program_from(cfg, optarg);|]
},
Option :: String -> Maybe Char -> OptionArgument -> String -> Stm -> Option
Option
{ optionLongName :: String
optionLongName = String
"dump-opencl-binary",
optionShortName :: Maybe Char
optionShortName = Maybe Char
forall a. Maybe a
Nothing,
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"FILE",
optionDescription :: String
optionDescription = String
"Dump the compiled version of the embedded OpenCL program to the indicated file.",
optionAction :: Stm
optionAction =
[C.cstm|{futhark_context_config_dump_binary_to(cfg, optarg);
entry_point = NULL;}|]
},
Option :: String -> Maybe Char -> OptionArgument -> String -> Stm -> Option
Option
{ optionLongName :: String
optionLongName = String
"load-opencl-binary",
optionShortName :: Maybe Char
optionShortName = Maybe Char
forall a. Maybe a
Nothing,
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"FILE",
optionDescription :: String
optionDescription = String
"Load an OpenCL binary from the indicated file.",
optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_load_binary_from(cfg, optarg);|]
},
Option :: String -> Maybe Char -> OptionArgument -> String -> Stm -> Option
Option
{ optionLongName :: String
optionLongName = String
"build-option",
optionShortName :: Maybe Char
optionShortName = Maybe Char
forall a. Maybe a
Nothing,
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"OPT",
optionDescription :: String
optionDescription = String
"Add an additional build option to the string passed to clBuildProgram().",
optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_add_build_option(cfg, optarg);|]
},
Option :: String -> Maybe Char -> OptionArgument -> String -> Stm -> Option
Option
{ optionLongName :: String
optionLongName = String
"profile",
optionShortName :: Maybe Char
optionShortName = Char -> Maybe Char
forall a. a -> Maybe a
Just Char
'P',
optionArgument :: OptionArgument
optionArgument = OptionArgument
NoArgument,
optionDescription :: String
optionDescription = String
"Gather profiling data while executing and print out a summary at the end.",
optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_profiling(cfg, 1);|]
},
Option :: String -> Maybe Char -> OptionArgument -> String -> Stm -> Option
Option
{ optionLongName :: String
optionLongName = String
"list-devices",
optionShortName :: Maybe Char
optionShortName = Maybe Char
forall a. Maybe a
Nothing,
optionArgument :: OptionArgument
optionArgument = OptionArgument
NoArgument,
optionDescription :: String
optionDescription = String
"List all OpenCL devices and platforms available on the system.",
optionAction :: Stm
optionAction =
[C.cstm|{futhark_context_config_list_devices(cfg);
entry_point = NULL;}|]
}
]
writeOpenCLScalar :: GC.WriteScalar OpenCL ()
writeOpenCLScalar :: WriteScalar OpenCL ()
writeOpenCLScalar Exp
mem Exp
i Type
t String
"device" Volatility
_ Exp
val = do
VName
val' <- String -> CompilerM OpenCL () VName
forall (m :: * -> *). MonadFreshNames m => String -> m VName
newVName String
"write_tmp"
let (BlockItem
decl, Exp
blocking) =
case Exp
val of
C.Const {} -> ([C.citem|static $ty:t $id:val' = $exp:val;|], [C.cexp|CL_FALSE|])
Exp
_ -> ([C.citem|$ty:t $id:val' = $exp:val;|], [C.cexp|CL_TRUE|])
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|{$item:decl
OPENCL_SUCCEED_OR_RETURN(
clEnqueueWriteBuffer(ctx->opencl.queue, $exp:mem, $exp:blocking,
$exp:i * sizeof($ty:t), sizeof($ty:t),
&$id:val',
0, NULL, $exp:(profilingEvent copyScalarToDev)));
}|]
writeOpenCLScalar Exp
_ Exp
_ Type
_ String
space Volatility
_ Exp
_ =
String -> CompilerM OpenCL () ()
forall a. HasCallStack => String -> a
error (String -> CompilerM OpenCL () ())
-> String -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ String
"Cannot write to '" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
space String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"' memory space."
readOpenCLScalar :: GC.ReadScalar OpenCL ()
readOpenCLScalar :: ReadScalar OpenCL ()
readOpenCLScalar Exp
mem Exp
i Type
t String
"device" Volatility
_ = do
VName
val <- String -> CompilerM OpenCL () VName
forall (m :: * -> *). MonadFreshNames m => String -> m VName
newVName String
"read_res"
InitGroup -> CompilerM OpenCL () ()
forall op s. InitGroup -> CompilerM op s ()
GC.decl [C.cdecl|$ty:t $id:val;|]
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|OPENCL_SUCCEED_OR_RETURN(
clEnqueueReadBuffer(ctx->opencl.queue, $exp:mem,
ctx->failure_is_an_option ? CL_FALSE : CL_TRUE,
$exp:i * sizeof($ty:t), sizeof($ty:t),
&$id:val,
0, NULL, $exp:(profilingEvent copyScalarFromDev)));
|]
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|if (ctx->failure_is_an_option &&
futhark_context_sync(ctx) != 0) { return 1; }|]
Exp -> CompilerM OpenCL () Exp
forall (m :: * -> *) a. Monad m => a -> m a
return [C.cexp|$id:val|]
readOpenCLScalar Exp
_ Exp
_ Type
_ String
space Volatility
_ =
String -> CompilerM OpenCL () Exp
forall a. HasCallStack => String -> a
error (String -> CompilerM OpenCL () Exp)
-> String -> CompilerM OpenCL () Exp
forall a b. (a -> b) -> a -> b
$ String
"Cannot read from '" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
space String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"' memory space."
allocateOpenCLBuffer :: GC.Allocate OpenCL ()
allocateOpenCLBuffer :: Allocate OpenCL ()
allocateOpenCLBuffer Exp
mem Exp
size Exp
tag String
"device" =
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|OPENCL_SUCCEED_OR_RETURN(opencl_alloc(&ctx->opencl, $exp:size, $exp:tag, &$exp:mem));|]
allocateOpenCLBuffer Exp
_ Exp
_ Exp
_ String
space =
String -> CompilerM OpenCL () ()
forall a. HasCallStack => String -> a
error (String -> CompilerM OpenCL () ())
-> String -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ String
"Cannot allocate in '" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
space String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"' space."
deallocateOpenCLBuffer :: GC.Deallocate OpenCL ()
deallocateOpenCLBuffer :: Deallocate OpenCL ()
deallocateOpenCLBuffer Exp
mem Exp
tag String
"device" =
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|OPENCL_SUCCEED_OR_RETURN(opencl_free(&ctx->opencl, $exp:mem, $exp:tag));|]
deallocateOpenCLBuffer Exp
_ Exp
_ String
space =
String -> CompilerM OpenCL () ()
forall a. HasCallStack => String -> a
error (String -> CompilerM OpenCL () ())
-> String -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ String
"Cannot deallocate in '" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
space String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"' space"
copyOpenCLMemory :: GC.Copy OpenCL ()
copyOpenCLMemory :: Copy OpenCL ()
copyOpenCLMemory Exp
destmem Exp
destidx Space
DefaultSpace Exp
srcmem Exp
srcidx (Space String
"device") Exp
nbytes =
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|
if ($exp:nbytes > 0) {
OPENCL_SUCCEED_OR_RETURN(
clEnqueueReadBuffer(ctx->opencl.queue, $exp:srcmem,
ctx->failure_is_an_option ? CL_FALSE : CL_TRUE,
$exp:srcidx, $exp:nbytes,
$exp:destmem + $exp:destidx,
0, NULL, $exp:(profilingEvent copyHostToDev)));
if (ctx->failure_is_an_option &&
futhark_context_sync(ctx) != 0) { return 1; }
}
|]
copyOpenCLMemory Exp
destmem Exp
destidx (Space String
"device") Exp
srcmem Exp
srcidx Space
DefaultSpace Exp
nbytes =
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|
if ($exp:nbytes > 0) {
OPENCL_SUCCEED_OR_RETURN(
clEnqueueWriteBuffer(ctx->opencl.queue, $exp:destmem, CL_TRUE,
$exp:destidx, $exp:nbytes,
$exp:srcmem + $exp:srcidx,
0, NULL, $exp:(profilingEvent copyDevToHost)));
}
|]
copyOpenCLMemory Exp
destmem Exp
destidx (Space String
"device") Exp
srcmem Exp
srcidx (Space String
"device") Exp
nbytes =
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|{
if ($exp:nbytes > 0) {
OPENCL_SUCCEED_OR_RETURN(
clEnqueueCopyBuffer(ctx->opencl.queue,
$exp:srcmem, $exp:destmem,
$exp:srcidx, $exp:destidx,
$exp:nbytes,
0, NULL, $exp:(profilingEvent copyDevToDev)));
if (ctx->debugging) {
OPENCL_SUCCEED_FATAL(clFinish(ctx->opencl.queue));
}
}
}|]
copyOpenCLMemory Exp
destmem Exp
destidx Space
DefaultSpace Exp
srcmem Exp
srcidx Space
DefaultSpace Exp
nbytes =
Exp -> Exp -> Exp -> Exp -> Exp -> CompilerM OpenCL () ()
forall op s. Exp -> Exp -> Exp -> Exp -> Exp -> CompilerM op s ()
GC.copyMemoryDefaultSpace Exp
destmem Exp
destidx Exp
srcmem Exp
srcidx Exp
nbytes
copyOpenCLMemory Exp
_ Exp
_ Space
destspace Exp
_ Exp
_ Space
srcspace Exp
_ =
String -> CompilerM OpenCL () ()
forall a. HasCallStack => String -> a
error (String -> CompilerM OpenCL () ())
-> String -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ String
"Cannot copy to " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Space -> String
forall a. Show a => a -> String
show Space
destspace String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" from " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Space -> String
forall a. Show a => a -> String
show Space
srcspace
openclMemoryType :: GC.MemoryType OpenCL ()
openclMemoryType :: MemoryType OpenCL ()
openclMemoryType String
"device" = Type -> CompilerM OpenCL () Type
forall (f :: * -> *) a. Applicative f => a -> f a
pure [C.cty|typename cl_mem|]
openclMemoryType String
space =
MemoryType OpenCL ()
forall a. HasCallStack => String -> a
error MemoryType OpenCL () -> MemoryType OpenCL ()
forall a b. (a -> b) -> a -> b
$ String
"OpenCL backend does not support '" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
space String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"' memory space."
staticOpenCLArray :: GC.StaticArray OpenCL ()
staticOpenCLArray :: StaticArray OpenCL ()
staticOpenCLArray VName
name String
"device" PrimType
t ArrayContents
vs = do
let ct :: Type
ct = PrimType -> Type
GC.primTypeToCType PrimType
t
VName
name_realtype <- String -> CompilerM OpenCL () VName
forall (m :: * -> *). MonadFreshNames m => String -> m VName
newVName (String -> CompilerM OpenCL () VName)
-> String -> CompilerM OpenCL () VName
forall a b. (a -> b) -> a -> b
$ VName -> String
baseString VName
name String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"_realtype"
Int
num_elems <- case ArrayContents
vs of
ArrayValues [PrimValue]
vs' -> do
let vs'' :: [Initializer]
vs'' = [[C.cinit|$exp:v|] | Exp
v <- (PrimValue -> Exp) -> [PrimValue] -> [Exp]
forall a b. (a -> b) -> [a] -> [b]
map PrimValue -> Exp
GC.compilePrimValue [PrimValue]
vs']
Definition -> CompilerM OpenCL () ()
forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [C.cedecl|static $ty:ct $id:name_realtype[$int:(length vs'')] = {$inits:vs''};|]
Int -> CompilerM OpenCL () Int
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> CompilerM OpenCL () Int) -> Int -> CompilerM OpenCL () Int
forall a b. (a -> b) -> a -> b
$ [Initializer] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Initializer]
vs''
ArrayZeros Int
n -> do
Definition -> CompilerM OpenCL () ()
forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [C.cedecl|static $ty:ct $id:name_realtype[$int:n];|]
Int -> CompilerM OpenCL () Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
n
Id -> Type -> Maybe Exp -> CompilerM OpenCL () ()
forall op s. Id -> Type -> Maybe Exp -> CompilerM op s ()
GC.contextField (VName -> SrcLoc -> Id
forall a. ToIdent a => a -> SrcLoc -> Id
C.toIdent VName
name SrcLoc
forall a. Monoid a => a
mempty) [C.cty|struct memblock_device|] Maybe Exp
forall a. Maybe a
Nothing
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.atInit
[C.cstm|{
typename cl_int success;
ctx->$id:name.references = NULL;
ctx->$id:name.size = 0;
ctx->$id:name.mem =
clCreateBuffer(ctx->opencl.ctx, CL_MEM_READ_WRITE,
($int:num_elems > 0 ? $int:num_elems : 1)*sizeof($ty:ct), NULL,
&success);
OPENCL_SUCCEED_OR_RETURN(success);
if ($int:num_elems > 0) {
OPENCL_SUCCEED_OR_RETURN(
clEnqueueWriteBuffer(ctx->opencl.queue, ctx->$id:name.mem, CL_TRUE,
0, $int:num_elems*sizeof($ty:ct),
$id:name_realtype,
0, NULL, NULL));
}
}|]
BlockItem -> CompilerM OpenCL () ()
forall op s. BlockItem -> CompilerM op s ()
GC.item [C.citem|struct memblock_device $id:name = ctx->$id:name;|]
staticOpenCLArray VName
_ String
space PrimType
_ ArrayContents
_ =
String -> CompilerM OpenCL () ()
forall a. HasCallStack => String -> a
error (String -> CompilerM OpenCL () ())
-> String -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ String
"OpenCL backend cannot create static array in memory space '" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
space String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"'"
callKernel :: GC.OpCompiler OpenCL ()
callKernel :: OpCompiler OpenCL ()
callKernel (GetSize VName
v KernelName
key) =
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:v = ctx->sizes.$id:key;|]
callKernel (CmpSizeLe VName
v KernelName
key Exp
x) = do
Exp
x' <- Exp -> CompilerM OpenCL () Exp
forall op s. Exp -> CompilerM op s Exp
GC.compileExp Exp
x
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:v = ctx->sizes.$id:key <= $exp:x';|]
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|if (ctx->logging) {
fprintf(ctx->log, "Compared %s <= %ld: %s.\n", $string:(prettyOneLine key), (long)$exp:x', $id:v ? "true" : "false");
}|]
callKernel (GetSizeMax VName
v SizeClass
size_class) =
let field :: String
field = String
"max_" String -> String -> String
forall a. [a] -> [a] -> [a]
++ SizeClass -> String
forall a. Pretty a => a -> String
pretty SizeClass
size_class
in Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:v = ctx->opencl.$id:field;|]
callKernel (LaunchKernel KernelSafety
safety KernelName
name [KernelArg]
args [Exp]
num_workgroups [Exp]
workgroup_size) = do
Bool -> CompilerM OpenCL () () -> CompilerM OpenCL () ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (KernelSafety
safety KernelSafety -> KernelSafety -> Bool
forall a. Eq a => a -> a -> Bool
== KernelSafety
SafetyFull) (CompilerM OpenCL () () -> CompilerM OpenCL () ())
-> CompilerM OpenCL () () -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|
OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->$id:name, 1,
sizeof(ctx->failure_is_an_option),
&ctx->failure_is_an_option));
|]
(Int -> KernelArg -> CompilerM OpenCL () ())
-> [Int] -> [KernelArg] -> CompilerM OpenCL () ()
forall (m :: * -> *) a b c.
Applicative m =>
(a -> b -> m c) -> [a] -> [b] -> m ()
zipWithM_ Int -> KernelArg -> CompilerM OpenCL () ()
forall a op s.
(Show a, Integral a) =>
a -> KernelArg -> CompilerM op s ()
setKernelArg [KernelSafety -> Int
numFailureParams KernelSafety
safety ..] [KernelArg]
args
[Exp]
num_workgroups' <- (Exp -> CompilerM OpenCL () Exp)
-> [Exp] -> CompilerM OpenCL () [Exp]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM Exp -> CompilerM OpenCL () Exp
forall op s. Exp -> CompilerM op s Exp
GC.compileExp [Exp]
num_workgroups
[Exp]
workgroup_size' <- (Exp -> CompilerM OpenCL () Exp)
-> [Exp] -> CompilerM OpenCL () [Exp]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM Exp -> CompilerM OpenCL () Exp
forall op s. Exp -> CompilerM op s Exp
GC.compileExp [Exp]
workgroup_size
Exp
local_bytes <- (Exp -> KernelArg -> CompilerM OpenCL () Exp)
-> Exp -> [KernelArg] -> CompilerM OpenCL () Exp
forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldM Exp -> KernelArg -> CompilerM OpenCL () Exp
forall op s. Exp -> KernelArg -> CompilerM op s Exp
localBytes [C.cexp|0|] [KernelArg]
args
KernelName -> [Exp] -> [Exp] -> Exp -> CompilerM OpenCL () ()
forall a op s.
ToExp a =>
KernelName -> [a] -> [a] -> a -> CompilerM op s ()
launchKernel KernelName
name [Exp]
num_workgroups' [Exp]
workgroup_size' Exp
local_bytes
Bool -> CompilerM OpenCL () () -> CompilerM OpenCL () ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (KernelSafety
safety KernelSafety -> KernelSafety -> Bool
forall a. Ord a => a -> a -> Bool
>= KernelSafety
SafetyFull) (CompilerM OpenCL () () -> CompilerM OpenCL () ())
-> CompilerM OpenCL () () -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$
Stm -> CompilerM OpenCL () ()
forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|ctx->failure_is_an_option = 1;|]
where
setKernelArg :: a -> KernelArg -> CompilerM op s ()
setKernelArg a
i (ValueKArg Exp
e PrimType
bt) = do
VName
v <- String -> PrimType -> Exp -> CompilerM op s VName
forall op s. String -> PrimType -> Exp -> CompilerM op s VName
GC.compileExpToName String
"kernel_arg" PrimType
bt Exp
e
Stm -> CompilerM op s ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|
OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->$id:name, $int:i, sizeof($id:v), &$id:v));
|]
setKernelArg a
i (MemKArg VName
v) = do
Exp
v' <- VName -> CompilerM op s Exp
forall op s. VName -> CompilerM op s Exp
GC.rawMem VName
v
Stm -> CompilerM op s ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|
OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->$id:name, $int:i, sizeof($exp:v'), &$exp:v'));
|]
setKernelArg a
i (SharedMemoryKArg Count Bytes Exp
num_bytes) = do
Exp
num_bytes' <- Exp -> CompilerM op s Exp
forall op s. Exp -> CompilerM op s Exp
GC.compileExp (Exp -> CompilerM op s Exp) -> Exp -> CompilerM op s Exp
forall a b. (a -> b) -> a -> b
$ Count Bytes Exp -> Exp
forall u e. Count u e -> e
unCount Count Bytes Exp
num_bytes
Stm -> CompilerM op s ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|
OPENCL_SUCCEED_OR_RETURN(clSetKernelArg(ctx->$id:name, $int:i, $exp:num_bytes', NULL));
|]
localBytes :: Exp -> KernelArg -> CompilerM op s Exp
localBytes Exp
cur (SharedMemoryKArg Count Bytes Exp
num_bytes) = do
Exp
num_bytes' <- Exp -> CompilerM op s Exp
forall op s. Exp -> CompilerM op s Exp
GC.compileExp (Exp -> CompilerM op s Exp) -> Exp -> CompilerM op s Exp
forall a b. (a -> b) -> a -> b
$ Count Bytes Exp -> Exp
forall u e. Count u e -> e
unCount Count Bytes Exp
num_bytes
Exp -> CompilerM op s Exp
forall (m :: * -> *) a. Monad m => a -> m a
return [C.cexp|$exp:cur + $exp:num_bytes'|]
localBytes Exp
cur KernelArg
_ = Exp -> CompilerM op s Exp
forall (m :: * -> *) a. Monad m => a -> m a
return Exp
cur
launchKernel ::
C.ToExp a =>
KernelName ->
[a] ->
[a] ->
a ->
GC.CompilerM op s ()
launchKernel :: KernelName -> [a] -> [a] -> a -> CompilerM op s ()
launchKernel KernelName
kernel_name [a]
num_workgroups [a]
workgroup_dims a
local_bytes = do
VName
global_work_size <- String -> CompilerM op s VName
forall (m :: * -> *). MonadFreshNames m => String -> m VName
newVName String
"global_work_size"
VName
time_start <- String -> CompilerM op s VName
forall (m :: * -> *). MonadFreshNames m => String -> m VName
newVName String
"time_start"
VName
time_end <- String -> CompilerM op s VName
forall (m :: * -> *). MonadFreshNames m => String -> m VName
newVName String
"time_end"
VName
time_diff <- String -> CompilerM op s VName
forall (m :: * -> *). MonadFreshNames m => String -> m VName
newVName String
"time_diff"
VName
local_work_size <- String -> CompilerM op s VName
forall (m :: * -> *). MonadFreshNames m => String -> m VName
newVName String
"local_work_size"
Stm -> CompilerM op s ()
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|
if ($exp:total_elements != 0) {
const size_t $id:global_work_size[$int:kernel_rank] = {$inits:kernel_dims'};
const size_t $id:local_work_size[$int:kernel_rank] = {$inits:workgroup_dims'};
typename int64_t $id:time_start = 0, $id:time_end = 0;
if (ctx->debugging) {
fprintf(ctx->log, "Launching %s with global work size [", $string:(pretty kernel_name));
$stms:(printKernelSize global_work_size)
fprintf(ctx->log, "] and local work size [");
$stms:(printKernelSize local_work_size)
fprintf(ctx->log, "]; local memory parameters sum to %d bytes.\n", (int)$exp:local_bytes);
$id:time_start = get_wall_time();
}
OPENCL_SUCCEED_OR_RETURN(
clEnqueueNDRangeKernel(ctx->opencl.queue, ctx->$id:kernel_name, $int:kernel_rank, NULL,
$id:global_work_size, $id:local_work_size,
0, NULL, $exp:(profilingEvent kernel_name)));
if (ctx->debugging) {
OPENCL_SUCCEED_FATAL(clFinish(ctx->opencl.queue));
$id:time_end = get_wall_time();
long int $id:time_diff = $id:time_end - $id:time_start;
fprintf(ctx->log, "kernel %s runtime: %ldus\n",
$string:(pretty kernel_name), $id:time_diff);
}
}|]
where
kernel_rank :: Int
kernel_rank = [Exp] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Exp]
kernel_dims
kernel_dims :: [Exp]
kernel_dims = (Exp -> Exp -> Exp) -> [Exp] -> [Exp] -> [Exp]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Exp -> Exp -> Exp
forall a a. (ToExp a, ToExp a) => a -> a -> Exp
multExp ((a -> Exp) -> [a] -> [Exp]
forall a b. (a -> b) -> [a] -> [b]
map a -> Exp
forall a. ToExp a => a -> Exp
toSize [a]
num_workgroups) ((a -> Exp) -> [a] -> [Exp]
forall a b. (a -> b) -> [a] -> [b]
map a -> Exp
forall a. ToExp a => a -> Exp
toSize [a]
workgroup_dims)
kernel_dims' :: [Initializer]
kernel_dims' = (Exp -> Initializer) -> [Exp] -> [Initializer]
forall a b. (a -> b) -> [a] -> [b]
map Exp -> Initializer
forall a. ToExp a => a -> Initializer
toInit [Exp]
kernel_dims
workgroup_dims' :: [Initializer]
workgroup_dims' = (a -> Initializer) -> [a] -> [Initializer]
forall a b. (a -> b) -> [a] -> [b]
map a -> Initializer
forall a. ToExp a => a -> Initializer
toInit [a]
workgroup_dims
total_elements :: Exp
total_elements = (Exp -> Exp -> Exp) -> Exp -> [Exp] -> Exp
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl Exp -> Exp -> Exp
forall a a. (ToExp a, ToExp a) => a -> a -> Exp
multExp [C.cexp|1|] [Exp]
kernel_dims
toInit :: a -> Initializer
toInit a
e = [C.cinit|$exp:e|]
multExp :: a -> a -> Exp
multExp a
x a
y = [C.cexp|$exp:x * $exp:y|]
toSize :: a -> Exp
toSize a
e = [C.cexp|(size_t)$exp:e|]
printKernelSize :: VName -> [C.Stm]
printKernelSize :: VName -> [Stm]
printKernelSize VName
work_size =
[Stm] -> [[Stm]] -> [Stm]
forall a. [a] -> [[a]] -> [a]
intercalate [[C.cstm|fprintf(ctx->log, ", ");|]] ([[Stm]] -> [Stm]) -> [[Stm]] -> [Stm]
forall a b. (a -> b) -> a -> b
$
(Int -> [Stm]) -> [Int] -> [[Stm]]
forall a b. (a -> b) -> [a] -> [b]
map (VName -> Int -> [Stm]
forall a a. (ToIdent a, Show a, Integral a) => a -> a -> [Stm]
printKernelDim VName
work_size) [Int
0 .. Int
kernel_rank Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1]
printKernelDim :: a -> a -> [Stm]
printKernelDim a
global_work_size a
i =
[[C.cstm|fprintf(ctx->log, "%zu", $id:global_work_size[$int:i]);|]]