1 /* CUDA Driver API description.
2 Copyright (C) 2017-2023 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
19
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>.
24
25 This header provides parts of the CUDA Driver API, without having to rely on
26 the proprietary CUDA toolkit. */
27
28 #ifndef GCC_CUDA_H
29 #define GCC_CUDA_H
30
31 #include <stdlib.h>
32
33 #define CUDA_VERSION 8000
34
35 #ifdef __cplusplus
36 extern "C" {
37 #endif
38
39 typedef void *CUcontext;
40 typedef int CUdevice;
41 #if defined(__LP64__) || defined(_WIN64)
42 typedef unsigned long long CUdeviceptr;
43 #else
44 typedef unsigned CUdeviceptr;
45 #endif
46 typedef void *CUevent;
47 typedef void *CUfunction;
48 typedef void *CUlinkState;
49 typedef void *CUmodule;
50 typedef size_t (*CUoccupancyB2DSize)(int);
51 typedef void *CUstream;
52
53 typedef enum {
54 CUDA_SUCCESS = 0,
55 CUDA_ERROR_INVALID_VALUE = 1,
56 CUDA_ERROR_OUT_OF_MEMORY = 2,
57 CUDA_ERROR_INVALID_CONTEXT = 201,
58 CUDA_ERROR_NOT_FOUND = 500,
59 CUDA_ERROR_NOT_READY = 600,
60 CUDA_ERROR_LAUNCH_FAILED = 719,
61 CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720,
62 CUDA_ERROR_NOT_PERMITTED = 800,
63 CUDA_ERROR_NOT_SUPPORTED = 801,
64 CUDA_ERROR_UNKNOWN = 999
65 } CUresult;
66
67 typedef enum {
68 CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
69 CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
70 CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
71 CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
72 CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
73 CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
74 CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
75 CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
76 CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
77 CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
78 CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
79 CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
80 CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
81 CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82
82 } CUdevice_attribute;
83
84 enum {
85 CU_EVENT_DEFAULT = 0,
86 CU_EVENT_DISABLE_TIMING = 2
87 };
88
89 typedef enum {
90 CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
91 CU_FUNC_ATTRIBUTE_NUM_REGS = 4
92 } CUfunction_attribute;
93
94 typedef enum {
95 CU_JIT_WALL_TIME = 2,
96 CU_JIT_INFO_LOG_BUFFER = 3,
97 CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 4,
98 CU_JIT_ERROR_LOG_BUFFER = 5,
99 CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6,
100 CU_JIT_OPTIMIZATION_LEVEL = 7,
101 CU_JIT_GENERATE_DEBUG_INFO = 11,
102 CU_JIT_LOG_VERBOSE = 12,
103 CU_JIT_GENERATE_LINE_INFO = 13,
104 } CUjit_option;
105
106 typedef enum {
107 CU_JIT_INPUT_PTX = 1
108 } CUjitInputType;
109
110 enum {
111 CU_CTX_SCHED_AUTO = 0
112 };
113
114 #define CU_LAUNCH_PARAM_END ((void *) 0)
115 #define CU_LAUNCH_PARAM_BUFFER_POINTER ((void *) 1)
116 #define CU_LAUNCH_PARAM_BUFFER_SIZE ((void *) 2)
117 #define CU_MEMHOSTALLOC_DEVICEMAP 0x02U
118
119 enum {
120 CU_STREAM_DEFAULT = 0,
121 CU_STREAM_NON_BLOCKING = 1
122 };
123
124 typedef enum {
125 CU_LIMIT_STACK_SIZE = 0x00,
126 CU_LIMIT_MALLOC_HEAP_SIZE = 0x02,
127 } CUlimit;
128
129 #define cuCtxCreate cuCtxCreate_v2
130 CUresult cuCtxCreate (CUcontext *, unsigned, CUdevice);
131 #define cuCtxDestroy cuCtxDestroy_v2
132 CUresult cuCtxDestroy (CUcontext);
133 CUresult cuCtxGetCurrent (CUcontext *);
134 CUresult cuCtxGetDevice (CUdevice *);
135 #define cuCtxPopCurrent cuCtxPopCurrent_v2
136 CUresult cuCtxPopCurrent (CUcontext *);
137 #define cuCtxPushCurrent cuCtxPushCurrent_v2
138 CUresult cuCtxPushCurrent (CUcontext);
139 CUresult cuCtxSynchronize (void);
140 CUresult cuCtxSetLimit (CUlimit, size_t);
141 CUresult cuDeviceGet (CUdevice *, int);
142 #define cuDeviceTotalMem cuDeviceTotalMem_v2
143 CUresult cuDeviceTotalMem (size_t *, CUdevice);
144 CUresult cuDeviceGetAttribute (int *, CUdevice_attribute, CUdevice);
145 CUresult cuDeviceGetCount (int *);
146 CUresult cuDeviceGetName (char *, int, CUdevice);
147 CUresult cuEventCreate (CUevent *, unsigned);
148 #define cuEventDestroy cuEventDestroy_v2
149 CUresult cuEventDestroy (CUevent);
150 CUresult cuEventElapsedTime (float *, CUevent, CUevent);
151 CUresult cuEventQuery (CUevent);
152 CUresult cuEventRecord (CUevent, CUstream);
153 CUresult cuEventSynchronize (CUevent);
154 CUresult cuFuncGetAttribute (int *, CUfunction_attribute, CUfunction);
155 CUresult cuGetErrorString (CUresult, const char **);
156 CUresult cuGetErrorName (CUresult, const char **);
157 CUresult cuInit (unsigned);
158 CUresult cuDriverGetVersion (int *);
159 CUresult cuLaunchKernel (CUfunction, unsigned, unsigned, unsigned, unsigned,
160 unsigned, unsigned, unsigned, CUstream, void **, void **);
161 #define cuLinkAddData cuLinkAddData_v2
162 CUresult cuLinkAddData (CUlinkState, CUjitInputType, void *, size_t, const char *,
163 unsigned, CUjit_option *, void **);
164 CUresult cuLinkComplete (CUlinkState, void **, size_t *);
165 #define cuLinkCreate cuLinkCreate_v2
166 CUresult cuLinkCreate (unsigned, CUjit_option *, void **, CUlinkState *);
167 CUresult cuLinkDestroy (CUlinkState);
168 #define cuMemGetInfo cuMemGetInfo_v2
169 CUresult cuMemGetInfo (size_t *, size_t *);
170 #define cuMemAlloc cuMemAlloc_v2
171 CUresult cuMemAlloc (CUdeviceptr *, size_t);
172 #define cuMemAllocHost cuMemAllocHost_v2
173 CUresult cuMemAllocHost (void **, size_t);
174 CUresult cuMemHostAlloc (void **, size_t, unsigned int);
175 CUresult cuMemcpy (CUdeviceptr, CUdeviceptr, size_t);
176 #define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
177 CUresult cuMemcpyDtoDAsync (CUdeviceptr, CUdeviceptr, size_t, CUstream);
178 #define cuMemcpyDtoH cuMemcpyDtoH_v2
179 CUresult cuMemcpyDtoH (void *, CUdeviceptr, size_t);
180 #define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2
181 CUresult cuMemcpyDtoHAsync (void *, CUdeviceptr, size_t, CUstream);
182 #define cuMemcpyHtoD cuMemcpyHtoD_v2
183 CUresult cuMemcpyHtoD (CUdeviceptr, const void *, size_t);
184 #define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2
185 CUresult cuMemcpyHtoDAsync (CUdeviceptr, const void *, size_t, CUstream);
186 #define cuMemFree cuMemFree_v2
187 CUresult cuMemFree (CUdeviceptr);
188 CUresult cuMemFreeHost (void *);
189 #define cuMemGetAddressRange cuMemGetAddressRange_v2
190 CUresult cuMemGetAddressRange (CUdeviceptr *, size_t *, CUdeviceptr);
191 #define cuMemHostGetDevicePointer cuMemHostGetDevicePointer_v2
192 CUresult cuMemHostGetDevicePointer (CUdeviceptr *, void *, unsigned);
193 CUresult cuModuleGetFunction (CUfunction *, CUmodule, const char *);
194 #define cuModuleGetGlobal cuModuleGetGlobal_v2
195 CUresult cuModuleGetGlobal (CUdeviceptr *, size_t *, CUmodule, const char *);
196 CUresult cuModuleLoad (CUmodule *, const char *);
197 CUresult cuModuleLoadData (CUmodule *, const void *);
198 CUresult cuModuleUnload (CUmodule);
199 CUresult cuOccupancyMaxPotentialBlockSize(int *, int *, CUfunction,
200 CUoccupancyB2DSize, size_t, int);
201 typedef void (*CUstreamCallback)(CUstream, CUresult, void *);
202 CUresult cuStreamAddCallback(CUstream, CUstreamCallback, void *, unsigned int);
203 CUresult cuStreamCreate (CUstream *, unsigned);
204 #define cuStreamDestroy cuStreamDestroy_v2
205 CUresult cuStreamDestroy (CUstream);
206 CUresult cuStreamQuery (CUstream);
207 CUresult cuStreamSynchronize (CUstream);
208 CUresult cuStreamWaitEvent (CUstream, CUevent, unsigned);
209
210 #ifdef __cplusplus
211 }
212 #endif
213
214 #endif /* GCC_CUDA_H */