Debugger API (PDF) - CUDA Toolkit v5.5 for POWER8 (older) - Last updated October 15, 2014 - Send Feedback

6.1. cudadebugger.h File Reference

Description

Header file for the CUDA debugger API.

Code Example

cudadebugger.h

      /*
       * Copyright 2007-2014 NVIDIA Corporation.  All rights reserved.
       *
       * NOTICE TO LICENSEE:
       *
       * This source code and/or documentation ("Licensed Deliverables") are
       * subject to NVIDIA intellectual property rights under U.S. and
       * international Copyright laws.
       *
       * These Licensed Deliverables contained herein is PROPRIETARY and
       * CONFIDENTIAL to NVIDIA and is being provided under the terms and
       * conditions of a form of NVIDIA software license agreement by and
       * between NVIDIA and Licensee ("License Agreement") or electronically
       * accepted by Licensee.  Notwithstanding any terms or conditions to
       * the contrary in the License Agreement, reproduction or disclosure
       * of the Licensed Deliverables to any third party without the express
       * written consent of NVIDIA is prohibited.
       *
       * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
       * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
       * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
       * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
       * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
       * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
       * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
       * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
       * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
       * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
       * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
       * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
       * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
       * OF THESE LICENSED DELIVERABLES.
       *
       * U.S. Government End Users.  These Licensed Deliverables are a
       * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
       * 1995), consisting of "commercial computer software" and "commercial
       * computer software documentation" as such terms are used in 48
       * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
       * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
       * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
       * U.S. Government End Users acquire the Licensed Deliverables with
       * only those rights set forth herein.
       *
       * Any use of the Licensed Deliverables in individual and commercial
       * software must include, in the user documentation and internal
       * comments to the code, the above Disclaimer and U.S. Government End
       * Users Notice.
       */
      
      
      /*--------------------------------- Includes --------------------------------*/
      
      #ifndef CUDADEBUGGER_H
      #define CUDADEBUGGER_H
      
      #include <stdlib.h>
      #include "cuda_stdint.h"
      
      #if defined(__STDC__)
      #include <inttypes.h>
      #include <stdbool.h>
      #endif
      
      #ifdef __cplusplus
      extern "C" {
      #endif
      
      /*
       * Windows-only fallbacks: printf-style format macros (PRIxPTR, PRIx64,
       * PRId64) and a C substitute for bool/true/false.
       *
       * NOTE(review): these are defined unconditionally whenever _WIN32 is set.
       * If the __STDC__ branch above has already pulled in <inttypes.h> and
       * <stdbool.h>, the names below would be defined twice -- confirm that this
       * combination does not occur with the supported Windows compilers.
       */
      #if defined(_WIN32) && !defined(_WIN64)
      /* Windows 32-bit */
      #define PRIxPTR "I32x"
      #endif
      
      #if defined(_WIN64)
      /* Windows 64-bit */
      #define PRIxPTR "I64x"
      #endif
      
      #if defined(_WIN32)
      /* Windows 32- and 64-bit */
      #define PRIx64  "I64x"
      #define PRId64  "I64d"
      /* NOTE(review): `bool` is a keyword in C++, so this typedef looks C-only;
       * confirm how C++ clients on Windows are expected to include this header. */
      typedef unsigned char bool;
      /* Drop any earlier definitions before installing the integer versions. */
      #undef false
      #undef true
      #define false 0
      #define true  1
      #endif
      
      /*--------------------------------- API Version ------------------------------*/
      
      #define CUDBG_API_VERSION_MAJOR       6 /* Major release version number */
      #define CUDBG_API_VERSION_MINOR       5 /* Minor release version number */
      #define CUDBG_API_VERSION_REVISION  121 /* Revision (build) number */
      
      /*---------------------------------- Constants -------------------------------*/
      
      #define CUDBG_MAX_DEVICES 32  /* Maximum number of supported devices */
      #define CUDBG_MAX_SMS     64  /* Maximum number of SMs per device */
      #define CUDBG_MAX_WARPS   64  /* Maximum number of warps per SM */
      #define CUDBG_MAX_LANES   32  /* Maximum number of lanes per warp */
      
      /*----------------------- Thread/Block Coordinates Types ---------------------*/
      
      /* 2-dimensional coordinates. DEPRECATED */
      typedef struct {
          uint32_t x;
          uint32_t y;
      } CuDim2;

      /* 3-dimensional coordinates for threads,... */
      typedef struct {
          uint32_t x;
          uint32_t y;
          uint32_t z;
      } CuDim3;
      
      /*--------------------- Memory Segments (as used in DWARF) -------------------*/
      
      /*
       * Memory segment identifiers. The numeric values are spelled out here;
       * they are the same values the implicit 0, 1, 2, ... numbering produces.
       */
      typedef enum {
          ptxUNSPECIFIEDStorage = 0,
          ptxCodeStorage        = 1,
          ptxRegStorage         = 2,
          ptxSregStorage        = 3,
          ptxConstStorage       = 4,
          ptxGlobalStorage      = 5,
          ptxLocalStorage       = 6,
          ptxParamStorage       = 7,
          ptxSharedStorage      = 8,
          ptxSurfStorage        = 9,
          ptxTexStorage         = 10,
          ptxTexSamplerStorage  = 11,
          ptxGenericStorage     = 12,
          ptxIParamStorage      = 13,
          ptxOParamStorage      = 14,
          ptxFrameStorage       = 15,
          ptxMAXStorage         = 16   /* one past the last storage kind listed above */
      } ptxStorageKind;
      
      /*--------------------------- Debugger System Calls --------------------------*/
      
      /*
       * Each macro below expands to a bare identifier (not a string or a value).
       * NOTE(review): presumably these are the names of symbols that the debugger
       * client looks up in the debugged application -- confirm against the
       * code that uses them.
       */
      #define CUDBG_IPC_FLAG_NAME                 cudbgIpcFlag
      #define CUDBG_RPC_ENABLED                   cudbgRpcEnabled
      #define CUDBG_APICLIENT_PID                 cudbgApiClientPid
      #define CUDBG_DEBUGGER_INITIALIZED          cudbgDebuggerInitialized
      #define CUDBG_APICLIENT_REVISION            cudbgApiClientRevision
      #define CUDBG_SESSION_ID                    cudbgSessionId
      #define CUDBG_ATTACH_HANDLER_AVAILABLE      cudbgAttachHandlerAvailable
      #define CUDBG_DETACH_SUSPENDED_DEVICES_MASK cudbgDetachSuspendedDevicesMask
      #define CUDBG_ENABLE_LAUNCH_BLOCKING        cudbgEnableLaunchBlocking
      #define CUDBG_ENABLE_INTEGRATED_MEMCHECK    cudbgEnableIntegratedMemcheck
      #define CUDBG_ENABLE_PREEMPTION_DEBUGGING   cudbgEnablePreemptionDebugging
      #define CUDBG_RESUME_FOR_ATTACH_DETACH      cudbgResumeForAttachDetach
      
      /*---------------- Internal Breakpoint Entries for Error Reporting ------------*/
      
      /*
       * Each macro below expands to a bare identifier (not a string or a value).
       * NOTE(review): presumably the names of the error-reporting entry points
       * and of the variables holding the reported error code / function name --
       * confirm against the code that uses them.
       */
      #define CUDBG_REPORT_DRIVER_API_ERROR                   cudbgReportDriverApiError
      #define CUDBG_REPORT_DRIVER_API_ERROR_FLAGS             cudbgReportDriverApiErrorFlags
      #define CUDBG_REPORTED_DRIVER_API_ERROR_CODE            cudbgReportedDriverApiErrorCode
      #define CUDBG_REPORTED_DRIVER_API_ERROR_FUNC_NAME_SIZE  cudbgReportedDriverApiErrorFuncNameSize
      #define CUDBG_REPORTED_DRIVER_API_ERROR_FUNC_NAME_ADDR  cudbgReportedDriverApiErrorFuncNameAddr
      #define CUDBG_REPORT_DRIVER_INTERNAL_ERROR              cudbgReportDriverInternalError
      #define CUDBG_REPORTED_DRIVER_INTERNAL_ERROR_CODE       cudbgReportedDriverInternalErrorCode
      
      /*----------------------------- API Return Types -----------------------------*/
      
      /*
       * Result values of all the API routines.
       *
       * Values are assigned explicitly; note that 0x0011 is not used by any
       * enumerator in this list.
       */
      typedef enum {
          CUDBG_SUCCESS                           = 0x0000,  /* Successful execution */
          CUDBG_ERROR_UNKNOWN                     = 0x0001,  /* Error type not listed below */
          CUDBG_ERROR_BUFFER_TOO_SMALL            = 0x0002,  /* Cannot copy all the queried data into the buffer argument */
          CUDBG_ERROR_UNKNOWN_FUNCTION            = 0x0003,  /* Function cannot be found in the CUDA kernel */
          CUDBG_ERROR_INVALID_ARGS                = 0x0004,  /* Wrong use of arguments (NULL pointer, illegal value,...) */
          CUDBG_ERROR_UNINITIALIZED               = 0x0005,  /* Debugger API has not yet been properly initialized */
          CUDBG_ERROR_INVALID_COORDINATES         = 0x0006,  /* Invalid block or thread coordinates were provided */
          CUDBG_ERROR_INVALID_MEMORY_SEGMENT      = 0x0007,  /* Invalid memory segment requested (read/write) */
          CUDBG_ERROR_INVALID_MEMORY_ACCESS       = 0x0008,  /* Requested address (+size) is not within proper segment boundaries */
          CUDBG_ERROR_MEMORY_MAPPING_FAILED       = 0x0009,  /* Memory is not mapped and can't be mapped */
          CUDBG_ERROR_INTERNAL                    = 0x000a,  /* A debugger internal error occurred */
          CUDBG_ERROR_INVALID_DEVICE              = 0x000b,  /* Specified device cannot be found */
          CUDBG_ERROR_INVALID_SM                  = 0x000c,  /* Specified sm cannot be found */
          CUDBG_ERROR_INVALID_WARP                = 0x000d,  /* Specified warp cannot be found */
          CUDBG_ERROR_INVALID_LANE                = 0x000e,  /* Specified lane cannot be found */
          CUDBG_ERROR_SUSPENDED_DEVICE            = 0x000f,  /* device is suspended */
          CUDBG_ERROR_RUNNING_DEVICE              = 0x0010,  /* device is running and not suspended */
          CUDBG_ERROR_INVALID_ADDRESS             = 0x0012,  /* address is out-of-range */
          CUDBG_ERROR_INCOMPATIBLE_API            = 0x0013,  /* API version does not match */
          CUDBG_ERROR_INITIALIZATION_FAILURE      = 0x0014,  /* The CUDA Driver failed to initialize */
          CUDBG_ERROR_INVALID_GRID                = 0x0015,  /* Specified grid cannot be found */
          CUDBG_ERROR_NO_EVENT_AVAILABLE          = 0x0016,  /* No event left to be processed */
          CUDBG_ERROR_SOME_DEVICES_WATCHDOGGED    = 0x0017,  /* One or more devices have an associated watchdog (eg. X) */
          CUDBG_ERROR_ALL_DEVICES_WATCHDOGGED     = 0x0018,  /* All devices have an associated watchdog (eg. X) */
          CUDBG_ERROR_INVALID_ATTRIBUTE           = 0x0019,  /* Specified attribute does not exist or is incorrect */
          CUDBG_ERROR_ZERO_CALL_DEPTH             = 0x001a,  /* No function calls have been made on the device */
          CUDBG_ERROR_INVALID_CALL_LEVEL          = 0x001b,  /* Specified call level is invalid */
          CUDBG_ERROR_COMMUNICATION_FAILURE       = 0x001c,  /* Communication error between the debugger and the application. */
          CUDBG_ERROR_INVALID_CONTEXT             = 0x001d,  /* Specified context cannot be found */
          CUDBG_ERROR_ADDRESS_NOT_IN_DEVICE_MEM   = 0x001e,  /* Requested address was not originally allocated from device memory (most likely visible in system memory) */
          CUDBG_ERROR_MEMORY_UNMAPPING_FAILED     = 0x001f,  /* Memory is not unmapped and can't be unmapped */
          CUDBG_ERROR_INCOMPATIBLE_DISPLAY_DRIVER = 0x0020,  /* The display driver is incompatible with the API */
          CUDBG_ERROR_INVALID_MODULE              = 0x0021,  /* The specified module is not valid */
          CUDBG_ERROR_LANE_NOT_IN_SYSCALL         = 0x0022,  /* The specified lane is not inside a device syscall */
          CUDBG_ERROR_MEMCHECK_NOT_ENABLED        = 0x0023,  /* Memcheck has not been enabled */
          CUDBG_ERROR_INVALID_ENVVAR_ARGS         = 0x0024,  /* Some environment variable's value is invalid */
          CUDBG_ERROR_OS_RESOURCES                = 0x0025,  /* Error while allocating resources from the OS */
          CUDBG_ERROR_FORK_FAILED                 = 0x0026,  /* Error while forking the debugger process */
          CUDBG_ERROR_NO_DEVICE_AVAILABLE         = 0x0027,  /* No CUDA capable device was found */
          CUDBG_ERROR_ATTACH_NOT_POSSIBLE         = 0x0028,  /* Attaching to the CUDA program is not possible */
          CUDBG_ERROR_WARP_RESUME_NOT_POSSIBLE    = 0x0029,  /* The resumeWarpsUntilPC() API is not possible, use resumeDevice() or singleStepWarp() instead */
          CUDBG_ERROR_INVALID_WARP_MASK           = 0x002a,  /* Specified warp mask is zero, or contains invalid warps */
          CUDBG_ERROR_AMBIGUOUS_MEMORY_ADDRESS    = 0x002b,  /* Address cannot be resolved to a GPU unambiguously */
          CUDBG_ERROR_RECURSIVE_API_CALL          = 0x002c,  /* Debug API entry point called from within a debug API callback */
      } CUDBGResult;
      
      
      /*------------------------- API Error Reporting Flags -------------------------*/
      /* Bit flags controlling which driver API errors get reported. */
      typedef enum {
          /* Default is that there is no flag */
          CUDBG_REPORT_DRIVER_API_ERROR_FLAGS_NONE               = 0x0000,
          /* When set, cudaErrorNotReady/cuErrorNotReady will not be reported */
          CUDBG_REPORT_DRIVER_API_ERROR_FLAGS_SUPPRESS_NOT_READY = (1U << 0),
      } CUDBGReportDriverApiErrorFlags;
      
      /*------------------------------ Grid Attributes -----------------------------*/
      
      /* Grid attribute identifiers. */
      typedef enum {
          CUDBG_ATTR_GRID_LAUNCH_BLOCKING     = 0x000,   /* Whether the grid launch is blocking or not. */
          CUDBG_ATTR_GRID_TID                 = 0x001,   /* Id of the host thread that launched the grid. */
      } CUDBGAttribute;

      /* Pairs one attribute identifier with a 64-bit value. */
      typedef struct {
          CUDBGAttribute attribute;
          uint64_t       value;
      } CUDBGAttributeValuePair;
      
      /* Grid status. Enumerators take the implicit values 0, 1, 2, ... in order. */
      typedef enum {
          CUDBG_GRID_STATUS_INVALID,          /* An invalid grid ID was passed, or an error occurred during status lookup */
          CUDBG_GRID_STATUS_PENDING,          /* The grid was launched but is not running on the HW yet */
          CUDBG_GRID_STATUS_ACTIVE,           /* The grid is currently running on the HW */
          CUDBG_GRID_STATUS_SLEEPING,         /* The grid is on the device, doing a join */
          CUDBG_GRID_STATUS_TERMINATED,       /* The grid has finished executing */
          CUDBG_GRID_STATUS_UNDETERMINED,     /* The grid is either PENDING or TERMINATED */
      } CUDBGGridStatus;
      
      /*------------------------------- Kernel Types -------------------------------*/
      
      /* Kernel type classification. */
      typedef enum {
          CUDBG_KNL_TYPE_UNKNOWN              = 0x000,   /* Any type not listed below. */
          CUDBG_KNL_TYPE_SYSTEM               = 0x001,   /* System kernel, such as MemCpy. */
          CUDBG_KNL_TYPE_APPLICATION          = 0x002,   /* Application kernel, user-defined or libraries. */
      } CUDBGKernelType;
      
      /*--------------------------- Elf Image Properties ---------------------------*/
      
      /* ELF image property values. */
      typedef enum {
          CUDBG_ELF_IMAGE_PROPERTIES_SYSTEM   = 0x001,   /* ELF image contains system kernels. */
      } CUDBGElfImageProperties;
      
      /*-------------------------- Physical Register Types -------------------------*/
      
      /* Physical register classes. */
      typedef enum {
          REG_CLASS_INVALID                   = 0x000,   /* invalid register */
          REG_CLASS_REG_CC                    = 0x001,   /* Condition register */
          REG_CLASS_REG_PRED                  = 0x002,   /* Predicate register */
          REG_CLASS_REG_ADDR                  = 0x003,   /* Address register */
          REG_CLASS_REG_HALF                  = 0x004,   /* 16-bit register (Currently unused) */
          REG_CLASS_REG_FULL                  = 0x005,   /* 32-bit register */
          REG_CLASS_MEM_LOCAL                 = 0x006,   /* register spilled in memory */
          REG_CLASS_LMEM_REG_OFFSET           = 0x007,   /* register at stack offset (ABI only) */
      } CUDBGRegClass;
      
      /*---------------------------- Application Events ----------------------------*/
      
      /* CUDA Kernel Events. */
      typedef enum {
          CUDBG_EVENT_INVALID                = 0x000,   /* Invalid event */
          CUDBG_EVENT_ELF_IMAGE_LOADED       = 0x001,   /* ELF image for CUDA kernel(s) is ready */
          CUDBG_EVENT_KERNEL_READY           = 0x002,   /* A CUDA kernel is ready to be launched */
          CUDBG_EVENT_KERNEL_FINISHED        = 0x003,   /* A CUDA kernel has terminated */
          CUDBG_EVENT_INTERNAL_ERROR         = 0x004,   /* Unexpected error. The API may be unstable. */
          CUDBG_EVENT_CTX_PUSH               = 0x005,   /* A CUDA context has been pushed. */
          CUDBG_EVENT_CTX_POP                = 0x006,   /* A CUDA context has been popped. */
          CUDBG_EVENT_CTX_CREATE             = 0x007,   /* A CUDA context has been created and pushed. */
          CUDBG_EVENT_CTX_DESTROY            = 0x008,   /* A CUDA context has been, popped if pushed, then destroyed. */
          CUDBG_EVENT_TIMEOUT                = 0x009,   /* Nothing happened for a while. This is heartbeat event. */
          CUDBG_EVENT_ATTACH_COMPLETE        = 0x00a,   /* Attach complete. */
          CUDBG_EVENT_DETACH_COMPLETE        = 0x00b,   /* Detach complete. */
          CUDBG_EVENT_ELF_IMAGE_UNLOADED     = 0x00c,   /* ELF image for CUDA kernels(s) no longer available */
      } CUDBGEventKind;
      
      /*------------------------------- Kernel Origin ------------------------------*/
      
      /* Where a kernel launch originated. */
      typedef enum {
          CUDBG_KNL_ORIGIN_CPU                = 0x000,   /* The kernel was launched from the CPU. */
          CUDBG_KNL_ORIGIN_GPU                = 0x001,   /* The kernel was launched from the GPU. */
      } CUDBGKernelOrigin;
      
      /*------------------------ Kernel Launch Notify Mode --------------------------*/
      
      /* How kernel launch notifications are delivered. */
      typedef enum {
          CUDBG_KNL_LAUNCH_NOTIFY_EVENT      = 0x000,   /* The kernel notifications generate events */
          CUDBG_KNL_LAUNCH_NOTIFY_DEFER      = 0x001,   /* The kernel notifications are deferred */
      } CUDBGKernelLaunchNotifyMode;
      
      /*---------------------- Application Event Queue Type ------------------------*/
      
      /* Selects between the two application event queues. */
      typedef enum {
          /* Synchronous event queue */
          CUDBG_EVENT_QUEUE_TYPE_SYNC  = 0,
          /* Asynchronous event queue */
          CUDBG_EVENT_QUEUE_TYPE_ASYNC = 1,
      } CUDBGEventQueueType;
      
      /*------------------------------ Elf Image Type ------------------------------*/
      
      /* Distinguishes the two flavours of ELF image. */
      typedef enum {
          /* Non-relocated ELF image */
          CUDBG_ELF_IMAGE_TYPE_NONRELOCATED = 0,
          /* Relocated ELF image */
          CUDBG_ELF_IMAGE_TYPE_RELOCATED    = 1,
      } CUDBGElfImageType;
      
      /*------------------------------ Code Address --------------------------------*/
      
      /* Selects which adjusted code address to retrieve. */
      typedef enum {
          CUDBG_ADJ_PREVIOUS_ADDRESS          = 0x000,   /* Get the adjusted previous code address. */
          CUDBG_ADJ_CURRENT_ADDRESS           = 0x001,   /* Get the adjusted current code address. */
          CUDBG_ADJ_NEXT_ADDRESS              = 0x002,   /* Get the adjusted next code address. */
      } CUDBGAdjAddrAction;
      
      /* Deprecated */
      typedef struct {
          CUDBGEventKindCUDA Kernel Events.  kind;
          union cases30_st {
              struct elfImageLoaded30_st {
                  char     *relocatedElfImage;
                  char     *nonRelocatedElfImage;
                  uint32_t  size;
              } elfImageLoaded;
              struct kernelReady30_st {
                  uint32_t dev;
                  uint32_t gridId;
                  uint32_t tid;
              } kernelReady;
              struct kernelFinished30_st {
                  uint32_t dev;
                  uint32_t gridId;
                  uint32_t tid;
              } kernelFinished;
          } cases;
      } CUDBGEvent30;
      
      /* Deprecated */
      typedef struct {
          CUDBGEventKindCUDA Kernel Events.  kind;
          union cases32_st {
              struct elfImageLoaded32_st {
                  char     *relocatedElfImage;
                  char     *nonRelocatedElfImage;
                  uint32_t  size;
                  uint32_t  dev;
                  uint64_t  context;
                  uint64_t  module;
              } elfImageLoaded;
              struct kernelReady32_st {
                  uint32_t dev;
                  uint32_t gridId;
                  uint32_t tid;
                  uint64_t context;
                  uint64_t module;
                  uint64_t function;
                  uint64_t functionEntry;
              } kernelReady;
              struct kernelFinished32_st {
                  uint32_t dev;
                  uint32_t gridId;
                  uint32_t tid;
                  uint64_t context;
                  uint64_t module;
                  uint64_t function;
                  uint64_t functionEntry;
              } kernelFinished;
              struct contextPush32_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextPush;
              struct contextPop32_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextPop;
              struct contextCreate32_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextCreate;
              struct contextDestroy32_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextDestroy;
          } cases;
      } CUDBGEvent32;
      
      /* Deprecated */
      typedef struct {
          CUDBGEventKindCUDA Kernel Events.  kind;
          union cases42_st {
              struct elfImageLoaded42_st {
                  char     *relocatedElfImage;
                  char     *nonRelocatedElfImage;
                  uint32_t  size32;
                  uint32_t  dev;
                  uint64_t  context;
                  uint64_t  module;
                  uint64_t  size;
              } elfImageLoaded;
              struct kernelReady42_st {
                  uint32_t dev;
                  uint32_t gridId;
                  uint32_t tid;
                  uint64_t context;
                  uint64_t module;
                  uint64_t function;
                  uint64_t functionEntry;
                  CuDim3   gridDim;
                  CuDim3   blockDim;
                  CUDBGKernelTypeHeader file for the CUDA debugger API.  type;
              } kernelReady;
              struct kernelFinished42_st {
                  uint32_t dev;
                  uint32_t gridId;
                  uint32_t tid;
                  uint64_t context;
                  uint64_t module;
                  uint64_t function;
                  uint64_t functionEntry;
              } kernelFinished;
              struct contextPush42_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextPush;
              struct contextPop42_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextPop;
              struct contextCreate42_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextCreate;
              struct contextDestroy42_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextDestroy;
          } cases;
      } CUDBGEvent42;
      
      typedef struct {
          CUDBGEventKindCUDA Kernel Events.  kind;
          union cases50_st {
              struct elfImageLoaded50_st {
                  char     *relocatedElfImage;
                  char     *nonRelocatedElfImage;
                  uint32_t  size32;
                  uint32_t  dev;
                  uint64_t  context;
                  uint64_t  module;
                  uint64_t  size;
              } elfImageLoaded;
              struct kernelReady50_st{
                  uint32_t dev;
                  uint32_t gridId;
                  uint32_t tid;
                  uint64_t context;
                  uint64_t module;
                  uint64_t function;
                  uint64_t functionEntry;
                  CuDim3   gridDim;
                  CuDim3   blockDim;
                  CUDBGKernelTypeHeader file for the CUDA debugger API.  type;
              } kernelReady;
              struct kernelFinished50_st {
                  uint32_t dev;
                  uint32_t gridId;
                  uint32_t tid;
                  uint64_t context;
                  uint64_t module;
                  uint64_t function;
                  uint64_t functionEntry;
              } kernelFinished;
              struct contextPush50_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextPush;
              struct contextPop50_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextPop;
              struct contextCreate50_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextCreate;
              struct contextDestroy50_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextDestroy;
              struct internalError50_st {
                  CUDBGResultResult values of all the API routines.  errorType;
              } internalError;
          } cases;
      } CUDBGEvent50;
      
      typedef struct {
          CUDBGEventKindCUDA Kernel Events.  kind;
          union cases55_st {
              struct elfImageLoaded55_st {
                  char     *relocatedElfImage;
                  char     *nonRelocatedElfImage;
                  uint32_t  size32;
                  uint32_t  dev;
                  uint64_t  context;
                  uint64_t  module;
                  uint64_t  size;
              } elfImageLoaded;
              struct kernelReady55_st{
                  uint32_t dev;
                  uint32_t gridId;
                  uint32_t tid;
                  uint64_t context;
                  uint64_t module;
                  uint64_t function;
                  uint64_t functionEntry;
                  CuDim3   gridDim;
                  CuDim3   blockDim;
                  CUDBGKernelTypeHeader file for the CUDA debugger API.  type;
                  uint64_t parentGridId;
                  uint64_t gridId64;
                  CUDBGKernelOriginHeader file for the CUDA debugger API.  origin;
              } kernelReady;
              struct kernelFinished55_st {
                  uint32_t dev;
                  uint32_t gridId;
                  uint32_t tid;
                  uint64_t context;
                  uint64_t module;
                  uint64_t function;
                  uint64_t functionEntry;
                  uint64_t gridId64;
              } kernelFinished;
              struct contextPush55_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextPush;
              struct contextPop55_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextPop;
              struct contextCreate55_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextCreate;
              struct contextDestroy55_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextDestroy;
              struct internalError55_st {
                  CUDBGResultResult values of all the API routines.  errorType;
              } internalError;
          } cases;
      } CUDBGEvent55;
      
      #pragma pack(push,1)
      typedef struct {
          CUDBGEventKindCUDA Kernel Events.  kind;
          union cases_st {
              struct elfImageLoaded_st {
                  uint32_t  dev;
                  uint64_t  context;
                  uint64_t  module;
                  uint64_t  size;
                  uint64_t  handle;
                  uint32_t  properties;
              } elfImageLoaded;
              struct elfImageUnloaded_st {
                  uint32_t  dev;
                  uint64_t  context;
                  uint64_t  module;
                  uint64_t  size;
                  uint64_t  handle;
              } elfImageUnloaded;
              struct kernelReady_st{
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t gridId;
                  uint64_t context;
                  uint64_t module;
                  uint64_t function;
                  uint64_t functionEntry;
                  CuDim3   gridDim;
                  CuDim3   blockDim;
                  CUDBGKernelTypeHeader file for the CUDA debugger API.  type;
                  uint64_t parentGridId;
                  CUDBGKernelOriginHeader file for the CUDA debugger API.  origin;
              } kernelReady;
              struct kernelFinished_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
                  uint64_t module;
                  uint64_t function;
                  uint64_t functionEntry;
                  uint64_t gridId;
              } kernelFinished;
              struct contextPush_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextPush;
              struct contextPop_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextPop;
              struct contextCreate_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextCreate;
              struct contextDestroy_st {
                  uint32_t dev;
                  uint32_t tid;
                  uint64_t context;
              } contextDestroy;
              struct internalError_st {
                  CUDBGResultResult values of all the API routines.  errorType;
              } internalError;
          } cases;
      } CUDBGEventEvent information container. ;
      #pragma pack(pop)
      
      
      /*
       * Event information passed to callback set with setNotifyNewEventCallback
       * function. This variant carries only `tid`.
       */
      typedef struct {
          uint32_t tid;
      } CUDBGEventCallbackData40;
      
      /*
       * Event information passed to callback set with setNotifyNewEventCallback
       * function. Carries `tid` and `timeout`.
       */
      typedef struct {
          uint32_t tid;
          uint32_t timeout;
      } CUDBGEventCallbackData;
      
      #pragma pack(push,1)
      typedef struct {
          uint32_t dev;
          uint64_t gridId64;
          uint32_t tid;
          uint64_t context;
          uint64_t module;
          uint64_t function;
          uint64_t functionEntry;
          CuDim3   gridDim;
          CuDim3   blockDim;
          CUDBGKernelTypeHeader file for the CUDA debugger API.  type;
          uint64_t parentGridId;
          CUDBGKernelOriginHeader file for the CUDA debugger API.  origin;
      } CUDBGGridInfoGrid info. ;
      #pragma pack(pop)
      
      typedef void (*CUDBGNotifyNewEventCallback31function type of the function called to notify debugger of the presence of a new event in the event queue. )(void *data);
      typedef void (*CUDBGNotifyNewEventCallback40)(CUDBGEventCallbackData40Event information passed to callback set with setNotifyNewEventCallback function.  *data);
      typedef void (*CUDBGNotifyNewEventCallbackfunction type of the function called to notify debugger of the presence of a new event in the event queue. )(CUDBGEventCallbackDataEvent information passed to callback set with setNotifyNewEventCallback function.  *data);
      
      /*-------------------------------- Exceptions ------------------------------*/
      
      /*
       * Exception codes, as reported per lane in CUDBGLaneState::exception and
       * by CUDBGAPI_st::readLaneException.
       *
       * The numeric values are part of the binary interface; do not renumber.
       */
      typedef enum {
          CUDBG_EXCEPTION_UNKNOWN = 0xFFFFFFFFU, /* Force sizeof(CUDBGException_t)==4 */
          CUDBG_EXCEPTION_NONE = 0,
          CUDBG_EXCEPTION_LANE_ILLEGAL_ADDRESS = 1,
          CUDBG_EXCEPTION_LANE_USER_STACK_OVERFLOW = 2,
          CUDBG_EXCEPTION_DEVICE_HARDWARE_STACK_OVERFLOW = 3,
          CUDBG_EXCEPTION_WARP_ILLEGAL_INSTRUCTION = 4,
          CUDBG_EXCEPTION_WARP_OUT_OF_RANGE_ADDRESS = 5,
          CUDBG_EXCEPTION_WARP_MISALIGNED_ADDRESS = 6,
          CUDBG_EXCEPTION_WARP_INVALID_ADDRESS_SPACE = 7,
          CUDBG_EXCEPTION_WARP_INVALID_PC = 8,
          CUDBG_EXCEPTION_WARP_HARDWARE_STACK_OVERFLOW = 9,
          CUDBG_EXCEPTION_DEVICE_ILLEGAL_ADDRESS = 10,
          CUDBG_EXCEPTION_LANE_MISALIGNED_ADDRESS = 11,
          CUDBG_EXCEPTION_WARP_ASSERT = 12,
          CUDBG_EXCEPTION_LANE_SYSCALL_ERROR = 13,
          CUDBG_EXCEPTION_WARP_ILLEGAL_ADDRESS = 14,
      } CUDBGException_t;
      
      /*------------------------------ Warp State --------------------------------*/
      #pragma pack(push,1)
      typedef struct {
          uint64_t virtualPC;
          CuDim3 threadIdx;
          CUDBGException_tHeader file for the CUDA debugger API.  exception;
      } CUDBGLaneState;
      
      /*
       * State of one warp; output type of CUDBGAPI_st::readWarpState.
       * Declared inside the surrounding pack(1) region, so member order and
       * widths define the binary layout.
       */
      typedef struct {
          uint64_t gridId;
          uint64_t errorPC;
          CuDim3 blockIdx;
          uint32_t validLanes;      /* presumably the mask readValidLanes reports -- TODO confirm */
          uint32_t activeLanes;     /* presumably the mask readActiveLanes reports -- TODO confirm */
          uint32_t errorPCValid;    /* NOTE(review): looks like a validity flag for errorPC -- confirm */
          CUDBGLaneState lane[32];  /* fixed array of 32 per-lane records */
      } CUDBGWarpState;
      #pragma pack(pop)
      
      #pragma pack(push,1)
      /*
       * One memory region, described by a start address and a size. Entries of
       * this type are written to the `memoryInfo` array passed to
       * CUDBGAPI_st::getManagedMemoryRegionInfo.
       */
      typedef struct {
          uint64_t startAddress;
          uint64_t size;            /* NOTE(review): unit not stated here; presumably bytes -- confirm */
      } CUDBGMemoryInfo;
      #pragma pack(pop)
      
      /*--------------------------------- Exports --------------------------------*/
      
      /* Handle to the CUDA debugger API routines: a pointer to a read-only
       * struct CUDBGAPI_st function table. */
      typedef const struct CUDBGAPI_st *CUDBGAPI;
      
      CUDBGResultResult values of all the API routines.  cudbgGetAPI(uint32_t major, uint32_t minor, uint32_t rev, CUDBGAPI *api);
      CUDBGResultResult values of all the API routines.  cudbgGetAPIVersion(uint32_t *major, uint32_t *minor, uint32_t *rev);
      CUDBGResultResult values of all the API routines.  cudbgMain(int apiClientPid, uint32_t apiClientRevision, int sessionId, int attachState,
                            int attachEventInitialized, int writeFd, int detachFd, int attachStubInUse,
                            int enablePreemptionDebugging);
      void cudbgApiInit(uint32_t arg);
      void cudbgApiAttach(void);
      void cudbgApiDetach(void);
      void CUDBG_REPORT_DRIVER_API_ERROR(void);
      void CUDBG_REPORT_DRIVER_INTERNAL_ERROR(void);
      
      /*
       * Variables with external linkage: declared here, defined elsewhere.
       * NOTE(review): the upper-case spellings may be macros defined earlier in
       * this header that expand to the real symbol names -- confirm before
       * looking these up by name in a binary. Individual meanings are not
       * described in this listing.
       */
      extern uint32_t CUDBG_IPC_FLAG_NAME;
      extern uint32_t CUDBG_RPC_ENABLED;
      extern uint32_t CUDBG_APICLIENT_PID;
      extern uint32_t CUDBG_I_AM_DEBUGGER;
      extern uint32_t CUDBG_DEBUGGER_INITIALIZED;
      extern uint32_t CUDBG_APICLIENT_REVISION;
      extern uint32_t CUDBG_SESSION_ID;
      /* The following four are 64 bits wide, unlike the rest of the list. */
      extern uint64_t CUDBG_REPORTED_DRIVER_API_ERROR_CODE;
      extern uint64_t CUDBG_REPORTED_DRIVER_API_ERROR_FUNC_NAME_SIZE;
      extern uint64_t CUDBG_REPORTED_DRIVER_API_ERROR_FUNC_NAME_ADDR;
      extern uint64_t CUDBG_REPORTED_DRIVER_INTERNAL_ERROR_CODE;
      extern uint32_t CUDBG_ATTACH_HANDLER_AVAILABLE;
      extern uint32_t CUDBG_DETACH_SUSPENDED_DEVICES_MASK;
      extern uint32_t CUDBG_ENABLE_LAUNCH_BLOCKING;
      extern uint32_t CUDBG_ENABLE_INTEGRATED_MEMCHECK;
      extern uint32_t CUDBG_ENABLE_PREEMPTION_DEBUGGING;
      extern uint32_t CUDBG_RESUME_FOR_ATTACH_DETACH;
      extern uint32_t CUDBG_REPORT_DRIVER_API_ERROR_FLAGS;
      
      
      struct CUDBGAPI_stThe CUDA debugger API routines.  {
          /* Initialization */
          CUDBGResultResult values of all the API routines.  (*initializeInitialize the API. )(void);
          CUDBGResultResult values of all the API routines.  (*finalizeFinalize the API and free all memory. )(void);
      
          /* Device Execution Control */
          CUDBGResultResult values of all the API routines.  (*suspendDeviceSuspends a running CUDA device. )(uint32_t dev);
          CUDBGResultResult values of all the API routines.  (*resumeDeviceResume a suspended CUDA device. )(uint32_t dev);
          CUDBGResultResult values of all the API routines.  (*singleStepWarp40Single step an individual warp on a suspended CUDA device. )(uint32_t dev, uint32_t sm, uint32_t wp);
      
          /* Breakpoints */
          CUDBGResultResult values of all the API routines.  (*setBreakpoint31Sets a breakpoint at the given instruction address. )(uint64_t addr);
          CUDBGResultResult values of all the API routines.  (*unsetBreakpoint31Unsets a breakpoint at the given instruction address. )(uint64_t addr);
      
          /* Device State Inspection */
          CUDBGResultResult values of all the API routines.  (*readGridId50Reads the CUDA grid index running on a valid warp. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *gridId);
          CUDBGResultResult values of all the API routines.  (*readBlockIdx32Reads the two-dimensional CUDA block index running on a valid warp. )(uint32_t dev, uint32_t sm, uint32_t wp, CuDim2 *blockIdx);
          CUDBGResultResult values of all the API routines.  (*readThreadIdxReads the CUDA thread index running on valid lane. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, CuDim3 *threadIdx);
          CUDBGResultResult values of all the API routines.  (*readBrokenWarpsReads the bitmask of warps that are at a breakpoint on a given SM. )(uint32_t dev, uint32_t sm, uint64_t *brokenWarpsMask);
          CUDBGResultResult values of all the API routines.  (*readValidWarpsReads the bitmask of valid warps on a given SM. )(uint32_t dev, uint32_t sm, uint64_t *validWarpsMask);
          CUDBGResultResult values of all the API routines.  (*readValidLanesReads the bitmask of valid lanes on a given warp. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *validLanesMask);
          CUDBGResultResult values of all the API routines.  (*readActiveLanesReads the bitmask of active lanes on a valid warp. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *activeLanesMask);
          CUDBGResultResult values of all the API routines.  (*readCodeMemoryReads content at address in the code memory segment. )(uint32_t dev, uint64_t addr, void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*readConstMemoryReads content at address in the constant memory segment. )(uint32_t dev, uint64_t addr, void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*readGlobalMemory31Reads content at address in the global memory segment. )(uint32_t dev, uint64_t addr, void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*readParamMemoryReads content at address in the param memory segment. )(uint32_t dev, uint32_t sm, uint32_t wp, uint64_t addr, void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*readSharedMemoryReads content at address in the shared memory segment. )(uint32_t dev, uint32_t sm, uint32_t wp, uint64_t addr, void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*readLocalMemoryReads content at address in the local memory segment. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*readRegisterReads content of a hardware register. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t regno, uint32_t *val);
          CUDBGResultResult values of all the API routines.  (*readPCReads the PC on the given active lane. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t *pc);
          CUDBGResultResult values of all the API routines.  (*readVirtualPCReads the virtual PC on the given active lane. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t *pc);
          CUDBGResultResult values of all the API routines.  (*readLaneStatusReads the status of the given lane. For specific error values, use readLaneException. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, bool *error);
      
          /* Device State Alteration */
          CUDBGResultResult values of all the API routines.  (*writeGlobalMemory31Writes content to address in the global memory segment. )(uint32_t dev, uint64_t addr, const void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*writeParamMemoryWrites content to address in the param memory segment. )(uint32_t dev, uint32_t sm, uint32_t wp, uint64_t addr, const void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*writeSharedMemoryWrites content to address in the shared memory segment. )(uint32_t dev, uint32_t sm, uint32_t wp, uint64_t addr, const void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*writeLocalMemoryWrites content to address in the local memory segment. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, const void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*writeRegisterWrites content to a hardware register. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t regno, uint32_t val);
      
          /* Grid Properties */
          CUDBGResultResult values of all the API routines.  (*getGridDim32Get the number of blocks in the given grid. )(uint32_t dev, uint32_t sm, uint32_t wp, CuDim2 *gridDim);
          CUDBGResultResult values of all the API routines.  (*getBlockDimGet the number of threads in the given block. )(uint32_t dev, uint32_t sm, uint32_t wp, CuDim3 *blockDim);
          CUDBGResultResult values of all the API routines.  (*getTIDGet the ID of the Linux thread hosting the context of the grid. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *tid);
          CUDBGResultResult values of all the API routines.  (*getElfImage32Get the relocated or non-relocated ELF image and size for the grid on the given device. )(uint32_t dev, uint32_t sm, uint32_t wp, bool relocated, void **elfImage, uint32_t *size);
      
          /* Device Properties */
          CUDBGResultResult values of all the API routines.  (*getDeviceTypeGet the string description of the device. )(uint32_t dev, char *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*getSmTypeGet the SM type of the device. )(uint32_t dev, char *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*getNumDevicesGet the number of installed CUDA devices. )(uint32_t *numDev);
          CUDBGResultResult values of all the API routines.  (*getNumSMsGet the total number of SMs on the device. )(uint32_t dev, uint32_t *numSMs);
          CUDBGResultResult values of all the API routines.  (*getNumWarpsGet the number of warps per SM on the device. )(uint32_t dev, uint32_t *numWarps);
          CUDBGResultResult values of all the API routines.  (*getNumLanesGet the number of lanes per warp on the device. )(uint32_t dev, uint32_t *numLanes);
          CUDBGResultResult values of all the API routines.  (*getNumRegistersGet the number of registers per lane on the device. )(uint32_t dev, uint32_t *numRegs);
      
          /* DWARF-related routines */
          CUDBGResultResult values of all the API routines.  (*getPhysicalRegister30Get the physical register number(s) assigned to a virtual register name 'reg' at a given PC, if 'reg' is live at that PC. )(uint64_t pc, char *reg, uint32_t *buf, uint32_t sz, uint32_t *numPhysRegs, CUDBGRegClassHeader file for the CUDA debugger API.  *regClass);
          CUDBGResultResult values of all the API routines.  (*disassembleDisassemble instruction at instruction address. )(uint32_t dev, uint64_t addr, uint32_t *instSize, char *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*isDeviceCodeAddress55Determines whether a virtual address resides within device code. This API is strongly deprecated. Use CUDBGAPI_st::isDeviceCodeAddress instead. )(uintptr_t addr, bool *isDeviceAddress);
          CUDBGResultResult values of all the API routines.  (*lookupDeviceCodeSymbolDetermines whether a symbol represents a function in device code and returns its virtual address. )(char *symName, bool *symFound, uintptr_t *symAddr);
      
          /* Events */
          CUDBGResultResult values of all the API routines.  (*setNotifyNewEventCallback31Provides the API with the function to call to notify the debugger of a new application or device event. )(CUDBGNotifyNewEventCallback31function type of the function called to notify debugger of the presence of a new event in the event queue.  callback, void *data);
          CUDBGResultResult values of all the API routines.  (*getNextEvent30Copies the next available event in the event queue into 'event' and removes it from the queue. )(CUDBGEvent30 *event);
          CUDBGResultResult values of all the API routines.  (*acknowledgeEvent30Inform the debugger API that the event has been processed. )(CUDBGEvent30 *event);
      
          /* 3.1 Extensions */
          CUDBGResultResult values of all the API routines.  (*getGridAttributeGet the value of a grid attribute. )(uint32_t dev, uint32_t sm, uint32_t wp, CUDBGAttributeHeader file for the CUDA debugger API.  attr, uint64_t *value);
          CUDBGResultResult values of all the API routines.  (*getGridAttributesGet several grid attribute values in a single API call. )(uint32_t dev, uint32_t sm, uint32_t wp, CUDBGAttributeValuePair *pairs, uint32_t numPairs);
          CUDBGResultResult values of all the API routines.  (*getPhysicalRegister40Get the physical register number(s) assigned to a virtual register name 'reg' at a given PC, if 'reg' is live at that PC. )(uint32_t dev, uint32_t sm, uint32_t wp, uint64_t pc, char *reg, uint32_t *buf, uint32_t sz, uint32_t *numPhysRegs, CUDBGRegClassHeader file for the CUDA debugger API.  *regClass);
          CUDBGResultResult values of all the API routines.  (*readLaneExceptionReads the exception type for a given lane. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, CUDBGException_tHeader file for the CUDA debugger API.  *exception);
          CUDBGResultResult values of all the API routines.  (*getNextEvent32Copies the next available event in the event queue into 'event' and removes it from the queue. )(CUDBGEvent32 *event);
          CUDBGResultResult values of all the API routines.  (*acknowledgeEvents42Inform the debugger API that synchronous events have been processed. )(void);
      
          /* 3.1 - ABI */
          CUDBGResultResult values of all the API routines.  (*readCallDepth32Reads the call depth (number of calls) for a given warp. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t *depth);
          CUDBGResultResult values of all the API routines.  (*readReturnAddress32Reads the physical return address for a call level. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t level, uint64_t *ra);
          CUDBGResultResult values of all the API routines.  (*readVirtualReturnAddress32Reads the virtual return address for a call level. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t level, uint64_t *ra);
      
          /* 3.2 Extensions */
          CUDBGResultResult values of all the API routines.  (*readGlobalMemory55Reads content at address in the global memory segment (entire 40-bit VA on Fermi+). )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*writeGlobalMemory55Writes content to address in the global memory segment (entire 40-bit VA on Fermi+). )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, const void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*readPinnedMemoryReads content at pinned address in system memory. )(uint64_t addr, void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*writePinnedMemoryWrites content to pinned address in system memory. )(uint64_t addr, const void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*setBreakpointSets a breakpoint at the given instruction address for the given device. Before setting a breakpoint, CUDBGAPI_st::getAdjustedCodeAddress should be called to get the adjusted breakpoint address. )(uint32_t dev, uint64_t addr);
          CUDBGResultResult values of all the API routines.  (*unsetBreakpointUnsets a breakpoint at the given instruction address for the given device. )(uint32_t dev, uint64_t addr);
          CUDBGResultResult values of all the API routines.  (*setNotifyNewEventCallback40Provides the API with the function to call to notify the debugger of a new application or device event. )(CUDBGNotifyNewEventCallback40 callback);
      
          /* 4.0 Extensions */
          CUDBGResultResult values of all the API routines.  (*getNextEvent42Copies the next available event in the event queue into 'event' and removes it from the queue. )(CUDBGEvent42 *event);
          CUDBGResultResult values of all the API routines.  (*readTextureMemoryRead the content of texture memory with given id and coords on sm_20 and lower. )(uint32_t devId, uint32_t vsm, uint32_t wp, uint32_t id, uint32_t dim, uint32_t *coords, void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*readBlockIdxReads the CUDA block index running on a valid warp. )(uint32_t dev, uint32_t sm, uint32_t wp, CuDim3 *blockIdx);
          CUDBGResultResult values of all the API routines.  (*getGridDimGet the number of blocks in the given grid. )(uint32_t dev, uint32_t sm, uint32_t wp, CuDim3 *gridDim);
          CUDBGResultResult values of all the API routines.  (*readCallDepthReads the call depth (number of calls) for a given lane. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t *depth);
          CUDBGResultResult values of all the API routines.  (*readReturnAddressReads the physical return address for a call level. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t level, uint64_t *ra);
          CUDBGResultResult values of all the API routines.  (*readVirtualReturnAddressReads the virtual return address for a call level. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t level, uint64_t *ra);
          CUDBGResultResult values of all the API routines.  (*getElfImageGet the relocated or non-relocated ELF image and size for the grid on the given device. )(uint32_t dev, uint32_t sm, uint32_t wp, bool relocated, void **elfImage, uint64_t *size);
      
          /* 4.1 Extensions */
          CUDBGResultResult values of all the API routines.  (*getHostAddrFromDeviceAddrgiven a device virtual address, return a corresponding system memory virtual address. )(uint32_t dev, uint64_t device_addr, uint64_t *host_addr);
          CUDBGResultResult values of all the API routines.  (*singleStepWarpSingle step an individual warp on a suspended CUDA device. )(uint32_t dev, uint32_t sm, uint32_t wp, uint64_t *warpMask);
          CUDBGResultResult values of all the API routines.  (*setNotifyNewEventCallbackProvides the API with the function to call to notify the debugger of a new application or device event. )(CUDBGNotifyNewEventCallbackfunction type of the function called to notify debugger of the presence of a new event in the event queue.  callback);
          CUDBGResultResult values of all the API routines.  (*readSyscallCallDepthReads the call depth of syscalls for a given lane. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t *depth);
      
          /* 4.2 Extensions */
          CUDBGResultResult values of all the API routines.  (*readTextureMemoryBindlessRead the content of texture memory with given symtab index and coords on sm_30 and higher. )(uint32_t devId, uint32_t vsm, uint32_t wp, uint32_t texSymtabIndex, uint32_t dim, uint32_t *coords, void *buf, uint32_t sz);
      
          /* 5.0 Extensions */
          CUDBGResultResult values of all the API routines.  (*clearAttachStateClear attach-specific state prior to detach. )(void);
          CUDBGResultResult values of all the API routines.  (*getNextSyncEvent50)(CUDBGEvent50 *event);
          CUDBGResultResult values of all the API routines.  (*memcheckReadErrorAddressGet the address that memcheck detected an error on. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t *address, ptxStorageKind *storage);
          CUDBGResultResult values of all the API routines.  (*acknowledgeSyncEventsInform the debugger API that synchronous events have been processed. )(void);
          CUDBGResultResult values of all the API routines.  (*getNextAsyncEvent50Copies the next available event in the asynchronous event queue into 'event' and removes it from the queue. The asynchronous event queue is held separate from the normal event queue, and does not require acknowledgement from the debug client. )(CUDBGEvent50 *event);
          CUDBGResultResult values of all the API routines.  (*requestCleanupOnDetach55Request for cleanup of driver state when detaching. )(void);
          CUDBGResultResult values of all the API routines.  (*initializeAttachStubInitialize the attach stub. )(void);
          CUDBGResultResult values of all the API routines.  (*getGridStatus50Check whether the grid corresponding to the given gridId is still present on the device. )(uint32_t dev, uint32_t gridId, CUDBGGridStatusGrid status.  *status);
      
          /* 5.5 Extensions */
          CUDBGResultResult values of all the API routines.  (*getNextSyncEvent55Copies the next available event in the synchronous event queue into 'event' and removes it from the queue. )(CUDBGEvent55 *event);
          CUDBGResultResult values of all the API routines.  (*getNextAsyncEvent55Copies the next available event in the asynchronous event queue into 'event' and removes it from the queue. The asynchronous event queue is held separate from the normal event queue, and does not require acknowledgement from the debug client. )(CUDBGEvent55 *event);
          CUDBGResultResult values of all the API routines.  (*getGridInfoGet information about the specified grid. If the context of the grid has already been destroyed, the function will return CUDBG_ERROR_INVALID_GRID, although the grid id is correct. )(uint32_t dev, uint64_t gridId64, CUDBGGridInfoGrid info.  *gridInfo);
          CUDBGResultResult values of all the API routines.  (*readGridIdReads the 64-bit CUDA grid index running on a valid warp. )(uint32_t dev, uint32_t sm, uint32_t wp, uint64_t *gridId64);
          CUDBGResultResult values of all the API routines.  (*getGridStatusCheck whether the grid corresponding to the given gridId is still present on the device. )(uint32_t dev, uint64_t gridId64, CUDBGGridStatusGrid status.  *status);
          CUDBGResultResult values of all the API routines.  (*setKernelLaunchNotificationModeSet the launch notification policy. ) (CUDBGKernelLaunchNotifyModeHeader file for the CUDA debugger API.  mode);
          CUDBGResultResult values of all the API routines.  (*getDevicePCIBusInfoGet PCI bus and device ids associated with device devId. ) (uint32_t devId, uint32_t *pciBusId, uint32_t *pciDevId);
          CUDBGResultResult values of all the API routines.  (*readDeviceExceptionStateGet the exception state of the SMs on the device. ) (uint32_t devId, uint64_t *exceptionSMMask);
      
         /* 6.0 Extensions */
          CUDBGResultResult values of all the API routines.  (*getAdjustedCodeAddressThe client must call this function before inserting a breakpoint, or when the previous or next code address is needed. Returns the adjusted code address for a given code address for a given device. )(uint32_t devId, uint64_t address, uint64_t *adjustedAddress, CUDBGAdjAddrActionHeader file for the CUDA debugger API.  adjAction);
          CUDBGResultResult values of all the API routines.  (*readErrorPCGet the hardware reported error PC if it exists. )(uint32_t devId, uint32_t sm, uint32_t wp, uint64_t *errorPC, bool *errorPCValid);
          CUDBGResultResult values of all the API routines.  (*getNextEventCopies the next available event into 'event' and removes it from the queue. )(CUDBGEventQueueType type, CUDBGEventEvent information container.   *event);
          CUDBGResultResult values of all the API routines.  (*getElfImageByHandleGet the relocated or non-relocated ELF image for the given handle on the given device. )(uint32_t devId, uint64_t handle, CUDBGElfImageType type, void *elfImage, uint64_t size);
          CUDBGResultResult values of all the API routines.  (*resumeWarpsUntilPCInserts a temporary breakpoint at the specified virtual PC, and resumes all warps in the specified bitmask on a given SM. As compared to CUDBGAPI_st::resumeDevice, CUDBGAPI_st::resumeWarpsUntilPC provides finer-grain control by resuming a selected set of warps on the same SM. The main intended usage is to accelerate the single-stepping process when the target PC is known in advance. Instead of single-stepping each warp individually until the target PC is hit, the client can issue this API. When this API is used, errors within CUDA kernels will no longer be reported precisely. In the situation where resuming warps is not possible, this API will return CUDBG_ERROR_WARP_RESUME_NOT_POSSIBLE. The client should then fall back to using CUDBGAPI_st::singleStepWarp or CUDBGAPI_st::resumeDevice. )(uint32_t devId, uint32_t sm, uint64_t warpMask, uint64_t virtPC);
          CUDBGResultResult values of all the API routines.  (*readWarpStateGet state of a given warp. )(uint32_t devId, uint32_t sm, uint32_t wp, CUDBGWarpState *state);
          CUDBGResultResult values of all the API routines.  (*readRegisterRangeReads content of a hardware range of hardware registers. )(uint32_t devId, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t index, uint32_t registers_size, uint32_t *registers);
          CUDBGResultResult values of all the API routines.  (*readGenericMemoryReads content at an address in the generic address space. This function determines if the given address falls into the local, shared, or global memory window. It then accesses memory taking into account the hardware co-ordinates provided as inputs. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*writeGenericMemoryWrites content to an address in the generic address space. This function determines if the given address falls into the local, shared, or global memory window. It then accesses memory taking into account the hardware co-ordinates provided as inputs. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint64_t addr, const void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*readGlobalMemoryReads content at an address in the global address space. If the address is valid on more than one device and one of those devices does not support UVA, an error is returned. )(uint64_t addr, void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*writeGlobalMemoryWrites content to an address in the global address space. If the address is valid on more than one device and one of those devices does not support UVA, an error is returned. )(uint64_t addr, const void *buf, uint32_t sz);
          CUDBGResultResult values of all the API routines.  (*getManagedMemoryRegionInfoReturns a sorted list of managed memory regions The sorted list of memory regions starts from a region containing the specified starting address. If the starting address is set to 0, a sorted list of managed memory regions is returned which starts from the managed memory region with the lowest start address. )(uint64_t startAddress, CUDBGMemoryInfo *memoryInfo, uint32_t memoryInfo_size, uint32_t *numEntries);
          CUDBGResultResult values of all the API routines.  (*isDeviceCodeAddressDetermines whether a virtual address resides within device code. )(uintptr_t addr, bool *isDeviceAddress);
          CUDBGResultResult values of all the API routines.  (*requestCleanupOnDetachRequest for cleanup of driver state when detaching. )(uint32_t appResumeFlag);
      
         /* 6.5 Extensions */
          CUDBGResultResult values of all the API routines.  (*readPredicatesReads content of hardware predicate registers. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t predicates_size, uint32_t *predicates);
          CUDBGResultResult values of all the API routines.  (*writePredicatesWrites content to hardware predicate registers. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t predicates_size, const uint32_t *predicates);
          CUDBGResultResult values of all the API routines.  (*getNumPredicatesGet the number of predicate registers per lane on the device. )(uint32_t dev, uint32_t *numPredicates);
          CUDBGResultResult values of all the API routines.  (*readCCRegisterReads the hardware CC register. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t *val);
          CUDBGResultResult values of all the API routines.  (*writeCCRegisterWrites the hardware CC register. )(uint32_t dev, uint32_t sm, uint32_t wp, uint32_t ln, uint32_t val);
      
          CUDBGResultResult values of all the API routines.  (*getDeviceNameGet the device name string. )(uint32_t dev, char *buf, uint32_t sz);
      };
      
      #ifdef __cplusplus
      }
      #endif
      
      
      
      
      
      
      #endif

Classes

struct CUDBGAPI_st
The CUDA debugger API routines.
struct CUDBGEvent
Event information container.
struct CUDBGEventCallbackData
Event information passed to callback set with setNotifyNewEventCallback function.
struct CUDBGEventCallbackData40
Event information passed to callback set with setNotifyNewEventCallback function.
struct CUDBGGridInfo
Grid info.

Debugger API (PDF) - CUDA Toolkit v5.5 for POWER8 (older) - Last updated October 15, 2014 - Send Feedback