Why do I receive the error "CUDA_ERROR_ILLEGAL_ADDRESS" despite of successfully compiling the cuda mex function.

Question

hasan huseyin karaoglu on 22 Mar 2022

0
Link

Direct link to this question

https://in.mathworks.com/matlabcentral/answers/1677524-why-do-i-receive-the-error-cuda_error_illegal_address-despite-of-successfully-compiling-the-cuda-m

Answered: Joss Knight on 24 Mar 2022

Hello! I'm trying to convert a C/MEX file to Cuda Mex file with MATLAB 2019a, CUDA Toolkit version 10.0 and Visual Studio 2015 Professional. The Graphics Processing Unit in my PC is GeForce RTX 2080 Ti. The Cuda MEX codes are given below:

#include "mex.h"
#include <string.h>
#include "gpu/mxGPUArray.h"
/* Input Arguments */
#define	X_IN   prhs[0] // Image
#define SZ_IN  prhs[1] // Patch Size
#define S_IN   prhs[2] // Step Size
/* Output Arguments */
#define	B_OUT	plhs[0] // Output (Columnized Patches)
void __global__ im2colstep2GPU(double const *x, double *b, mwSize sz[], mwSize stepsize[], mwSize n[]){
   mwIndex i, j, k, l, m, blocknum;
   
   mwSize NN = sz[0] * sz[1] *sz[2] ;
   mwSize nn = blockDim.x * blockIdx.x + threadIdx.x;
   // int total_threads = gridDim.x * blockDim.x;
   /* Do the actual computation */
   blocknum = 0;
   
for(; nn<NN; nn++){   
   /* iterate over all blocks */
   for (k=0; k<=n[2]-sz[2]; k+=stepsize[2]) {
     for (j=0; j<=n[1]-sz[1]; j+=stepsize[1]) {
       for (i=0; i<=n[0]-sz[0]; i+=stepsize[0]) {
          
         /* copy single block */
         for (m=0; m<sz[2]; m++) {
           for (l=0; l<sz[1]; l++) {
              // b[blocknum*sz[0]*sz[1]*sz[2] + m*sz[0]*sz[1] + l*sz[0]] = x[(k + m)*n[0]*n[1] + (j+l)*n[0] + i];
             memcpy(b + blocknum*sz[0]*sz[1]*sz[2] + m*sz[0]*sz[1] + l*sz[0], x+(k+m)*n[0]*n[1]+(j+l)*n[0]+i, sz[0]*sizeof(double));
           }
         }
         blocknum++;
      }
     }
   }
}    
   return;
}
void mexFunction(int nlhs, mxArray *plhs[], 
		             int nrhs, const mxArray*prhs[])
     
{ 
    mxGPUArray const *x;
    mxGPUArray *b;
    double *s;
    double const *device_x;
    double *device_b;
    mwSize sz[3], stepsize[3], n[3], ndims;
        
    /* Check for proper number of arguments */
    
    if (nrhs < 2 || nrhs > 3) {
      mexErrMsgTxt("Invalid number of input arguments."); 
    } else if (nlhs > 1) {
      mexErrMsgTxt("Too many output arguments."); 
    } 
    
    mxInitGPU();
    
    /* Check the the input dimensions */ 
    
//     ndims = mxGetNumberOfDimensions(X_IN);
//     mexPrintf("\n number of dimensions is %d", ndims);
// 
//     if (!mxIsDouble(X_IN) || mxIsComplex || ndims>3) {
//       mexErrMsgTxt("X should be a 2-D or 3-D double matrix.");
//     }
        /* Get Input Arrays*/
    x = mxGPUCreateFromMxArray(X_IN);
    
    /* Check the the input dimensions */ 
    ndims = mxGPUGetNumberOfDimensions(x);
    
    if (mxGPUGetClassID(x) != mxDOUBLE_CLASS  || mxGPUGetClassID(x) == mxCOMPLEX || ndims>3) {
      mexErrMsgTxt("X should be a 2-D or 3-D double matrix.");
    }
    if (!mxIsDouble(SZ_IN) || mxIsComplex(SZ_IN) || mxGetNumberOfDimensions(SZ_IN)>2 || mxGetM(SZ_IN)*mxGetN(SZ_IN)!=ndims) {
      mexErrMsgTxt("Invalid block size.");
    }
    if (nrhs == 3) {
      if (!mxIsDouble(S_IN) || mxIsComplex(S_IN) || mxGetNumberOfDimensions(S_IN)>2 || mxGetM(S_IN)*mxGetN(S_IN)!=ndims) {
        mexErrMsgTxt("Invalid step size.");
      }
    }
    
    /* Get parameters */
    s = mxGetPr(SZ_IN);
    
    if (s[0]<1 || s[1]<1 || (ndims==3 && s[2]<1)) {
      mexErrMsgTxt("Invalid block size.");
    }
    sz[0] = (mwSize)(s[0] + 0.01);
    sz[1] = (mwSize)(s[1] + 0.01);
    sz[2] = ndims==3 ? (mwSize)(s[2] + 0.01) : 1;
    
    if (nrhs == 3) {
      s = mxGetPr(S_IN);
      if (s[0]<1 || s[1]<1 || (ndims==3 && s[2]<1)) {
        mexErrMsgTxt("Invalid step size.");
      }
      stepsize[0] = (mwSize)(s[0] + 0.01);
      stepsize[1] = (mwSize)(s[1] + 0.01);
      stepsize[2] = ndims==3 ? (mwSize)(s[2] + 0.01) : 1;
    }
    else {
      stepsize[0] = stepsize[1] = stepsize[2] = 1;
    }
    
    n[0] = (mxGPUGetDimensions(x))[0];
    n[1] = (mxGPUGetDimensions(x))[1];
    n[2] = ndims==3 ? (mxGPUGetDimensions(x))[2] : 1;
    
    if (n[0]<sz[0] || n[1]<sz[1] || (ndims==3 && n[2]<sz[2])) {
      mexErrMsgTxt("Block size too large.");
    }
    
    /* Create a matrix for the return argument */
    // b = mxGetPr(B_OUT);
    
    mwSize b_dims[2];
    b_dims[0] = (sz[0]*sz[1]*sz[2]);
    b_dims[1] = ((n[0]-sz[0])/stepsize[0]+1)*((n[1]-sz[1])/stepsize[1]+1)*((n[2]-sz[2])/stepsize[2]+1);
    
    b = mxGPUCreateGPUArray(2, b_dims, mxGPUGetClassID(x), /* Create the output matrix */
                            mxREAL, MX_GPU_INITIALIZE_VALUES);
    device_x = (double const *)(mxGPUGetDataReadOnly(x));
    device_b = (double *)(mxGPUGetData(b));
    /* Invoke Kernel*/
    int NN = mxGPUGetNumberOfElements(x);
    int const threadsPerBlock  = 256;
    int blocksPerGrid = (NN + threadsPerBlock - 1)/threadsPerBlock;
    im2colstep2GPU<<<blocksPerGrid, threadsPerBlock>>>
              (device_x, device_b, sz, stepsize, n);
    
    B_OUT = mxGPUCreateMxArrayOnGPU(b);
    
    /*
    * the mxGPUArray pointers are host-side structures that refer to
    * device data. These must be destroyed before leaving the MEX
    * function.
    */
    mxGPUDestroyGPUArray(x);
    mxGPUDestroyGPUArray(b);  
}

I have succesfully compiled the CUDA Mex function. But, when I enter the code below, I have taken the following error:

>> im2colstep2GPU(gpuArray(A),[3,3])

ERROR CODE

Error using gpuArray/gather
An unexpected error occurred during CUDA execution. The CUDA error was:
CUDA_ERROR_ILLEGAL_ADDRESS
Error in
parallel.internal.shared.buildDisplayHelper>iFirstNNumericDisplayHelper
(line 73)
        maybeTruncatedValue = gather( x );
Error in parallel.internal.shared.buildDisplayHelper>iBuildDisplayHelper
(line 33)
    dh = iFirstNNumericDisplayHelper( ...
Error in parallel.internal.shared.buildDisplayHelper (line 24)
    dh = iBuildDisplayHelper( x, transferDenseFcn, transferSparseFcn,
    xClassName, xName, N );
Error in dispInternal (line 13)
    dh = parallel.internal.shared.buildDisplayHelper( ...
Error in gpuArray/display (line 21)
dh = dispInternal( obj, thisClassName, objName );

Actually, it seems like that the computation have been accomplished when looking at the workspace(at least, the size of the output matches with the expected output variable). However, when I click on the output variable in the workspace, I take the following figure:

Can anyone help me on the subject of what I do for fixing this error.

0 Comments
Show -2 older commentsHide -2 older comments

Sign in to comment.

Sign in to answer this question.

Answer 1

Joss Knight on 24 Mar 2022

0
Link

Direct link to this answer

https://in.mathworks.com/matlabcentral/answers/1677524-why-do-i-receive-the-error-cuda_error_illegal_address-despite-of-successfully-compiling-the-cuda-m#answer_926174

Open in MATLAB Online

I suspect your kernel is writing into out-of-bounds memory. The next time the device synchronizes the error will be reported, which in your case was when you tried to display the gpuArray. To detect it earlier for debugging, add cudaDeviceSynchronize after your kernel is launched and then inspect the result of cudaGetLastError.

Exactly where your kernel is writing out of bounds will require some debugging. I suggest start by compiling your MEX functions with the -G and -g options (you may also need to add NVCC_FLAGS=-lineinfo as well to narrow it down to a line of code), then using the CUDA toolkit utility cuda-memcheck to detect the illegal access. I tend to write a script that reproduces the error and then launch it from a terminal using matlab -batch, something like

cuda-memcheck matlab -batch "myScript()"

Obviously a more laborious way to do this involves either using the NSight debugger or putting printf statements in your kernel. Note that MEX overloads printf (to display to the MATLAB command window) so you need put #undef printf at the top of your file to stop that happening. Also, try to run your kernel with the smallest possible matrix to avoid millions of lines of output. Note: the display will output to the terminal from which you launched MATLAB. Also, on Windows you need to run MATLAB with the options -wait -log to capture the output stream.

Hope this helps you find the problem. Let us know how you did!

0 Comments
Show -2 older commentsHide -2 older comments

Sign in to comment.

Why do I receive the error "CUDA_ERROR_ILLEGAL_ADDRESS" despite of successfully compiling the cuda mex function.

0 Comments
Show -2 older commentsHide -2 older comments

Answers (1)

0 Comments
Show -2 older commentsHide -2 older comments

See Also

Categories

Tags

Products

Release

Community Treasure Hunt

Why do I receive the error "CUDA_ERRO​R_ILLEGAL_​ADDRESS" despite of successfully compiling the cuda mex function.

0 Comments Show -2 older commentsHide -2 older comments

Answers (1)

0 Comments Show -2 older commentsHide -2 older comments

See Also

Categories

Tags

Products

Release

Community Treasure Hunt

Why do I receive the error "CUDA_ERROR_ILLEGAL_ADDRESS" despite of successfully compiling the cuda mex function.

0 Comments
Show -2 older commentsHide -2 older comments

0 Comments
Show -2 older commentsHide -2 older comments