Main Content

Lane Detection Optimized with GPU Coder

This example shows how to generate CUDA® code from a deep learning network, represented by a SeriesNetwork object. In this example, the series network is a convolutional neural network that can detect and output lane marker boundaries from an image.

Prerequisites

  • CUDA enabled NVIDIA® GPU.

  • NVIDIA CUDA toolkit and driver.

  • NVIDIA cuDNN library.

  • OpenCV libraries for video read and image display operations.

  • Environment variables for the compilers and libraries. For information on the supported versions of the compilers and libraries, see Third-Party Hardware. For setting up the environment variables, see Setting Up the Prerequisite Products.

Verify GPU Environment

Use the coder.checkGpuInstall function to verify that the compilers and libraries necessary for running this example are set up correctly.

% Build an environment-check configuration for the host GPU and run the
% verification. The check targets the cuDNN deep learning library.
envCfg = coder.gpuEnvConfig('host');
envCfg.DeepLibTarget = 'cudnn';
% NOTE(review): presumably enables the deep-learning code generation check - confirm
envCfg.DeepCodegen = 1;
% NOTE(review): presumably suppresses the verbose report - confirm
envCfg.Quiet = 1;
coder.checkGpuInstall(envCfg);

Get Pretrained SeriesNetwork

[laneNet, coeffMeans, coeffStds] = getLaneDetectionNetworkGPU();

This network takes an image as an input and outputs two lane boundaries that correspond to the left and right lanes of the ego vehicle. Each lane boundary is represented by the parabolic equation $y = ax^2 + bx + c$, where $y$ is the lateral offset and $x$ is the longitudinal distance from the vehicle. The network outputs the three parameters $a$, $b$, and $c$ per lane. The network architecture is similar to AlexNet except that the last few layers are replaced by a smaller fully connected layer and regression output layer. To view the network architecture, use the analyzeNetwork function.

analyzeNetwork(laneNet)

Examine Main Entry-Point Function

type detect_lane.m
function [laneFound, ltPts, rtPts] = detect_lane(frame, laneCoeffMeans, laneCoeffStds)
% DETECT_LANE  Run the lane network on one frame and, when both lane
% boundaries are present, sample them and map the samples to image
% coordinates.
%
%   frame            image handed to the network after its first two
%                    dimensions are swapped
%   laneCoeffMeans   values added back when de-normalizing the output
%   laneCoeffStds    values multiplied back when de-normalizing the output
%
%   laneFound        true only when both the left and right lane pass the
%                    offset test below
%   ltPts, rtPts     28-by-2 single arrays of image points; they are
%                    allocated with coder.nullcopy, so their contents are
%                    only meaningful when laneFound is true

%#codegen

% The network object lives in a persistent variable: it is loaded on the
% first call only and reused by every later call.
persistent lanenet;
if isempty(lanenet)
    lanenet = coder.loadDeepLearningNetwork('laneNet.mat', 'lanenet');
end

normalizedCoeffs = lanenet.predict(permute(frame, [2 1 3]));

% Undo the normalization to recover the six polynomial coefficients:
% elements 1:3 describe the left boundary, elements 4:6 the right one.
params = normalizedCoeffs .* laneCoeffStds + laneCoeffMeans;

% A boundary counts as present when the magnitude of its constant term c
% exceeds 0.5.
hasLeftLane  = abs(params(3)) > 0.5;
hasRightLane = abs(params(6)) > 0.5;

vehicleXPoints = 3:30; % meters, ahead of the sensor
ltPts = coder.nullcopy(zeros(28,2,'single'));
rtPts = coder.nullcopy(zeros(28,2,'single'));

laneFound = hasRightLane && hasLeftLane;
if laneFound
    % Evaluate both boundary polynomials at the sample distances.
    rt_y = computeBoundaryModel(params(4:6), vehicleXPoints);
    lt_y = computeBoundaryModel(params(1:3), vehicleXPoints);

    % Map the vehicle-coordinate samples to image coordinates.
    tform = get_tformToImage;
    ltPts = tform.transformPointsInverse([vehicleXPoints', lt_y']);
    rtPts = tform.transformPointsInverse([vehicleXPoints', rt_y']);
end

end

% Evaluate the boundary polynomial at the given longitudinal positions.
%   model   polynomial coefficients, highest power first (polyval order)
%   xWorld  positions at which to evaluate the polynomial
%   yWorld  polynomial value at each element of xWorld
function yWorld = computeBoundaryModel(model, xWorld)
	yWorld = polyval(model, xWorld);	
end

function tform = get_tformToImage
% Build the projective2d object used to map vehicle points into the image.
% The object returned is the INVERSE of the homography assembled here, so
% callers apply it with transformPointsInverse.

% Camera pose (degrees)
yaw   = 0;
pitch = 14; % pitch of the camera in degrees
roll  = 0;

% Extrinsics: 3x3 rotation stacked on top of a 1x3 translation
rotation    = rotationMatrix(yaw, pitch, roll);
translation = translationVector(yaw, pitch, roll);
extrinsics  = [rotation; translation];

% Intrinsics
focalLength    = [309.4362, 344.2161];
principalPoint = [318.9034, 257.5352];
Skew = 0;
intrinsics = intrinsicMatrix(focalLength, Skew, principalPoint);

% Full 4x3 camera matrix
camMatrix = extrinsics * intrinsics;

% Keep rows 1, 2 and 4 (drop the Z row) to get a 2-D homography
tform2D = camMatrix([1 2 4], :);

tform = invert(projective2d(tform2D));
end

function translation = translationVector(yaw, pitch, roll)
% Translation part of the camera extrinsics: the sensor offset
% [y, x, height] expressed through the yaw / (90 - pitch) / roll rotation.
% All angles are in degrees.

mountingHeight = 2.1798;    % mounting height in meters from the ground
sensorLocation = [0 0];

% Rotations are applied right to left: roll first, yaw last.
R = rotZ(yaw) * rotX(90-pitch) * rotZ(roll);

% Adjust for the sensor location by adding a translation
offsetInWorldUnits = [sensorLocation(2), sensorLocation(1), mountingHeight];
translation = offsetInWorldUnits * R;
end

%------------------------------------------------------------------
% rotX  3x3 rotation matrix about the X-axis; angle a is in degrees.
function R = rotX(a)
theta = deg2rad(a);
c = cos(theta);
s = sin(theta);
R = [1  0   0;
     0  c  -s;
     0  s   c];
end

%------------------------------------------------------------------
% rotY  3x3 rotation matrix about the Y-axis; angle a is in degrees.
function R = rotY(a)
theta = deg2rad(a);
c = cos(theta);
s = sin(theta);
R = [ c  0  s;
      0  1  0;
     -s  0  c];
end

%------------------------------------------------------------------
% rotZ  3x3 rotation matrix about the Z-axis; angle a is in degrees.
function R = rotZ(a)
theta = deg2rad(a);
c = cos(theta);
s = sin(theta);
R = [c  -s  0;
     s   c  0;
     0   0  1];
end

%------------------------------------------------------------------
% rotationMatrix  Rotation part of the camera extrinsics.
% Chains the elementary rotations that align the camera's coordinate
% system with the vehicle coordinate system for the given yaw, pitch and
% roll (degrees). Together with the translation vector the result defines
% the extrinsic parameters of the camera.
% The factors are accumulated left to right; reading them right to left
% gives the order in which they act on a point.
function rotation = rotationMatrix(yaw, pitch, roll)

rotation = rotY(180);                    % last rotation: point Z up
rotation = rotation * rotZ(-90);         % X-Y swap
rotation = rotation * rotZ(yaw);         % point the camera forward
rotation = rotation * rotX(90-pitch);    % "un-pitch"
rotation = rotation * rotZ(roll);        % 1st rotation: "un-roll"
end

function intrinsicMat = intrinsicMatrix(FocalLength, Skew, PrincipalPoint)
% Assemble the 3x3 intrinsic matrix in the row-vector (post-multiply)
% layout: focal lengths on the diagonal, skew at (2,1) and the principal
% point in the last row.
firstRow  = [FocalLength(1),    0,                 0];
secondRow = [Skew,              FocalLength(2),    0];
thirdRow  = [PrincipalPoint(1), PrincipalPoint(2), 1];
intrinsicMat = [firstRow; secondRow; thirdRow];
end

Generate Code for Network and Post-Processing Code

The network computes parameters a, b, and c that describe the parabolic equation for the left and right lane boundaries.

From these parameters, compute the x and y coordinates corresponding to the lane positions. The coordinates must be mapped to image coordinates. The function detect_lane.m performs all these computations. Generate CUDA code for this function by creating a GPU code configuration object for a 'lib' target and setting the target language to C++. Use the coder.DeepLearningConfig function to create a cuDNN deep learning configuration object and assign it to the DeepLearningConfig property of the GPU code configuration object. Then run the codegen command.

% GPU code configuration for a static-library ('lib') target that uses a
% cuDNN deep learning configuration, produces a code generation report and
% emits C++.
cfg = coder.gpuConfig('lib');
cfg.DeepLearningConfig = coder.DeepLearningConfig('cudnn');
cfg.GenerateReport = true;
cfg.TargetLang = 'C++';
% Example arguments fixing the entry-point input types: a 227x227x3 single
% frame followed by two 1x6 double vectors (coefficient means and stds).
inputs = {ones(227,227,3,'single'),ones(1,6,'double'),ones(1,6,'double')};
codegen -args inputs -config cfg detect_lane
Code generation successful: View report

Generated Code Description

The series network is generated as a C++ class containing an array of 23 layer classes.

// Generated network class: holds the I/O buffers and the 23 layer objects.
class c_lanenet {
 public:
  int32_T batchSize;
  int32_T numLayers;
  real32_T *inputData;
  real32_T *outputData;
  MWCNNLayer *layers[23];

  c_lanenet(void);
  void setup(void);    // sets up handles and allocates memory for each layer
  void predict(void);  // invokes prediction for each layer
  void cleanup(void);
  ~c_lanenet(void);
};

The setup() method of the class sets up handles and allocates memory for each layer object. The predict() method invokes prediction for each of the 23 layers in the network.

The cnn_lanenet_conv*_w and cnn_lanenet_conv*_b files are the binary weights and bias files for the convolution layers in the network. The cnn_lanenet_fc*_w and cnn_lanenet_fc*_b files are the binary weights and bias files for the fully connected layers in the network.

codegendir = fullfile('codegen', 'lib', 'detect_lane');
dir(codegendir)
.                                      MWReLULayer.o                          
..                                     MWReLULayerImpl.cu                     
.gitignore                             MWReLULayerImpl.hpp                    
DeepLearningNetwork.cu                 MWReLULayerImpl.o                      
DeepLearningNetwork.h                  MWTargetNetworkImpl.cu                 
DeepLearningNetwork.o                  MWTargetNetworkImpl.hpp                
MWCNNLayer.cpp                         MWTargetNetworkImpl.o                  
MWCNNLayer.hpp                         MWTensor.hpp                           
MWCNNLayer.o                           MWTensorBase.cpp                       
MWCNNLayerImpl.cu                      MWTensorBase.hpp                       
MWCNNLayerImpl.hpp                     MWTensorBase.o                         
MWCNNLayerImpl.o                       _clang-format                          
MWCUSOLVERUtils.cpp                    buildInfo.mat                          
MWCUSOLVERUtils.hpp                    cnn_lanenet0_0_conv1_b.bin             
MWCUSOLVERUtils.o                      cnn_lanenet0_0_conv1_w.bin             
MWCudaDimUtility.hpp                   cnn_lanenet0_0_conv2_b.bin             
MWCustomLayerForCuDNN.cpp              cnn_lanenet0_0_conv2_w.bin             
MWCustomLayerForCuDNN.hpp              cnn_lanenet0_0_conv3_b.bin             
MWCustomLayerForCuDNN.o                cnn_lanenet0_0_conv3_w.bin             
MWElementwiseAffineLayer.cpp           cnn_lanenet0_0_conv4_b.bin             
MWElementwiseAffineLayer.hpp           cnn_lanenet0_0_conv4_w.bin             
MWElementwiseAffineLayer.o             cnn_lanenet0_0_conv5_b.bin             
MWElementwiseAffineLayerImpl.cu        cnn_lanenet0_0_conv5_w.bin             
MWElementwiseAffineLayerImpl.hpp       cnn_lanenet0_0_data_offset.bin         
MWElementwiseAffineLayerImpl.o         cnn_lanenet0_0_data_scale.bin          
MWElementwiseAffineLayerImplKernel.cu  cnn_lanenet0_0_fc6_b.bin               
MWElementwiseAffineLayerImplKernel.o   cnn_lanenet0_0_fc6_w.bin               
MWFCLayer.cpp                          cnn_lanenet0_0_fcLane1_b.bin           
MWFCLayer.hpp                          cnn_lanenet0_0_fcLane1_w.bin           
MWFCLayer.o                            cnn_lanenet0_0_fcLane2_b.bin           
MWFCLayerImpl.cu                       cnn_lanenet0_0_fcLane2_w.bin           
MWFCLayerImpl.hpp                      cnn_lanenet0_0_responseNames.txt       
MWFCLayerImpl.o                        codeInfo.mat                           
MWFusedConvReLULayer.cpp               codedescriptor.dmr                     
MWFusedConvReLULayer.hpp               compileInfo.mat                        
MWFusedConvReLULayer.o                 defines.txt                            
MWFusedConvReLULayerImpl.cu            detect_lane.a                          
MWFusedConvReLULayerImpl.hpp           detect_lane.cu                         
MWFusedConvReLULayerImpl.o             detect_lane.h                          
MWInputLayer.cpp                       detect_lane.o                          
MWInputLayer.hpp                       detect_lane_data.cu                    
MWInputLayer.o                         detect_lane_data.h                     
MWInputLayerImpl.hpp                   detect_lane_data.o                     
MWKernelHeaders.hpp                    detect_lane_initialize.cu              
MWMaxPoolingLayer.cpp                  detect_lane_initialize.h               
MWMaxPoolingLayer.hpp                  detect_lane_initialize.o               
MWMaxPoolingLayer.o                    detect_lane_internal_types.h           
MWMaxPoolingLayerImpl.cu               detect_lane_rtw.mk                     
MWMaxPoolingLayerImpl.hpp              detect_lane_terminate.cu               
MWMaxPoolingLayerImpl.o                detect_lane_terminate.h                
MWNormLayer.cpp                        detect_lane_terminate.o                
MWNormLayer.hpp                        detect_lane_types.h                    
MWNormLayer.o                          examples                               
MWNormLayerImpl.cu                     gpu_codegen_info.mat                   
MWNormLayerImpl.hpp                    html                                   
MWNormLayerImpl.o                      interface                              
MWOutputLayer.cpp                      mean.bin                               
MWOutputLayer.hpp                      predict.cu                             
MWOutputLayer.o                        predict.h                              
MWOutputLayerImpl.cu                   predict.o                              
MWOutputLayerImpl.hpp                  rtw_proj.tmw                           
MWOutputLayerImpl.o                    rtwtypes.h                             
MWReLULayer.cpp                        
MWReLULayer.hpp                        

Generate Additional Files for Post-Processing the Output

Export mean and std values from the trained network for use during execution.

% Write the de-normalization constants next to the generated code.
% mean.bin holds the elements of [coeffMeans coeffStds] as raw doubles; the
% main file reads the first six values as means and the next six as stds.
codegendir = fullfile(pwd, 'codegen', 'lib','detect_lane');
meanFile = fullfile(codegendir,'mean.bin');

% fopen returns -1 instead of throwing, so check it explicitly; otherwise
% the failure only shows up as an unrelated-looking fwrite error.
[fid, openMsg] = fopen(meanFile, 'w');
if fid == -1
    error('Could not open %s for writing: %s', meanFile, openMsg);
end

A = [coeffMeans coeffStds];
fwrite(fid, A, 'double');
fclose(fid);

Main File

Compile the network code by using a main file. The main file uses the OpenCV VideoCapture method to read frames from the input video. Each frame is processed by the network until no more frames are read. Before each frame is displayed, the network output is post-processed by the detect_lane function generated in detect_lane.cu, and the detected lane boundaries are drawn on the frame.

type main_lanenet.cu
/* Copyright 2016 The MathWorks, Inc. */

#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/core/types.hpp>
#include <opencv2/highgui.hpp>
#include <list>
#include <cmath>
#include "detect_lane.h"

using namespace cv;
// Resize `orig` to 227x227 into `im` and unpack it into `input` as three
// consecutive float planes in R, G, B order (OpenCV stores pixels as
// interleaved B, G, R). `input` must hold at least 3*227*227 floats.
// NOTE(review): indexing assumes `im` is a continuous 3-channel 8-bit image - confirm
void readData(float *input, Mat& orig, Mat & im)
{
	const int side = 227;
	const int planeSize = side * side;

	resize(orig, im, Size(side, side), 0, 0, INTER_LINEAR);

	float *redPlane   = input;
	float *greenPlane = input + planeSize;
	float *bluePlane  = input + 2 * planeSize;

	const unsigned char *pixel = im.data;
	for (int idx = 0; idx < planeSize; ++idx, pixel += 3)
	{
		// BGR to RGB
		bluePlane[idx]  = (float)pixel[0];
		greenPlane[idx] = (float)pixel[1];
		redPlane[idx]   = (float)pixel[2];
	}
}

void addLane(float pts[28][2], Mat & im, int numPts)
{
    std::vector<Point2f> iArray;
    for(int k=0; k<numPts; k++) 
    {
        iArray.push_back(Point2f(pts[k][0],pts[k][1]));    
    }	
    Mat curve(iArray, true);
    curve.convertTo(curve, CV_32S); //adapt type for polylines
    polylines(im, curve, false, CV_RGB(255,255,0), 2, LINE_AA);
}


// Run the generated detect_lane entry point and, if it reports a lane,
// draw the left and right boundaries onto `im`.
//   outputBuffer  forwarded unchanged as the first argument of detect_lane
//   N             not used by this function
//   means, stds   forwarded to detect_lane
void writeData(float *outputBuffer, Mat & im, int N, double means[6], double stds[6])
{
    // get lane coordinates
    boolean_T laneFound = 0;
    float ltPts[56];
    float rtPts[56];
    detect_lane(outputBuffer, means, stds, &laneFound, ltPts, rtPts);

    if (laneFound)
    {
        // detect_lane returns each 28x2 point set as a flat array with all
        // first-column values followed by all second-column values;
        // regroup them into (x, y) rows for addLane.
        float leftRows[28][2];
        float rightRows[28][2];
        for (int row = 0; row < 28; ++row)
        {
            leftRows[row][0]  = ltPts[row];
            leftRows[row][1]  = ltPts[row + 28];
            rightRows[row][0] = rtPts[row];
            rightRows[row][1] = rtPts[row + 28];
        }

        addLane(leftRows, im, 28);
        addLane(rightRows, im, 28);
    }
}

// Load the de-normalization constants from a binary file of raw doubles.
//
// The first six doubles in the file are copied to means[0..5] and the next
// six to stds[0..5]; any further content is ignored.
//
// On failure (file cannot be opened, or it holds fewer than 12 doubles) a
// message is written to stderr and `means` / `stds` are left untouched.
// The file is always closed before returning.
void readMeanAndStds(const char* filename, double means[6], double stds[6])
{
    const size_t kNumCoeffs = 6;
    const size_t kNumValues = 2 * kNumCoeffs;

    FILE* pFile = fopen(filename, "rb");
    if (pFile==NULL)
    {
        fputs ("File error",stderr);
        return;
    }

    // Only 12 values are ever consumed, so read exactly that many into a
    // fixed buffer: no file-size query and no heap allocation to leak.
    double buffer[kNumValues];
    size_t result = fread(buffer, sizeof(double), kNumValues, pFile);
    fclose(pFile);

    // A short read means the file is truncated; refuse it rather than copy
    // values that were never read.
    if (result != kNumValues) {
        fputs ("Reading error",stderr);
        return;
    }

    for (size_t k = 0 ; k < kNumCoeffs; k++)
    {
        means[k] = buffer[k];
        stds[k] = buffer[k + kNumCoeffs];
    }
}


// Main function
int main(int argc, char* argv[])
{    
	
    float *inputBuffer = (float*)calloc(sizeof(float),227*227*3);
    float *outputBuffer = (float*)calloc(sizeof(float),6);

    if ((inputBuffer == NULL) || (outputBuffer == NULL)) {
        printf("ERROR: Input/Output buffers could not be allocated!\n");
        exit(-1);
    }
    
    // get ground truth mean and std
    double means[6];
    double stds[6];	
    readMeanAndStds("mean.bin", means, stds);	
	
	if (argc < 2)
    {
        printf("Pass in input video file name as argument\n");
        return -1;
    }
    
    VideoCapture cap(argv[1]);
    if (!cap.isOpened()) {
        printf("Could not open the video capture device.\n");
        return -1;
    }

    cudaEvent_t start, stop;
    float fps = 0;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);    
    Mat orig, im;    
    namedWindow("Lane detection demo",WINDOW_NORMAL);
    while(true)
    {
        cudaEventRecord(start);
        cap >> orig;
        if (orig.empty()) break;                
        readData(inputBuffer, orig, im);		

        writeData(inputBuffer, orig, 6, means, stds);
        
        cudaEventRecord(stop);
        cudaEventSynchronize(stop);
        
        char strbuf[50];
        float milliseconds = -1.0; 
        cudaEventElapsedTime(&milliseconds, start, stop);
        fps = fps*.9+1000.0/milliseconds*.1;
        sprintf (strbuf, "%.2f FPS", fps);
        putText(orig, strbuf, Point(200,30), FONT_HERSHEY_DUPLEX, 1, CV_RGB(0,0,0), 2);
        imshow("Lane detection demo", orig); 		
        if( waitKey(50)%256 == 27 ) break; // stop capturing by pressing ESC	*/       
    }
    destroyWindow("Lane detection demo");
	
    free(inputBuffer);
    free(outputBuffer);
        
    return 0;
}

Download Example Video

% Download the sample video into the current folder unless a file with
% that name is already there.
if ~exist('./caltech_cordova1.avi', 'file')
    url = 'https://www.mathworks.com/supportfiles/gpucoder/media/caltech_cordova1.avi';
    websave('caltech_cordova1.avi', url);
end

Build Executable

% Build the executable with the platform-specific build script, then run
% it from the code generation folder on the downloaded sample video.

% Both platforms export the MATLAB root to the build script.
setenv('MATLAB_ROOT', matlabroot);

if ispc
    % The Windows build additionally receives the compiler's command-line
    % shell setup script through VCVARSALL.
    vcvarsall = mex.getCompilerConfigurations('C++').Details.CommandLineShell;
    setenv('VCVARSALL', vcvarsall);
    buildCmd = 'make_win_lane_detection.bat';
    runCmd   = 'lanenet.exe ..\..\..\caltech_cordova1.avi';
else
    buildCmd = 'make -f Makefile_lane_detection.mk';
    runCmd   = './lanenet ../../../caltech_cordova1.avi';
end

% Stop here if the build fails instead of trying to run an executable that
% is missing or stale.
buildStatus = system(buildCmd);
if buildStatus ~= 0
    error('Build command failed with exit status %d: %s', buildStatus, buildCmd);
end

% The executable looks for mean.bin in the current folder, so run it from
% the code generation folder.
cd(codegendir);
system(runCmd);

Input Screenshot

Output Screenshot

See Also

Functions

Objects

Related Topics