How can I modify the Step 3 section of the code to visualize several objects in the image?

%% Step 1: Define Paths and Initialize Data
% Build one data table per split (train/test/valid). Each table holds an
% 'imageFilename' column plus ONE cell column PER CLASS containing that
% class's [x_min y_min width height] boxes in pixels — the layout
% trainRCNNObjectDetector expects for multi-class training data.
baseFolder = 'C:\Users\grace\Downloads\dataset';
folders = {'train', 'test', 'valid'};
% One table per split, keyed by the folder name (datasetTables.train, ...)
datasetTables = struct();
% Iterate over each dataset folder (train, test, valid)
for f = 1:numel(folders)
datasetFolder = fullfile(baseFolder, folders{f});
% Load class names from _darknet.labels once, on the first (train) split.
% NOTE(review): assumes every split shares the same class list — confirm.
if f == 1
labelFilePath = fullfile(datasetFolder, '_darknet.labels');
classNames = readlines(labelFilePath); % Read class names into a string array
disp('Class names in _darknet.labels:');
disp(classNames);
end
% Spaces are invalid in table variable names, so map them to underscores.
% NOTE(review): assumes the result is a valid MATLAB identifier (no leading
% digits or other punctuation in the label file) — confirm.
classNames = replace(classNames, " ", "_");
% Create an ImageDatastore over every .jpg in this split
imageFiles = dir(fullfile(datasetFolder, '*.jpg')); % Get all .jpg files
imagePaths = fullfile(datasetFolder, {imageFiles.name}); % Full paths to images
imds = imageDatastore(imagePaths); % Create ImageDatastore
% Load annotations from .txt files (one Darknet-format file per image)
annotationFiles = dir(fullfile(datasetFolder, '*.txt')); % Get all .txt files
annotationFiles = annotationFiles(~strcmp({annotationFiles.name}, '_darknet.labels')); % Exclude _darknet.labels
annotationPaths = fullfile(datasetFolder, {annotationFiles.name}); % Full paths to annotations
% One raw text blob per annotation file, parsed later per image
annotations = cell(numel(annotationPaths), 1);
% Read each .txt file in full; '%c' keeps newlines so splitlines works below
for i = 1:numel(annotationPaths)
fileID = fopen(annotationPaths{i}, 'r');
annotations{i} = fscanf(fileID, '%c'); % Read the entire file as text
fclose(fileID);
end
% Verify images and annotations pair up one-to-one by base name.
% dir() returns names sorted, so matching name lists imply matching order.
[~, imageNames] = cellfun(@fileparts, imagePaths, 'UniformOutput', false);
[~, annotationNames] = cellfun(@fileparts, annotationPaths, 'UniformOutput', false);
if ~isequal(imageNames, annotationNames)
error('Image and annotation files in %s do not match. Check file names.', folders{f});
end
% Darknet boxes are normalized to [0,1]; we need pixel units, so read the
% first image's size. NOTE(review): assumes ALL images in the split share
% this size — confirm, otherwise boxes for differently-sized images are wrong.
firstImage = imread(imagePaths{1});
imageSize = size(firstImage); % imageSize = [height, width, channels]
imageSize = imageSize(1:2); % Extract only height and width
% Build the table schema: 'imageFilename' + one cell column per class.
% classNames is an Mx1 string array.
M = numel(classNames);
% Define variable names: 'imageFilename' followed by class names
variableNames = ["imageFilename", classNames']; % Convert to row for concatenation
% Define variable types: first column as 'string', the rest as 'cell'
variableTypes = ["string", repmat("cell", 1, M)];
% Create the table
data = table('Size', [numel(imagePaths), M+1], ...
'VariableTypes', cellstr(variableTypes), ...
'VariableNames', cellstr(variableNames));
% (Previous schema used single 'boxes'/'labels' columns; replaced by the
% per-class columns above, which is what trainRCNNObjectDetector expects.)
% Parse annotations and populate the table
for i = 1:numel(imagePaths)
annotation = annotations{i};
annotationLines = splitlines(annotation); % Split into individual lines
% Per-class box accumulators for this image: boxes{c} is Nx4 [x y w h]
boxes = cell(1,M);
% Each line is one object: class_id x_center y_center width height
for j = 1:numel(annotationLines)
if isempty(annotationLines{j})
continue; % Skip empty lines
end
% Split the line into components (YOLO format: class_id x_center y_center width height)
components = str2double(strsplit(annotationLines{j}));
class_id = components(1) + 1; % YOLO class IDs start from 0, MATLAB starts from 1
x_center = components(2);
y_center = components(3);
width = components(4);
height = components(5);
% Convert normalized center-format to pixel [x_min y_min width height]
x_min = (x_center - width / 2) * imageSize(2); % imageSize(2) = width
y_min = (y_center - height / 2) * imageSize(1); % imageSize(1) = height
boxes{class_id} = [boxes{class_id}; x_min y_min width * imageSize(2) height * imageSize(1)];
% The class is implied by which column the box lands in; no separate
% labels array is needed with the per-class schema.
end
% Write this image's row: filename plus each class's (possibly empty) boxes
data.imageFilename(i) = imagePaths{i};
for jj=1:M
data.(classNames{jj}){i}=boxes{jj};
end
end
% Store the data table in the datasetTables structure
datasetTables.(folders{f}) = data;
% Display a preview of the first few rows
fprintf('Dataset: %s\n', folders{f});
disp(head(data));
end
% Convenience aliases for the three splits
trainingData = datasetTables.train;
testingData = datasetTables.test;
validData = datasetTables.valid;
%% Step 2: Verify Bounding Boxes and Labels
% Sanity check: every training image must contribute at least one bounding
% box in at least one of the per-class columns; stop with an error otherwise.
numRows = size(trainingData, 1);
disp("Number of rows in trainData: " + numRows);
for row = 1:numRows
    % A row passes as soon as any class column holds a non-empty box matrix.
    hasBoxes = false;
    for c = 1:M
        if ~isempty(trainingData.(classNames{c}){row})
            hasBoxes = true;
            break;
        end
    end
    if ~hasBoxes
        error('Bounding boxes for image %s are missing.', trainingData.imageFilename(row));
    end
end
%% Step 3: Prepare R-CNN Training Pipeline
% Adapt a pre-trained ResNet-50 for R-CNN classification: strip the original
% 1000-way ImageNet head and attach a new FC/softmax/classification head
% sized for our classes plus a "Background" class.
% (Classes used to be extracted from a 'labels' column; with the per-class
% table schema the class list comes straight from classNames.)
uniqueClasses=classNames;
numClasses = numel(uniqueClasses);
% R-CNN needs an explicit background class for negative region proposals.
if ~any(strcmp(uniqueClasses, "Background"))
uniqueClasses = [uniqueClasses; "Background"];
numClasses = numClasses + 1; % Increment numClasses to account for background
end
% Bare expressions (no semicolon) deliberately echo the values to the console
uniqueClasses
numClasses
% Load pre-trained ResNet-50 (requires the Deep Learning Toolbox model add-on)
net = resnet50; % Use the pre-trained ResNet-50 model
% Create a layerGraph from the DAGNetwork so layers can be edited
lgraph = layerGraph(net);
% Remove the ImageNet classification head (FC, softmax, classification layers)
lgraph = removeLayers(lgraph, {'fc1000', 'ClassificationLayer_fc1000', 'fc1000_softmax'});
% New fully-connected layer sized to our class count (includes background)
newFCLayer = fullyConnectedLayer(numClasses, 'Name', 'rcnnFC'); % numClasses includes background
lgraph = addLayers(lgraph, newFCLayer);
% Clear any workspace variable shadowing the built-in softmaxLayer function
clear softmaxLayer;
% Add new softmax layer
softmaxLayerObj = softmaxLayer('Name','rcnnSoftmax'); % Use the built-in function
lgraph = addLayers(lgraph, softmaxLayerObj);
% Add new classification layer with explicit class names
newClassLayer = classificationLayer('Name', 'rcnnClassification', 'Classes', uniqueClasses);
lgraph = addLayers(lgraph, newClassLayer);
% Wire the new head onto the backbone's global average pool output
lgraph = connectLayers(lgraph, 'avg_pool', 'rcnnFC');
lgraph = connectLayers(lgraph, 'rcnnFC', 'rcnnSoftmax');
lgraph = connectLayers(lgraph, 'rcnnSoftmax', 'rcnnClassification');
% Verify the updated network (analyzeNetwork opens an interactive report)
disp(lgraph.Layers);
analyzeNetwork(lgraph);
if(0) % Disabled during training; change to if(1) to preview samples.
%% Step 3: Visualize Bounding Boxes (several objects per image)
% FIXED: the old code read trainingData.boxes{i}/labels{i}, columns that no
% longer exist with the per-class table schema built in Step 1, and it
% computed random indices idx but then displayed row i instead of idx(i).
% Now: gather boxes from EVERY class column of the sampled row and draw them
% all at once with showShape, labeling each box with its class name.
figure;
numSamples = 4; % Number of samples to visualize
numRows = size(trainingData, 1); % Get number of rows
numSamples = min(numSamples, numRows); % Don't exceed available rows
idx = randperm(numRows, numSamples); % Random rows to display
for i = 1:numSamples
    r = idx(i); % Use the sampled row, not the loop counter
    subplot(2, 2, i);
    img = imread(trainingData.imageFilename(r));
    % Collect all boxes and matching labels across every class column
    allBoxes = zeros(0, 4); % Nx4 [x_min y_min width height] in pixels
    allLabels = strings(0, 1); % One class name per box
    for jj = 1:M
        b = trainingData.(classNames{jj}){r};
        if ~isempty(b)
            allBoxes = [allBoxes; b]; %#ok<AGROW>
            allLabels = [allLabels; repmat(classNames(jj), size(b, 1), 1)]; %#ok<AGROW>
        end
    end
    imshow(img);
    if ~isempty(allBoxes)
        % 4-column positions are [x y w h] with (x,y) the top-left corner,
        % so no yaw column is needed; showShape draws every box + label.
        showShape("rectangle", allBoxes, Label=allLabels);
    end
    title(sprintf('Row %d: %d objects', r, size(allBoxes, 1)));
end
end % of if (0)
%====================================================================
%=======================================================================
% Training options: SGDM with a small learning rate, suited to fine-tuning
% a pre-trained backbone.
options = trainingOptions('sgdm', ...
    'MiniBatchSize', 32, ...
    'InitialLearnRate', 1e-4, ...
    'MaxEpochs', 10, ...
    'Verbose', true);
%% Step 5: Train the R-CNN Object Detector
% Region proposals overlapping ground truth below 0.3 IoU are negatives,
% above 0.6 IoU are positives.
detector = trainRCNNObjectDetector(trainingData, lgraph, options, ...
    'NegativeOverlapRange', [0 0.3], ...
    'PositiveOverlapRange', [0.6 1]);
disp('? Training Complete! Model successfully trained.');
%% Step 5: Test the Trained Detector
% Run the detector on the first test image and draw what it finds.
testImage = imread(testingData.imageFilename{1});
[bboxes, scores, labels] = detect(detector, testImage);
if isempty(bboxes)
    warning('No objects detected in the test image.');
else
    % Proceed with visualization
end
% Display the results (the loop below is a no-op when bboxes is empty)
figure;
imshow(testImage);
hold on;
for k = 1:size(bboxes, 1)
    rectangle('Position', bboxes(k, :), 'EdgeColor', 'r', 'LineWidth', 2);
    text(bboxes(k, 1), bboxes(k, 2) - 10, string(labels(k)), ...
        'Color', 'red', 'FontSize', 12, 'BackgroundColor', 'white', 'EdgeColor', 'black');
end
hold off;

Answers (1)

Not sure how your data is formatted, but typically this is done using the showShape function.

4 Comments

The data is in Darknet YOLO annotation format, with an accompanying image dataset for object-detection training. There are multiple objects in each image.
I have run the above code for training, but Step 3 is not executed. I am not sure how to modify it — or do I need to wait until the training is completed?
0 0.7113486842105263 0.881578947368421 0.019736842105263157 0.04111842105263158
0 0.7129934210526315 0.8453947368421053 0.019736842105263157 0.05263157894736842
0 0.6932565789473685 0.9037828947368421 0.02138157894736842 0.044407894736842105
0 0.6949013157894737 0.8733552631578947 0.02138157894736842 0.044407894736842105
0 0.696546052631579 0.8421052631578947 0.02138157894736842 0.044407894736842105
0 0.6735197368421053 0.9276315789473685 0.02138157894736842 0.046052631578947366
0 0.6751644736842105 0.897203947368421 0.023026315789473683 0.047697368421052634
0 0.6768092105263158 0.8651315789473685 0.023026315789473683 0.046052631578947366
0 0.6521381578947368 0.9539473684210527 0.024671052631578948 0.049342105263157895
0 0.6546052631578947 0.9226973684210527 0.024671052631578948 0.049342105263157895
0 0.6554276315789473 0.8898026315789473 0.024671052631578948 0.049342105263157895
0 0.6373355263157895 0.977796052631579 0.008223684210526315 0.044407894736842105
0 0.6274671052631579 0.946546052631579 0.03453947368421053 0.047697368421052634
0 0.6291118421052632 0.9161184210526315 0.03289473684210526 0.05263157894736842
0 0.7730263157894737 0.6381578947368421 0.014802631578947368 0.03289473684210526
0 0.774671052631579 0.609375 0.01644736842105263 0.03289473684210526
0 0.7919407894736842 0.649671052631579 0.027960526315789474 0.039473684210526314
0 0.7952302631578947 0.6200657894736842 0.027960526315789474 0.04111842105263158
0 0.796875 0.59375 0.027960526315789474 0.03453947368421053
0 0.8009868421052632 0.571546052631579 0.023026315789473683 0.023026315789473683
0 0.7549342105263158 0.6949013157894737 0.019736842105263157 0.03782894736842105
0 0.7574013157894737 0.665296052631579 0.019736842105263157 0.03782894736842105
0 0.759046052631579 0.6299342105263158 0.019736842105263157 0.047697368421052634
0 0.7393092105263158 0.7113486842105263 0.019736842105263157 0.039473684210526314
0 0.7417763157894737 0.680921052631579 0.019736842105263157 0.039473684210526314
0 0.7393092105263158 0.6480263157894737 0.027960526315789474 0.039473684210526314
0 0.740953947368421 0.6233552631578947 0.02138157894736842 0.024671052631578948
0 0.7384868421052632 0.3815789473684211 0.024671052631578948 0.044407894736842105
0 0.740953947368421 0.3412828947368421 0.02631578947368421 0.044407894736842105
0 0.7425986842105263 0.30180921052631576 0.02631578947368421 0.044407894736842105
0 0.740953947368421 0.2582236842105263 0.03453947368421053 0.046052631578947366
0 0.7179276315789473 0.38898026315789475 0.02631578947368421 0.047697368421052634
0 0.7203947368421053 0.3470394736842105 0.027960526315789474 0.047697368421052634
0 0.7220394736842105 0.3059210526315789 0.027960526315789474 0.047697368421052634
0 0.7195723684210527 0.26151315789473684 0.03618421052631579 0.049342105263157895
0 0.696546052631579 0.3980263157894737 0.027960526315789474 0.049342105263157895
0 0.6981907894736842 0.35444078947368424 0.027960526315789474 0.049342105263157895
0 0.6998355263157895 0.3100328947368421 0.027960526315789474 0.049342105263157895
0 0.6973684210526315 0.26480263157894735 0.03618421052631579 0.049342105263157895
0 0.6726973684210527 0.40625 0.027960526315789474 0.05263157894736842
0 0.6735197368421053 0.3618421052631579 0.029605263157894735 0.05098684210526316
0 0.6751644736842105 0.3157894736842105 0.03125 0.049342105263157895
0 0.6726973684210527 0.2680921052631579 0.039473684210526314 0.05263157894736842
0 0.6455592105263158 0.4161184210526316 0.03453947368421053 0.05263157894736842
0 0.6480263157894737 0.3684210526315789 0.03289473684210526 0.05263157894736842
0 0.6480263157894737 0.3223684210526316 0.03453947368421053 0.054276315789473686
0 0.6504934210526315 0.2722039473684211 0.03453947368421053 0.054276315789473686
1 0.3100328947368421 0.9802631578947368 0.07894736842105263 0.039473684210526314
For example, these are the annotations for one such image.
Consider saving your variables to a mat file and attaching that to your post using the paperclip icon.
It would be helpful to have at least one image and its corresponding bounding boxes and labels.
I ended up finding a Darknet example data set to use here.
Of note
  • Darknet annotations are stored with the format class_id center_x center_y width height
  • Darknet annotations are normalized to be between [0,1]. You need to convert to pixels.
  • showShape expects a 5th column for yaw when defining rectangle location using xctr and yctr
Here's how I would do it using the example I found online.
unzip('air4.zip')
% Read the sample image and its Darknet-format metadata
img = imread('air4.jpg');
classNames = readlines('obj.names'); % one class name per line
anno = readmatrix('air4.txt'); % rows: class_id xc yc w h (normalized)
% Darknet class ids are zero-based; MATLAB indexing is one-based
labelID = anno(:,1) + 1;
% De-normalize [xc yc w h] from [0,1] into pixel units
bboxes = anno(:,2:5) .* size(img, [2 1 2 1]);
% A fifth (yaw) column tells showShape the position is center-defined
bboxes(:,5) = 0;
% Draw every labeled box on the image in one call
imshow(img)
showShape("rectangle",bboxes,Label=classNames(labelID))

Sign in to comment.

Asked:

on 21 Mar 2025

Commented:

on 24 Mar 2025

Community Treasure Hunt

Find the treasures in MATLAB Central and discover how the community can help you!

Start Hunting!