Id3 code: I don't understand the error: It says undefined function dtree(trainData,[],0) for input args of type double. The 1st arg is the data, the 2nd is a list that contains a string for displaying the tree, the 3rd is the dominant label
2 views (last 30 days)
Show older comments
load iris.dat
%discretizeData
discretizedData = zeros(150,5);
for i = 1:size(iris,2)
currentColumn = iris(:,i);
bins = length(currentColumn)/4;
discretizedData(:,i) = round(currentColumn/bins)+1;
if(i == 5)
for j = 1: size(iris,1)
if( iris(j,5) == 2 || iris(j,5) == 3)
discretizedData(j,5) = -1;
end
end
end
end
%generate test and train data
posIndex = randperm(50,25);
negIndex = randperm(150,50);
trainData = zeros(75,5);
trainData(1:25,:) = discretizedData(posIndex,:);
trainData(26:75,:) = discretizedData(negIndex,:);
list1 = [];
list2 = [];
%dominant label function
function dm = dominantLabel(trainingData)
posIndices = find(trainingData(:,5)==1);
negIndices = find(trainingData(:,5)==-1);
posCount = size(posIndices,1);
negCount = size(negIndices,1);
if posCount > negCount
dm = 1;
else
dm = -1;
end
end
%entropy function
function en = entropy(tExamples,fExamples)
posCount = size(tExamples,1);
negCount = size(fExamples,1);
pPos = posCount/(posCount+negCount);
pNeg = 1 - pPos;
en = -1*(pPos*log(pPos) + pNeg*log(pNeg));
end
%start of the decision tree algorithm
function e = dtree(trainData,list,dominant)
if size(trainData,1) == 0
list(length(list)+1) = strcat('=>',num2str(dominant));
disp(list) %print the rule at the leaf
e = 1;
end
dominant = dominantLabel(trainData);
nFeatures = size(trainData,2)-1;
levelEntropy = 999;
for i = 1:nFeatures
uniqueValues = unique(trainData(:,i));
nUnique = length(uniqueValues);
for u = 1:nUnique
tposExample = trainData((find(trainData(:,i) == uniqueValues(u) & trainData(:,5) == 1)),:);
tnegExample = trainData((find(trainData(:,i) == uniqueValues(u) & trainData(:,5) == -1)),:);
fposExample = trainData((find(trainData(:,i) ~= uniqueValues(u) & trainData(:,5) == 1)),:);
fnegExample = trainData((find(trainData(:,i) ~= uniqueValues(u) & trainData(:,5) == -1)),:);
tEntropy = entropy(tposExample,tnegExample);
fEntropy = entropy(fposExample,fnegExample);
thisLevelEntropy = (size(tposExample,1)+size(tnegExample,1))*tEntropy + (size(fposExample,1)+size(fnegExample,1))*fEntropy;
if thisLevelEntropy < levelEntropy
levelEntropy = thisLevelEntropy;
bestFeature1 = strcat('f',num2str(i),'=',num2str(u)+'^');
bestFeature2 = strcat('!f',num2str(i),'=',num2str(u)+'^');
end
end
end
end
list1(length(list)+1) = bestFeature1;
list2(length(list)+1) = bestFeature2
dtree(vertcat(tposExample,tnegExample),list1,dominant);
dtree(vertcat(fposExample,fnegExample),list2,dominant);
end
0 Comments
Answers (1)
Walter Roberson
on 26 Sep 2016
If you are using R2016b or later, you can save all of those together in one .m file whose name is not dtree.m or entropy.m or dominantLabel.m
If you are using an earlier release, you need to save all of the parts to different files, with the script saved to some .m that is not named dtree.m or entropy.m or dominantLabel.m and with each of the functions saved to a .m file that has the same name as the function. Or you could save the three functions into one file named dtree.m but you would need to move the dtree function to be the first function in that file. (The other two functions are only called by dtree, not by your script, so they do not need to be in their own .m files, provided that they are put at the end of the dtree.m file.)
0 Comments
See Also
Categories
Find more on Search Path in Help Center and File Exchange
Community Treasure Hunt
Find the treasures in MATLAB Central and discover how the community can help you!
Start Hunting!