Machine Learning Project

Thumbnail of Rescue Ranger Fan Club Webpage

Slide 1

Slide 2

Slide 3

Slide 4

Slide 5

Slide 6

Slide 7

Slide 8

Slide 9

Slide 10

Slide 11

Slide 12

Decision Tree

This was my part of the project and the code can be seen below:
# -*- coding: utf-8 -*-
"""
@author: Daniel Ruiz
"""
import MTDecisionTreeP3 as dt
import numpy as np

# start the main
def main():
    """Build a decision tree from the training CSV and score it on the test CSV.

    Reads 'DTNames.csv', 'DT85Train.csv' and 'DT15Test.csv' from the current
    directory, trains a ``dt.DecTreeNode`` on the training rows, classifies
    every test row and prints the percentage of rows classified correctly.
    In the train/test files the last column is used as the class label and
    the remaining columns as the attribute values.
    """
    namesData = np.genfromtxt('DTNames.csv', delimiter=',', dtype='str')
    rawTrain = np.genfromtxt('DT85Train.csv', delimiter=',', dtype='str')
    myTest = np.genfromtxt('DT15Test.csv', delimiter=',', dtype='str')

    root = dt.DecTreeNode()

    # Last column holds the class; everything before it is attribute data.
    trainLabels = rawTrain[:, -1]
    trainFeatures = rawTrain[:, :-1]
    testLabels = myTest[:, -1]
    testFeatures = myTest[:, :-1]

    testRows = myTest.shape[0]

    # The last row of the names table lists the class values starting at
    # column 3; the first listed value is used as the tie-breaker.
    # To hard-code the tie-breaker instead, pass a literal label as the
    # third argument here (and, if desired, as the last argument of create).
    classValues = namesData[-1, 3:]
    tieBreaker = namesData[-1, 3]
    bestValue = dt.findBest(trainLabels, classValues, tieBreaker)

    root.create(trainFeatures, trainLabels, namesData, bestValue)

    # uncomment to print tree
    # root.print(0)

    hits = 0
    for features, expected in zip(testFeatures, testLabels):
        predicted = root.classify(features)
        # uncomment to show the result for each test row
        # print(predicted)
        if expected == predicted:
            hits += 1

    percentCorrect = (hits / testRows) * 100
    print("Correctness: ", round(percentCorrect, 4))


if __name__ == "__main__":
    main()
Output:
Correctness: 62.5
Here is MTDecisionTreeP3:
# -*- coding: utf-8 -*-
"""
@author: Professor Michael Thompson
"""
import numpy as np
import math

def findBest(classes, classNames, best):
    """Return the most frequent label of ``classNames`` found in ``classes``.

    classes    -- array of class labels, one per example
    classNames -- candidate labels to count
    best       -- label that wins when it ties with the current leader

    When several labels share the highest count and none of them is
    ``best``, the one listed first in ``classNames`` is returned.
    """
    # Number of examples carrying each candidate label.
    tallies = np.array(
        [np.where(classes == label)[0].size for label in classNames],
        dtype=float,
    )

    leader = 0
    for idx in range(1, len(classNames)):
        if tallies[idx] > tallies[leader]:
            leader = idx
        elif tallies[idx] == tallies[leader] and classNames[idx] == best:
            # A tie only changes the leader when the challenger is `best`.
            leader = idx

    return classNames[leader]

def entropy(data, classNames):
    """Return the base-2 entropy of the labels in ``data``.

    data       -- array of class labels
    classNames -- labels whose proportions contribute to the sum

    Proportions are taken over all of ``data``; labels absent from
    ``data`` contribute nothing, so empty input yields 0.
    """
    sampleCount = np.shape(data)[0]
    result = 0
    for label in classNames:
        hits = np.count_nonzero(data == label)
        if hits == 0:
            # Skip absent labels: log2(0) is undefined and their term is 0.
            continue
        proportion = hits / sampleCount
        result -= proportion * math.log2(proportion)
    return result
    
    

def gain(data, classes, names, index):
    """Return the information gain of splitting on attribute ``index``.

    data    -- 2-D array of attribute values, one row per example
    classes -- array of class labels aligned with the rows of ``data``
    names   -- 2-D table with one row per attribute plus a final class row;
               column 2 holds the number of values and the values
               themselves start at column 3
    index   -- attribute row in ``names`` / column in ``data`` to evaluate

    The gain is the entropy of ``classes`` minus the weighted entropy of
    the class labels within each attribute-value subset.
    """
    classRow = np.shape(names)[0] - 1
    labels = names[classRow, 3:]
    baseEntropy = entropy(classes, labels)
    rowCount = np.shape(data)[0]

    remainder = 0
    for col in range(3, 3 + int(names[index, 2])):
        rows = np.where(data[:, index] == names[index, col])
        matched = np.shape(rows)[1]
        if matched > 0:
            # Weight each subset's entropy by its share of the examples.
            subsetClasses = classes[rows]
            remainder += (matched / rowCount) * entropy(subsetClasses, labels)

    return baseEntropy - remainder


class DecTreeNode:
    """Node of a decision tree over string-valued attributes.

    A node is either a leaf, in which case ``_class`` holds a class label,
    or an internal node that tests one attribute and owns one child per
    value of that attribute.  Each child removes the tested column from the
    data it sees, so ``_attribute`` is an index into the columns remaining
    at that node, not into the original data.
    """

    def __init__(self):
        self._children = []     # one child node per value of the tested attribute
        self._parent = ""       # initialised here; not assigned anywhere else in this class
        self._attribute = -1    # column index tested at this node (-1 until create() picks one)
        self._attrName = ""     # name of the tested attribute (column 0 of its names row)
        self._value = ""        # attribute value on the branch leading to this node
        self._class = ""        # class label for a leaf; "" for an internal node

    def getValue(self):
        """Return the attribute value on the branch leading to this node."""
        return self._value

    def setValue(self, v):
        """Set the attribute value on the branch leading to this node."""
        self._value = v

    def classify(self, example):
        """Return the class label the tree assigns to ``example``.

        example -- 1-D array of attribute values laid out like the columns
                   of the data this node was created from

        Returns None (after printing a diagnostic) when the example's value
        for the tested attribute matches none of the children.
        """
        if self._class != "":
            return self._class

        for child in self._children:
            if example[self._attribute] == child.getValue():
                # The child was built without this column, so drop it
                # before descending to keep the indices aligned.
                reduced = np.delete(example, self._attribute, axis=0)
                return child.classify(reduced)

        print("OOPS!!", example)
        return None

    def print(self, offset):
        """Print the subtree rooted at this node, indented by ``offset`` spaces.

        Internal nodes are only expanded while ``offset`` is below 15; each
        level adds 6 to the offset.  Increase the limit to show more of the
        tree.
        """
        if self._class != "":
            print(" "*offset, self._class)
        # elif rather than else so deep subtrees are cut off; can also
        # increase the number to show more of the tree
        elif offset < 15:
            print(" "*offset, self._attrName.upper()+"?")
            for child in self._children:
                print(" "*(offset+3), "="+child.getValue())
                child.print(offset+6)

    def create(self, data, classes, names, best):
        """Build the subtree for the given examples, in place.

        data    -- 2-D array of attribute values, one row per example
        classes -- array of class labels aligned with the rows of ``data``
        names   -- 2-D table with one row per remaining attribute plus a
                   final class row; column 0 is the name, column 2 the
                   number of values, and the values start at column 3
        best    -- label used when there are no examples and as the
                   tie-breaker passed to ``findBest``

        The node becomes a leaf when there are no examples, when all
        examples share one class, or when the attribute count reaches the
        stopping depth.  Otherwise it splits on the attribute with the
        highest information gain and recurses into one child per value.
        """
        numAttr = np.shape(names)[0]-1

        vals = np.unique(classes)
        if len(vals) == 0:
            self._class = best
            return
        if len(vals) == 1:
            self._class = vals[0]
            return
        # Stops when this many attributes remain (hard-coded at 4).  Also
        # stop when no attributes are left: there is nothing to split on,
        # and evaluating a gain would index a data column that is gone.
        if numAttr == 4 or numAttr == 0:
            self._class = findBest(classes, names[numAttr, 3:], best)
            return

        # Find the attribute with the best gain; every attribute index is
        # evaluated, and max() keeps the earliest one on ties.
        bestAttr = max(
            range(numAttr),
            key=lambda attr: gain(data, classes, names, attr),
        )

        self._attribute = bestAttr
        self._attrName = names[bestAttr, 0]

        # The names table without the chosen attribute is the same for
        # every child, so build it once.
        newNames = np.delete(names, bestAttr, axis=0)

        # create child for each value
        for i in range(int(names[bestAttr, 2])):
            attrValue = names[bestAttr, 3+i]
            rows = np.where(data[:, bestAttr] == attrValue)[0]
            newClasses = classes[rows]
            newData = np.delete(data[rows, :], bestAttr, axis=1)
            newBest = findBest(newClasses, newNames[numAttr-1, 3:], best)

            x = DecTreeNode()
            x.create(newData, newClasses, newNames, newBest)
            x.setValue(attrValue)

            self._children.append(x)