From de1d4d6f9a2a60fe940e38b24689b6a9f589f353 Mon Sep 17 00:00:00 2001 From: Alex Phillips Date: Wed, 14 Sep 2022 16:42:14 +0100 Subject: [PATCH 1/2] Make trees immutable --- discretesampling/domain/decision_tree/tree.py | 102 ++++++++++-------- 1 file changed, 59 insertions(+), 43 deletions(-) diff --git a/discretesampling/domain/decision_tree/tree.py b/discretesampling/domain/decision_tree/tree.py index f718f2f..ab882a7 100644 --- a/discretesampling/domain/decision_tree/tree.py +++ b/discretesampling/domain/decision_tree/tree.py @@ -7,16 +7,26 @@ class Tree(types.DiscreteVariable): def __init__(self, X_train, y_train, tree, leafs): self.X_train = X_train + self.X_train.flags.writeable = False self.y_train = y_train - self.tree = tree - self.leafs = leafs - self.lastAction = "" + self.y_train.flags.writeable = False + self.tree = tuple(tree) + self.leafs = tuple(leafs) + def __eq__(self, x) -> bool: return (x.X_train == self.X_train).all() and\ (x.y_train == self.y_train).all() and\ x.tree == self.tree and x.leafs == self.leafs + def __hash__(self) -> int: + return hash(( + self.X_train.tostring(), + self.y_train.tostring(), + self.tree, + self.leafs + )) + def __str__(self): return str(self.tree) @@ -30,14 +40,16 @@ def getTargetType(self): def grow(self): action = "grow" - self.lastAction = action ''' grow tree by just simply creating the individual nodes. each node holds their node index, the left and right leaf index, the node feature and threshold ''' random_index = RandomInt(0, len(self.leafs)-1).eval() - leaf_to_grow = self.leafs[random_index] + new_tree = list(list(x) for x in self.tree) + new_leafs = list(self.leafs) + + leaf_to_grow = new_leafs[random_index] # generating a random faeture feature = RandomInt(0, len(self.X_train[0])-1).eval() @@ -45,69 +57,71 @@ def grow(self): threshold = RandomInt(0, len(self.X_train)-1).eval() threshold = (self.X_train[threshold, feature]) - node = [leaf_to_grow, max(self.leafs)+1, max(self.leafs)+2, feature, + node = [leaf_to_grow, max(new_leafs)+1, max(new_leafs)+2, feature, threshold] # add the new leafs on the leafs array - self.leafs.append(max(self.leafs)+1) - self.leafs.append(max(self.leafs)+1) + new_leafs.append(max(new_leafs)+1) + new_leafs.append(max(new_leafs)+1) # delete from leafs the new node - self.leafs.remove(leaf_to_grow) - self.tree.append(node) + new_leafs.remove(leaf_to_grow) + new_tree.append(node) - return self + return Tree(self.X_train, self.y_train, new_tree, new_leafs) def prune(self): action = "prune" - self.lastAction = action ''' For example when we have nodes 0,1,2 and leafs 3,4,5,6 when we prune we take the leafs 6 and 5 out, and the node 2, now becomes a leaf. ''' + + new_leafs = list(self.leafs) + new_tree = list(list(x) for x in self.tree) + random_index = RandomInt(0, len(self.tree)-1).eval() - node_to_prune = self.tree[random_index] + node_to_prune = new_tree[random_index] while random_index == 0: random_index = RandomInt(0, len(self.tree)-1).eval() - node_to_prune = self.tree[random_index] + node_to_prune = new_tree[random_index] - if (node_to_prune[1] in self.leafs) and\ - (node_to_prune[2] in self.leafs): + if (node_to_prune[1] in new_leafs) and\ + (node_to_prune[2] in new_leafs): # remove the pruned leafs from leafs list and add the node as a # leaf - self.leafs.append(node_to_prune[0]) - self.leafs.remove(node_to_prune[1]) - self.leafs.remove(node_to_prune[2]) + new_leafs.append(node_to_prune[0]) + new_leafs.remove(node_to_prune[1]) + new_leafs.remove(node_to_prune[2]) # delete the specific node from the node lists - del self.tree[random_index] + del new_tree[random_index] else: - delete_node_indices = [] i = 0 - for node in self.tree: + for node in new_tree: if node_to_prune[1] == node[0] or node_to_prune[2] == node[0]: delete_node_indices.append(node) i += 1 - self.tree.remove(node_to_prune) + new_tree.remove(node_to_prune) for node in delete_node_indices: - self.tree.remove(node) + new_tree.remove(node) - for i in range(len(self.tree)): - for p in range(1, len(self.tree)): + for i in range(len(new_tree)): + for p in range(1, len(new_tree)): count = 0 - for k in range(len(self.tree)-1): - if self.tree[p][0] == self.tree[k][1] or\ - self.tree[p][0] == self.tree[k][2]: + for k in range(len(new_tree)-1): + if new_tree[p][0] == new_tree[k][1] or\ + new_tree[p][0] == new_tree[k][2]: count = 1 if count == 0: - self.tree.remove(self.tree[p]) + new_tree.remove(new_tree[p]) break new_leafs = [] - for node in self.tree: + for node in new_tree: count1 = 0 count2 = 0 - for check_node in self.tree: + for check_node in new_tree: if node[1] == check_node[0]: count1 = 1 if node[2] == check_node[0]: @@ -119,12 +133,10 @@ def prune(self): if count2 == 0: new_leafs.append(node[2]) - self.leafs[:] = new_leafs[:] - return self + return Tree(self.X_train, self.y_train, new_tree, new_leafs) def change(self): action = "change" - self.lastAction = action ''' we need to choose a new feature at first we then need to choose a new threshold base on the feature we have @@ -132,29 +144,33 @@ def change(self): thresholds ''' random_index = RandomInt(0, len(self.tree)-1).eval() - node_to_change = self.tree[random_index] + new_tree = list(list(x) for x in self.tree) + new_leafs = list(self.leafs) + node_to_change = new_tree[random_index] new_feature = RandomInt(0, len(self.X_train[0])-1).eval() new_threshold = RandomInt(0, len(self.X_train)-1).eval() + node_to_change[3] = new_feature node_to_change[4] = self.X_train[new_threshold, new_feature] - return self + return Tree(self.X_train, self.y_train, new_tree, new_leafs) def swap(self): action = "swap" - self.lastAction = action ''' need to swap the features and the threshold among the 2 nodes ''' random_index_1 = RandomInt(0, len(self.tree)-1).eval() random_index_2 = RandomInt(0, len(self.tree)-1).eval() - node_to_swap1 = self.tree[random_index_1] - node_to_swap2 = self.tree[random_index_2] + new_tree = list(list(x) for x in self.tree) + new_leafs = list(self.leafs) + node_to_swap1 = new_tree[random_index_1] + node_to_swap2 = new_tree[random_index_2] # in case we choose the same node while node_to_swap1 == node_to_swap2: - random_index_2 = RandomInt(0, len(self.tree)-1).eval() - node_to_swap2 = self.tree[random_index_2] + random_index_2 = RandomInt(0, len(new_tree)-1).eval() + node_to_swap2 = new_tree[random_index_2] temporary_feature = node_to_swap1[3] temporary_threshold = node_to_swap1[4] @@ -165,4 +181,4 @@ def swap(self): node_to_swap2[3] = temporary_feature node_to_swap2[4] = temporary_threshold - return self + return Tree(self.X_train, self.y_train, new_tree, new_leafs) From 42519b9e19bb8a41c1c306c03a92df9dbb5e5218 Mon Sep 17 00:00:00 2001 From: Alex Phillips Date: Wed, 14 Sep 2022 16:57:33 +0100 Subject: [PATCH 2/2] Fix tree init --- discretesampling/domain/decision_tree/tree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/discretesampling/domain/decision_tree/tree.py b/discretesampling/domain/decision_tree/tree.py index ab882a7..470b392 100644 --- a/discretesampling/domain/decision_tree/tree.py +++ b/discretesampling/domain/decision_tree/tree.py @@ -10,7 +10,7 @@ def __init__(self, X_train, y_train, tree, leafs): self.X_train.flags.writeable = False self.y_train = y_train self.y_train.flags.writeable = False - self.tree = tuple(tree) + self.tree = tuple(tuple(x) for x in tree) self.leafs = tuple(leafs)