import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
from sklearn.base import clone
from abc import ABC, abstractmethod
from sklearn.metrics import accuracy_score, precision_score, recall_score
from decisiontrees import DecisionTreeClassifier, DecisionTreeRegressor

## Base class for the random forest algorithm ##
class RandomForest(ABC):
    '''
    Abstract base class for the Random Forest implementation.

    Attributes:
        n_trees: int, number of trees in the forest.
        trees: list, stores the trained decision trees.
    '''

    def __init__(self, n_trees=100):
        '''
        Initializes the RandomForest with the given number of trees.

        n_trees: int, number of trees to be created in the forest.
        '''
        self.n_trees = n_trees
        self.trees = []

    def __make_bootstraps(self, data):
        '''
        Creates bootstrap samples from the given dataset.

        data: NumPy array, dataset to generate bootstrap samples from.
        Returns:
            dict: containing bootstrap samples and their out-of-bag samples.
        '''
        dc = {}
        unip = 0
        b_size = data.shape[0]
        idx = [i for i in range(b_size)]
        for b in range(self.n_trees):
            # draw row indices with replacement to form the bootstrap sample
            sidx = np.random.choice(idx, replace=True, size=b_size)
            b_samp = data[sidx, :]
            # track the number of unique rows drawn per bootstrap sample
            unip += len(set(sidx))
            # rows never drawn become the out-of-bag (OOB) sample
            oidx = list(set(idx) - set(sidx))
            o_samp = np.array([])
            if oidx:
                o_samp = data[oidx, :]
            dc['boot_' + str(b)] = {'boot': b_samp, 'test': o_samp}
        return dc

    def get_params(self, deep=False):
        '''
        Returns the model parameters.
        '''
        return {'n_trees': self.n_trees}

    @abstractmethod
    def _make_tree_model(self):
        '''
        Abstract method to create the appropriate decision tree model.
        '''
        pass

    def _train(self, X_train, y_train):
        '''
        Trains the ensemble by fitting decision trees to bootstrap samples.

        X_train: NumPy array, feature matrix.
        y_train: NumPy array, target values.
        Returns:
            dict: out-of-bag samples for error estimation.
        '''
        # append the targets as the last column so each bootstrap sample carries its labels
        training_data = np.concatenate((X_train, y_train.reshape(-1, 1)), axis=1)
        dcBoot = self.__make_bootstraps(training_data)
        tree_m = self._make_tree_model()
        dcOob = {}
        for b in dcBoot:
            # fit a fresh copy of the template tree on each bootstrap sample
            model = clone(tree_m)
            model.fit(dcBoot[b]['boot'][:, :-1], dcBoot[b]['boot'][:, -1].reshape(-1, 1))
            self.trees.append(model)
            dcOob[b] = dcBoot[b]['test'] if dcBoot[b]['test'].size else np.array([])
        return dcOob

    def _predict(self, X):
        '''
        Makes predictions using the trained ensemble.

        X: NumPy array, feature matrix for prediction.
        Returns:
            NumPy array: averaged predictions from all trees.
        '''
        if not self.trees:
            print('You must train the ensemble before making predictions!')
            return None
        # average the per-tree predictions column-wise
        predictions = [m.predict(X).reshape(-1, 1) for m in self.trees]
        ypred = np.mean(np.concatenate(predictions, axis=1), axis=1)
        return ypred
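
## Illustrative aside: a minimal sketch of the bootstrap/out-of-bag mechanic used by
## __make_bootstraps above. The helper name and toy sizes are assumptions added for
## demonstration, not part of the original implementation. ##
def demo_bootstrap_oob(n_rows=10, seed=0):
    rng = np.random.default_rng(seed)                   # hypothetical demo helper
    idx = np.arange(n_rows)
    sidx = rng.choice(idx, replace=True, size=n_rows)   # sample rows with replacement
    oob = np.setdiff1d(idx, sidx)                       # rows never drawn are out-of-bag
    print('bootstrap indices:', sidx)
    print('out-of-bag indices:', oob)
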
## Random Forest Classifier ##
class RandomForestClassifierCustom(RandomForest):
    '''
    Custom implementation of a Random Forest Classifier.

    Attributes:
        n_trees: int, number of trees in the forest.
        max_depth: int, maximum depth of the trees.
        min_samples_split: int, minimum number of samples required to split a node.
        criterion: str, criterion for splitting ('gini' or 'entropy').
        class_weights: str, strategy to balance class weights.
    '''

    def __init__(self, n_trees=100, max_depth=None, min_samples_split=2, criterion='gini', class_weights='balanced'):
        '''
        Initializes the classifier with the given parameters.
        '''
        super().__init__(n_trees)
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.criterion = criterion
        self.class_weights = class_weights

    def _make_tree_model(self):
        '''
        Creates and returns a DecisionTreeClassifier model with the given parameters.
        '''
        return DecisionTreeClassifier(
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            criterion=self.criterion,
            class_weight=self.class_weights)

    def get_params(self, deep=False):
        '''
        Returns the model parameters.
        '''
        return {'n_trees': self.n_trees,
                'max_depth': self.max_depth,
                'min_samples_split': self.min_samples_split,
                'criterion': self.criterion,
                'class_weights': self.class_weights}

    def fit(self, X_train, y_train, print_metrics=False):
        '''
        Trains the ensemble and optionally prints standard error metrics
        computed on the out-of-bag samples.
        '''
        dcOob = self._train(X_train, y_train)
        if print_metrics:
            accs, pres, recs = np.array([]), np.array([]), np.array([])
            for b, m in zip(dcOob, self.trees):
                if dcOob[b].size:
                    # score each tree on its own out-of-bag sample
                    yp = m.predict(dcOob[b][:, :-1])
                    acc = accuracy_score(dcOob[b][:, -1], yp)
                    pre = precision_score(dcOob[b][:, -1], yp, average='weighted')
                    rec = recall_score(dcOob[b][:, -1], yp, average='weighted')
                    accs, pres, recs = np.append(accs, acc), np.append(pres, pre), np.append(recs, rec)
            print("Standard error in accuracy: %.2f" % np.std(accs))
            print("Standard error in precision: %.2f" % np.std(pres))
            print("Standard error in recall: %.2f" % np.std(recs))

    def predict(self, X):
        '''
        Predicts class labels using the trained ensemble.
        '''
        # round the averaged tree outputs to the nearest integer class label
        return np.round(self._predict(X)).astype(int)
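
## Example usage (an illustrative sketch: the synthetic dataset and parameter values
## are assumptions, and the decisiontrees module must be importable) ##
if __name__ == '__main__':
    from sklearn.datasets import make_classification

    X, y = make_classification(n_samples=200, n_features=8, random_state=0)
    rfc = RandomForestClassifierCustom(n_trees=25, max_depth=4)
    rfc.fit(X, y, print_metrics=True)   # also reports out-of-bag error spread
    preds = rfc.predict(X)
    print('Training accuracy: %.2f' % accuracy_score(y, preds))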