import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
from sklearn.base import clone
from abc import ABC, abstractmethod
from sklearn.metrics import accuracy_score, precision_score, recall_score
from decisiontrees import DecisionTreeClassifier, DecisionTreeRegressor

## Base class for the random forest algorithm ##
class RandomForest(ABC):
    '''
    Abstract base class for the Random Forest implementation.

    Attributes:
        n_trees: int, number of trees in the forest.
        trees: list, stores the trained decision trees.
    '''

    def __init__(self, n_trees=100):
        '''
        Initializes the RandomForest with the given number of trees.

        n_trees: int, number of trees to be created in the forest.
        '''
        self.n_trees = n_trees
        self.trees = []

    def __make_bootstraps(self, data):
        '''
        Creates bootstrap samples from the given dataset.

        data: NumPy array, dataset to generate bootstrap samples from.
        Returns:
            dict: containing bootstrap samples and their out-of-bag samples.
        '''
        dc = {}
        unip = 0
        b_size = data.shape[0]
        idx = [i for i in range(b_size)]
        for b in range(self.n_trees):
            # draw row indices with replacement to form the bootstrap sample
            sidx = np.random.choice(idx, replace=True, size=b_size)
            b_samp = data[sidx, :]
            # track the number of unique rows drawn per bootstrap sample
            unip += len(set(sidx))
            # rows never drawn become the out-of-bag (OOB) sample
            oidx = list(set(idx) - set(sidx))
            o_samp = np.array([])
            if oidx:
                o_samp = data[oidx, :]
            dc['boot_' + str(b)] = {'boot': b_samp, 'test': o_samp}
        return dc

    def get_params(self, deep=False):
        '''
        Returns the model parameters.
        '''
        return {'n_trees': self.n_trees}

    @abstractmethod
    def _make_tree_model(self):
        '''
        Abstract method to create the appropriate decision tree model.
        '''
        pass

    def _train(self, X_train, y_train):
        '''
        Trains the ensemble by fitting decision trees to bootstrap samples.

        X_train: NumPy array, feature matrix.
        y_train: NumPy array, target values.
        Returns:
            dict: out-of-bag samples for error estimation.
        '''
        # append the targets as the last column so each bootstrap sample carries its labels
        training_data = np.concatenate((X_train, y_train.reshape(-1, 1)), axis=1)
        dcBoot = self.__make_bootstraps(training_data)
        tree_m = self._make_tree_model()
        dcOob = {}
        for b in dcBoot:
            # fit a fresh copy of the template tree on each bootstrap sample
            model = clone(tree_m)
            model.fit(dcBoot[b]['boot'][:, :-1], dcBoot[b]['boot'][:, -1].reshape(-1, 1))
            self.trees.append(model)
            dcOob[b] = dcBoot[b]['test'] if dcBoot[b]['test'].size else np.array([])
        return dcOob

    def _predict(self, X):
        '''
        Makes predictions using the trained ensemble.

        X: NumPy array, feature matrix for prediction.
        Returns:
            NumPy array: averaged predictions from all trees.
        '''
        if not self.trees:
            print('You must train the ensemble before making predictions!')
            return None
        # average the per-tree predictions column-wise
        predictions = [m.predict(X).reshape(-1, 1) for m in self.trees]
        ypred = np.mean(np.concatenate(predictions, axis=1), axis=1)
        return ypred
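
## Illustrative aside: a minimal sketch of the bootstrap/out-of-bag mechanic used by
## __make_bootstraps above. The helper name and toy sizes are assumptions added for
## demonstration, not part of the original implementation. ##
def demo_bootstrap_oob(n_rows=10, seed=0):
    rng = np.random.default_rng(seed)                   # hypothetical demo helper
    idx = np.arange(n_rows)
    sidx = rng.choice(idx, replace=True, size=n_rows)   # sample rows with replacement
    oob = np.setdiff1d(idx, sidx)                       # rows never drawn are out-of-bag
    print('bootstrap indices:', sidx)
    print('out-of-bag indices:', oob)
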
## Random Forest Classifier ##
class RandomForestClassifierCustom(RandomForest):
    '''
    Custom implementation of a Random Forest Classifier.

    Attributes:
        n_trees: int, number of trees in the forest.
        max_depth: int, maximum depth of the trees.
        min_samples_split: int, minimum number of samples required to split a node.
        criterion: str, criterion for splitting ('gini' or 'entropy').
        class_weights: str, strategy to balance class weights.
    '''

    def __init__(self, n_trees=100, max_depth=None, min_samples_split=2, criterion='gini', class_weights='balanced'):
        '''
        Initializes the classifier with the given parameters.
        '''
        super().__init__(n_trees)
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.criterion = criterion
        self.class_weights = class_weights

    def _make_tree_model(self):
        '''
        Creates and returns a DecisionTreeClassifier model with the given parameters.
        '''
        return DecisionTreeClassifier(
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            criterion=self.criterion,
            class_weight=self.class_weights)

    def get_params(self, deep=False):
        '''
        Returns the model parameters.
        '''
        return {'n_trees': self.n_trees,
                'max_depth': self.max_depth,
                'min_samples_split': self.min_samples_split,
                'criterion': self.criterion,
                'class_weights': self.class_weights}

    def fit(self, X_train, y_train, print_metrics=False):
        '''
        Trains the ensemble and optionally prints standard error metrics
        computed on the out-of-bag samples.
        '''
        dcOob = self._train(X_train, y_train)
        if print_metrics:
            accs, pres, recs = np.array([]), np.array([]), np.array([])
            for b, m in zip(dcOob, self.trees):
                if dcOob[b].size:
                    # score each tree on its own out-of-bag sample
                    yp = m.predict(dcOob[b][:, :-1])
                    acc = accuracy_score(dcOob[b][:, -1], yp)
                    pre = precision_score(dcOob[b][:, -1], yp, average='weighted')
                    rec = recall_score(dcOob[b][:, -1], yp, average='weighted')
                    accs, pres, recs = np.append(accs, acc), np.append(pres, pre), np.append(recs, rec)
            print("Standard error in accuracy: %.2f" % np.std(accs))
            print("Standard error in precision: %.2f" % np.std(pres))
            print("Standard error in recall: %.2f" % np.std(recs))

    def predict(self, X):
        '''
        Predicts class labels using the trained ensemble.
        '''
        # round the averaged tree outputs to the nearest integer class label
        return np.round(self._predict(X)).astype(int)
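
## Example usage (an illustrative sketch: the synthetic dataset and parameter values
## are assumptions, and the decisiontrees module must be importable) ##
if __name__ == '__main__':
    from sklearn.datasets import make_classification

    X, y = make_classification(n_samples=200, n_features=8, random_state=0)
    rfc = RandomForestClassifierCustom(n_trees=25, max_depth=4)
    rfc.fit(X, y, print_metrics=True)   # also reports out-of-bag error spread
    preds = rfc.predict(X)
    print('Training accuracy: %.2f' % accuracy_score(y, preds))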