Statistical Inference: Your Friendly Guide to Making Sense of Data | by Timothy Kimutai

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import pandas as pd

# Generate reproducible synthetic data: a fixed seed makes every run print
# the same numbers.
np.random.seed(42)

# Coffee drinkers (n=50): slightly higher productivity scores
coffee_drinkers = np.random.normal(85, 12, 50)

# Non-coffee drinkers (n=45): baseline productivity scores
non_coffee_drinkers = np.random.normal(80, 11, 45)

# Long-format DataFrame: one row per participant, tagged with its group.
information = pd.DataFrame({
    'productiveness': np.concatenate([coffee_drinkers, non_coffee_drinkers]),
    'group': ['Coffee'] * len(coffee_drinkers) + ['No Coffee'] * len(non_coffee_drinkers),
})

print("☕ COFFEE vs PRODUCTIVITY ANALYSIS ☕")
print("=" * 50)

# Descriptive statistics.
# The filter values must match the labels and column name used when the
# DataFrame was built above, otherwise both selections come back empty.
coffee_stats = information[information['group'] == 'Coffee']['productiveness']
no_coffee_stats = information[information['group'] == 'No Coffee']['productiveness']

print(f"Coffee Drinkers (n={len(coffee_stats)}):")
print(f" Mean: {coffee_stats.mean():.2f}")
print(f" Std Dev: {coffee_stats.std():.2f}")

print(f"\nNon-Coffee Drinkers (n={len(no_coffee_stats)}):")
print(f" Mean: {no_coffee_stats.mean():.2f}")
print(f" Std Dev: {no_coffee_stats.std():.2f}")

# Step 1: Set up hypotheses (two-sided alternative)
print("\n📋 HYPOTHESIS TESTING")
print("H0: No distinction in productiveness between teams (μ₁ = μ₂)")
print("H1: Coffee drinkers have totally different productiveness (μ₁ ≠ μ₂)")

# Step 2: Perform the test.
# ttest_ind is the independent two-sample t-test; with its defaults it is
# two-sided and assumes equal population variances.
t_stat, p_val = stats.ttest_ind(coffee_stats, no_coffee_stats)

print("\n🧮 TEST RESULTS:")
print(f"T-statistic: {t_stat:.3f}")
print(f"P-value: {p_val:.6f}")

# Step 3: Make the decision at the chosen significance level
alpha = 0.05
if p_val < alpha:
    print(f"\n✅ DECISION: Reject H0 (p = {p_val:.6f} < {alpha})")
    print("There IS a statistically significant distinction between teams!")
else:
    print(f"\n❌ DECISION: Fail to reject H0 (p = {p_val:.6f} ≥ {alpha})")
    print("There’s NO statistically significant distinction between teams.")

# Step 4: Calculate confidence intervals for both groups
def calculate_ci(information, confidence=0.95):
    """Return a two-sided t-based confidence interval for a sample mean.

    Args:
        information: One-dimensional sample of numeric observations
            (list, NumPy array or pandas Series).
        confidence: Confidence level as a fraction, e.g. 0.95 for 95%.

    Returns:
        Tuple ``(ci_lower, ci_upper, margin_error)`` where the interval is
        ``mean ± margin_error``.

    Raises:
        ValueError: If fewer than two observations are supplied; the sample
            standard deviation (ddof=1) is undefined in that case.
    """
    values = np.asarray(information, dtype=float)
    n = len(values)
    if n < 2:
        raise ValueError("calculate_ci needs at least two observations")

    mean = np.mean(values)
    std = np.std(values, ddof=1)  # ddof=1 -> sample (not population) std dev
    se = std / np.sqrt(n)  # standard error of the mean

    # Two-sided interval: split alpha across both tails of the t distribution
    # with n - 1 degrees of freedom.
    alpha = 1 - confidence
    t_crit = stats.t.ppf(1 - alpha / 2, n - 1)

    margin_error = t_crit * se
    ci_lower = mean - margin_error
    ci_upper = mean + margin_error

    return ci_lower, ci_upper, margin_error

# Calculate 95% CIs; each result is (lower, upper, margin_of_error)
coffee_ci = calculate_ci(coffee_stats)
no_coffee_ci = calculate_ci(no_coffee_stats)

print("\n📊 95% CONFIDENCE INTERVALS:")
print(f"Coffee Drinkers: [{coffee_ci[0]:.2f}, {coffee_ci[1]:.2f}]")
print(f"Non-Coffee Drinkers: [{no_coffee_ci[0]:.2f}, {no_coffee_ci[1]:.2f}]")

# Check whether the intervals overlap: they are disjoint exactly when one
# interval's upper bound lies below the other interval's lower bound.
if coffee_ci[1] < no_coffee_ci[0] or no_coffee_ci[1] < coffee_ci[0]:
    print("🚫 Confidence intervals do not overlap – robust proof of distinction!")
else:
    print("🤝 Confidence intervals overlap – some uncertainty concerning the distinction.")

# Step 5: Effect size (Cohen's d) — the mean difference expressed in units
# of the pooled standard deviation.
# The pooled variance weights each group's sample variance by its degrees
# of freedom (n - 1).
pooled_variance = (
    (len(coffee_stats) - 1) * coffee_stats.var()
    + (len(no_coffee_stats) - 1) * no_coffee_stats.var()
) / (len(coffee_stats) + len(no_coffee_stats) - 2)
effect_size = (coffee_stats.mean() - no_coffee_stats.mean()) / np.sqrt(pooled_variance)

print(f"\n📏 EFFECT SIZE (Cohen’s d): {effect_size:.3f}")
# Cohen's conventional cut-offs: 0.2 (small), 0.5 (medium), 0.8 (large).
if abs(effect_size) < 0.2:
    effect_interp = "negligible"
elif abs(effect_size) < 0.5:
    effect_interp = "small"
elif abs(effect_size) < 0.8:
    effect_interp = "medium"
else:
    effect_interp = "large"
print(f"Interpretation: {effect_interp} effect")

# Visualization: box plot on the left, overlaid histograms on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# Box plot of scores per group
information.boxplot(column='productiveness', by='group', ax=ax1)
ax1.set_title('Productiveness Scores by Group')
ax1.set_ylabel('Productiveness Rating')

# Histogram with confidence intervals
ax2.hist(coffee_stats, alpha=0.7, label='Coffee Drinkers', bins=15, color='brown')
ax2.hist(no_coffee_stats, alpha=0.7, label='Non-Coffee Drinkers', bins=15, color='lightblue')

# Add confidence interval bars: a marker at each group mean with a horizontal
# error bar of ± the margin of error (index 2 of the calculate_ci result).
# The y positions 5 and 3 only stack the two bars so they do not collide.
ax2.errorbar(coffee_stats.mean(), 5, xerr=coffee_ci[2],
             fmt='o', color='darkred', capsize=5, capthick=2, label='Coffee 95% CI')
ax2.errorbar(no_coffee_stats.mean(), 3, xerr=no_coffee_ci[2],
             fmt='s', color='darkblue', capsize=5, capthick=2, label='No Coffee 95% CI')

ax2.set_xlabel('Productiveness Rating')
ax2.set_ylabel('Frequency')
ax2.set_title('Distribution of Productiveness Scores')
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

Source link

LLMs Finally Learn to Say “I Don’t Know” — And It’s a Game-Changer | by ArXiv In-depth Analysis | Jun, 2025

Unsupervised Learning: A Simple Revision Guide | by Samriddhi Saxena | Jun, 2025

Paper Insights: Masked Autoencoders that listen | by Shanmuka Sadhu | Jun, 2025

AI in Sports: How Machine Learning is Enhancing Performance, Strategy, and Injury Prevention | by Ranjotisingh | Mar, 2025

What Legally Counts as Wrongful Termination? A Lawyer Explains

Machine Learning for Human Behavior: Building Algorithms to Understand Psychological Patterns | by Paras Khulbe | Apr, 2025

A Beginner’s Guide to Reinforcement Learning with PyTorch! | by Emrullah AYDOGAN | Apr, 2025

How Young Workers Are Creating a New Opportunity for Unions

Most Popular

What A Recession Is Like For Early Retirees: The Good and Bad

Shaquille O’Neal on Franchising, Investing, and Fighting Nerves

Honestly Uncertain | Towards Data Science

Our Picks

AI ML Courses in Hyderabad | Best Artificial Intelligence | by Kalyanvisualpath | Apr, 2025

How Entrepreneurs Can Stay Ahead in the Age of Instant News

🐛 The Problem I Encountered While Studying Lesson 2 of fastai’s Practical Deep Learning | by thgirb | Jun, 2025

Statistical Inference: Your Friendly Guide to Making Sense of Data | by Timothy Kimutai | Jun, 2025

Related Posts