import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
# Generate synthetic sample data; the fixed seed makes every run reproducible.
np.random.seed(42)
# Coffee drinkers (n=50): scores drawn from N(mean=85, sd=12)
coffee_drinkers = np.random.normal(85, 12, 50)
# Non-coffee drinkers (n=45): baseline scores drawn from N(mean=80, sd=11)
non_coffee_drinkers = np.random.normal(80, 11, 45)
# Combine both samples into one long-format DataFrame (one row per person)
information = pd.DataFrame({
    'productiveness': np.concatenate([coffee_drinkers, non_coffee_drinkers]),
    'group': ['Coffee'] * len(coffee_drinkers)
             + ['No Coffee'] * len(non_coffee_drinkers),
})
print("☕ COFFEE vs PRODUCTIVITY ANALYSIS ☕")
print("=" * 50)
# Descriptive statistics: select each group's scores as a Series.
# The filter values must match the labels used when building the frame above.
coffee_stats = information.loc[information['group'] == 'Coffee', 'productiveness']
no_coffee_stats = information.loc[information['group'] == 'No Coffee', 'productiveness']
print(f"Coffee Drinkers (n={len(coffee_stats)}):")
print(f" Mean: {coffee_stats.mean():.2f}")
print(f" Std Dev: {coffee_stats.std():.2f}")
print(f"\nNon-Coffee Drinkers (n={len(no_coffee_stats)}):")
print(f" Mean: {no_coffee_stats.mean():.2f}")
print(f" Std Dev: {no_coffee_stats.std():.2f}")
# Step 1: Set up hypotheses
print("\n📋 HYPOTHESIS TESTING")
print("H0: No difference in productiveness between groups (μ₁ = μ₂)")
print("H1: Coffee drinkers have different productiveness (μ₁ ≠ μ₂)")
# Step 2: Perform the test (two-sided independent two-sample t-test,
# scipy's default equal-variance form)
t_stat, p_val = stats.ttest_ind(coffee_stats, no_coffee_stats)
print("\n🧮 TEST RESULTS:")
print(f"T-statistic: {t_stat:.3f}")
print(f"P-value: {p_val:.6f}")
# Step 3: Make the decision at significance level alpha
alpha = 0.05
if p_val < alpha:
    print(f"\n✅ DECISION: Reject H0 (p = {p_val:.6f} < {alpha})")
    print("There IS a statistically significant difference between groups!")
else:
    print(f"\n❌ DECISION: Fail to reject H0 (p = {p_val:.6f} ≥ {alpha})")
    print("There's NO statistically significant difference between groups.")
# Step 4: Calculate confidence intervals for both groups
def calculate_ci(information, confidence=0.95):
    """Return a two-sided t-based confidence interval for the sample mean.

    Args:
        information: One-dimensional sequence of numeric observations
            (list, NumPy array, or pandas Series).
        confidence: Confidence level strictly between 0 and 1.

    Returns:
        A tuple ``(ci_lower, ci_upper, margin_error)`` where the interval is
        ``mean ± margin_error``.

    Raises:
        ValueError: If fewer than two observations are given (the sample
            standard deviation is undefined) or ``confidence`` is not
            strictly between 0 and 1.
    """
    n = len(information)
    if n < 2:
        raise ValueError("calculate_ci needs at least two observations")
    if not 0 < confidence < 1:
        raise ValueError("confidence must be strictly between 0 and 1")
    mean = np.mean(information)
    # ddof=1 gives the unbiased sample standard deviation.
    std = np.std(information, ddof=1)
    se = std / np.sqrt(n)
    alpha = 1 - confidence
    # Two-sided critical value from Student's t with n - 1 degrees of freedom.
    t_crit = stats.t.ppf(1 - alpha / 2, n - 1)
    margin_error = t_crit * se
    ci_lower = mean - margin_error
    ci_upper = mean + margin_error
    return ci_lower, ci_upper, margin_error
# Calculate 95% CIs; each result is (lower, upper, margin_of_error)
coffee_ci = calculate_ci(coffee_stats)
no_coffee_ci = calculate_ci(no_coffee_stats)
print("\n📊 95% CONFIDENCE INTERVALS:")
print(f"Coffee Drinkers: [{coffee_ci[0]:.2f}, {coffee_ci[1]:.2f}]")
print(f"Non-Coffee Drinkers: [{no_coffee_ci[0]:.2f}, {no_coffee_ci[1]:.2f}]")
# Check whether the intervals overlap: they are disjoint only when one
# interval ends before the other begins.
if coffee_ci[1] < no_coffee_ci[0] or no_coffee_ci[1] < coffee_ci[0]:
    print("🚫 Confidence intervals do not overlap – strong evidence of a difference!")
else:
    print("🤝 Confidence intervals overlap – some uncertainty about the difference.")
# Step 5: Effect size (Cohen's d = mean difference / pooled standard deviation)
n_coffee = len(coffee_stats)
n_no_coffee = len(no_coffee_stats)
# pandas .var() uses ddof=1, i.e. the sample variance the pooled formula needs.
pooled_var = (
    (n_coffee - 1) * coffee_stats.var()
    + (n_no_coffee - 1) * no_coffee_stats.var()
) / (n_coffee + n_no_coffee - 2)
effect_size = (coffee_stats.mean() - no_coffee_stats.mean()) / np.sqrt(pooled_var)
print(f"\n📏 EFFECT SIZE (Cohen's d): {effect_size:.3f}")
# Conventional Cohen's d cut-offs: 0.2 (small), 0.5 (medium), 0.8 (large).
if abs(effect_size) < 0.2:
    effect_interp = "negligible"
elif abs(effect_size) < 0.5:
    effect_interp = "small"
elif abs(effect_size) < 0.8:
    effect_interp = "medium"
else:
    effect_interp = "large"
print(f"Interpretation: {effect_interp} effect")
# Visualization: box plot on the left, overlaid histograms on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
# Box plot of scores per group
information.boxplot(column='productiveness', by='group', ax=ax1)
ax1.set_title('Productiveness Scores by Group')
ax1.set_ylabel('Productiveness Rating')
# Histogram with confidence intervals
ax2.hist(coffee_stats, alpha=0.7, label='Coffee Drinkers', bins=15, color='brown')
ax2.hist(no_coffee_stats, alpha=0.7, label='Non-Coffee Drinkers', bins=15,
         color='lightblue')
# Add confidence interval bars: horizontal error bars centred on each group
# mean, drawn at fixed heights (5 and 3) so the two bars do not collide.
ax2.errorbar(coffee_stats.mean(), 5, xerr=coffee_ci[2],
             fmt='o', color='darkred', capsize=5, capthick=2,
             label='Coffee 95% CI')
ax2.errorbar(no_coffee_stats.mean(), 3, xerr=no_coffee_ci[2],
             fmt='s', color='darkblue', capsize=5, capthick=2,
             label='No Coffee 95% CI')
ax2.set_xlabel('Productiveness Rating')
ax2.set_ylabel('Frequency')
ax2.set_title('Distribution of Productiveness Scores')
ax2.legend()
ax2.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()