In [1]:
# -----------------------------
# Import Libraries
# -----------------------------
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency, ttest_ind
from statsmodels.stats.power import NormalIndPower

In [2]:
# -----------------------------
# Load Dataset
# -----------------------------
df = pd.read_csv("./raw_ab.csv")

# Inspect data
print(df.head(5))
print(df.shape)

# Remove duplicates
df = df.drop_duplicates(subset=['user_id'])
print(df.head(5))
print(df.shape)

# Columns: user_id, timestamp, group, landing_page, converted
print("\nInsight: dataset contains users split into control and treatment, with conversion info")

   user_id                   timestamp      group landing_page  converted
0   851104  2017-01-21 22:11:48.556739    control     old_page          0
1   804228  2017-01-12 08:01:45.159739    control     old_page          0
2   661590  2017-01-11 16:55:06.154213  treatment     new_page          0
3   853541  2017-01-08 18:28:03.143765  treatment     new_page          0
4   864975  2017-01-21 01:52:26.210827    control     old_page          1
(294478, 5)
   user_id                   timestamp      group landing_page  converted
0   851104  2017-01-21 22:11:48.556739    control     old_page          0
1   804228  2017-01-12 08:01:45.159739    control     old_page          0
2   661590  2017-01-11 16:55:06.154213  treatment     new_page          0
3   853541  2017-01-08 18:28:03.143765  treatment     new_page          0
4   864975  2017-01-21 01:52:26.210827    control     old_page          1
(290584, 5)

Insight: dataset contains users split into control and treatment, with conversion info


In [3]:
# -----------------------------
# Add Simulated Revenue
# -----------------------------
np.random.seed(42)  # make reproducible
df['revenue'] = df['converted'].apply(lambda x: np.random.randint(50,201) if x==1 else 0)

print(df.head())
print("\nInsight: revenue assigned only to converted users")

   user_id                   timestamp      group landing_page  converted  \
0   851104  2017-01-21 22:11:48.556739    control     old_page          0   
1   804228  2017-01-12 08:01:45.159739    control     old_page          0   
2   661590  2017-01-11 16:55:06.154213  treatment     new_page          0   
3   853541  2017-01-08 18:28:03.143765  treatment     new_page          0   
4   864975  2017-01-21 01:52:26.210827    control     old_page          1   

   revenue  
0        0  
1        0  
2        0  
3        0  
4      152  

Insight: revenue assigned only to converted users


In [4]:
# -----------------------------
# Aggregate Metrics by Group
# -----------------------------
metrics = df.groupby('group').agg(
    users=('user_id', 'nunique'),
    conversions=('converted', 'sum'),
    conversion_rate=('converted', 'mean'),
    revenue=('revenue', 'sum'),
    revenue_per_user=('revenue', 'mean')
).reset_index()

print("\nAggregated Metrics by Group:")
print(metrics)
print("\nInsight: control group has slightly higher conversion rate and revenue per user")


Aggregated Metrics by Group:
       group   users  conversions  conversion_rate  revenue  revenue_per_user
0    control  145232        17471         0.120297  2183225         15.032672
1  treatment  145352        17274         0.118843  2163364         14.883620

Insight: control group has slightly higher conversion rate and revenue per user


In [5]:
# -----------------------------
# Validate Sample Sizes
# -----------------------------
sample_check = df.groupby('group')['user_id'].nunique()
print("\nSample Sizes by Group:")
print(sample_check)
print("\nInsight: groups are balanced enough for analysis")


Sample Sizes by Group:
group
control      145232
treatment    145352
Name: user_id, dtype: int64

Insight: groups are balanced enough for analysis


In [6]:
# -----------------------------
# Chi-Square Test for Conversion
# -----------------------------
control_conv = df[df['group']=='control']['converted'].sum()
control_users = df[df['group']=='control'].shape[0]

treatment_conv = df[df['group']=='treatment']['converted'].sum()
treatment_users = df[df['group']=='treatment'].shape[0]

# Contingency table
table = [
    [control_conv, control_users - control_conv],
    [treatment_conv, treatment_users - treatment_conv]
]

chi2, p_value_conv, _, _ = chi2_contingency(table)
print(f"\nChi-Square Test for Conversion: p-value = {p_value_conv:.4f}")


Chi-Square Test for Conversion: p-value = 0.2291


In [7]:
# -----------------------------
# T-Test for Revenue
# -----------------------------
control_revenue = df[df['group']=='control']['revenue']
treatment_revenue = df[df['group']=='treatment']['revenue']

# Check if revenue difference between groups is significant
t_stat, p_value_revenue = ttest_ind(control_revenue, treatment_revenue, equal_var=False)
print(f"T-Test for Revenue: p-value = {p_value_revenue:.4f}")

T-Test for Revenue: p-value = 0.3534


In [8]:
# -----------------------------
# Interprete
# -----------------------------
if p_value_conv < 0.05:
    print("Conversion difference is statistically significant.")
else:
    print("Conversion difference is NOT statistically significant.")

if p_value_revenue < 0.05:
    print("Revenue difference is statistically significant.")
else:
    print("Revenue difference is NOT statistically significant.")

Conversion difference is NOT statistically significant.
Revenue difference is NOT statistically significant.


In [9]:
# -----------------------------
# Sample Size Efficiency Check
# -----------------------------

# Define parameters
baseline_rate = df[df['group']=='control']['converted'].mean()
lift = 0.02  # minimum detectable lift
alpha = 0.05
power = 0.8

# Calculate effect size using Cohen's h
effect_size = 2 * np.arcsin(np.sqrt(baseline_rate + lift)) - 2 * np.arcsin(np.sqrt(baseline_rate))

analysis = NormalIndPower()
sample_size_per_group = analysis.solve_power(
    effect_size=effect_size,
    alpha=alpha,
    power=power,
    alternative='two-sided'
)

print(f"\nRequired sample size per group for {lift*100:.1f}% lift: {int(sample_size_per_group)}")
print(f"Actual control group size: {control_users}")
print(f"Actual treatment group size: {treatment_users}")

if control_users >= sample_size_per_group and treatment_users >= sample_size_per_group:
    print("Sample size is sufficient to detect the desired lift.")
else:
    print("Sample size may be insufficient to detect the desired lift.")

print("\nInsight: experiment has enough users to detect a 2% lift")


Required sample size per group for 2.0% lift: 4441
Actual control group size: 145232
Actual treatment group size: 145352
Sample size is sufficient to detect the desired lift.

Insight: experiment has enough users to detect a 2% lift


In [10]:
# -----------------------------
# Prepare Data for Dashboard
# -----------------------------

# Add date column
df['event_date'] = pd.to_datetime(df.timestamp).dt.date

daily_metrics = df.groupby(['group', 'event_date']).agg(

    users=('user_id','nunique'),
    conversions=('converted','sum'),
    conversion_rate=('converted','mean'),
    revenue_per_user=('revenue','mean')
).reset_index()

# Export for Tableau
daily_metrics.to_csv("ab_test_daily_metrics.csv", index=False)
df.to_csv("ab_test_df.csv", index=False)