Delivery Time Prediction

Problem Statement

To train a regression model to predict the estimated delivery time.

Importing Python Libraries¶

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

Loading the Dataset¶

df = pd.read_csv("data.csv")

Data Exploration (EDA)¶

df.shape

(175777, 14)

df.head(5)

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175777 entries, 0 to 175776
Data columns (total 14 columns):
 #   Column                                        Non-Null Count   Dtype  
---  ------                                        --------------   -----  
 0   market_id                                     175777 non-null  float64
 1   created_at                                    175777 non-null  object 
 2   actual_delivery_time                          175777 non-null  object 
 3   store_primary_category                        175777 non-null  int64  
 4   order_protocol                                175777 non-null  float64
 5   total_items                                   175777 non-null  int64  
 6   subtotal                                      175777 non-null  int64  
 7   num_distinct_items                            175777 non-null  int64  
 8   min_item_price                                175777 non-null  int64  
 9   max_item_price                                175777 non-null  int64  
 10  total_onshift_dashers                         175777 non-null  float64
 11  total_busy_dashers                            175777 non-null  float64
 12  total_outstanding_orders                      175777 non-null  float64
 13  estimated_store_to_consumer_driving_duration  175777 non-null  float64
dtypes: float64(6), int64(6), object(2)
memory usage: 18.8+ MB

df.describe()

df.isnull().sum()

market_id                                       0
created_at                                      0
actual_delivery_time                            0
store_primary_category                          0
order_protocol                                  0
total_items                                     0
subtotal                                        0
num_distinct_items                              0
min_item_price                                  0
max_item_price                                  0
total_onshift_dashers                           0
total_busy_dashers                              0
total_outstanding_orders                        0
estimated_store_to_consumer_driving_duration    0
dtype: int64

💡 Observation

There are no null values present in the data set.

# Converting the features to date time format

df['created_at'] = pd.to_datetime(df['created_at'])
df['actual_delivery_time'] = pd.to_datetime(df['actual_delivery_time'])

cols = df.columns

for col in cols:
    print(f"{col}:", df[col].nunique())

market_id: 6
created_at: 162649
actual_delivery_time: 160344
store_primary_category: 73
order_protocol: 7
total_items: 54
subtotal: 8182
num_distinct_items: 20
min_item_price: 2251
max_item_price: 2585
total_onshift_dashers: 172
total_busy_dashers: 158
total_outstanding_orders: 281
estimated_store_to_consumer_driving_duration: 1318

cols = df[['market_id', 'order_protocol']]

for col in cols:
    print(df[col].value_counts())

market_id
2.0    53469
4.0    46222
1.0    37115
3.0    21075
5.0    17258
6.0      638
Name: count, dtype: int64
order_protocol
1.0    48404
3.0    47125
5.0    41415
2.0    20890
4.0    17246
6.0      678
7.0       19
Name: count, dtype: int64

# Creating the Target feature "estimated_minutes"

df['estimated_time'] = df['actual_delivery_time'] - df['created_at']
df['estimated_minutes'] = (df['estimated_time'].dt.total_seconds() // 60).astype('Int64')
df['estimated_minutes'].describe()

count     175777.0
mean     46.203013
std       9.327424
min           32.0
25%           39.0
50%           45.0
75%           52.0
max          110.0
Name: estimated_minutes, dtype: Float64

💡 Observation

From the given data, the minimum time for the delivery is $32$ minutes and the maximum time for the delivery is $110$ minutes.
And average time of around ~$46$ minutes.

# Outlier detection

def outlier_det(data, col):
    q1 = data[col].quantile(0.25)
    q3 = data[col].quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1-(1.5*iqr)
    upper_bound = q3+(1.5*iqr)

    outliers = (data[col] < lower_bound) | (data[col] > upper_bound)
    count = int(outliers.sum())
    percent = round((count/data.shape[0]) * 100, 2)
    print(f"{col}:\n{count} outliers ({percent}%)\n")

    return outliers

columns = df.select_dtypes(include=np.number).columns.tolist()

for col in columns:
    outlier_det(df, col)

market_id:
0 outliers (0.0%)

store_primary_category:
0 outliers (0.0%)

order_protocol:
0 outliers (0.0%)

total_items:
8486 outliers (4.83%)

subtotal:
8050 outliers (4.58%)

num_distinct_items:
5249 outliers (2.99%)

min_item_price:
4047 outliers (2.3%)

max_item_price:
6954 outliers (3.96%)

total_onshift_dashers:
1208 outliers (0.69%)

total_busy_dashers:
463 outliers (0.26%)

total_outstanding_orders:
5194 outliers (2.95%)

estimated_store_to_consumer_driving_duration:
315 outliers (0.18%)

estimated_time:
1749 outliers (1.0%)

estimated_minutes:
1749 outliers (1.0%)

# Outlier treatment

def iqr_cap(df, cols=None, k=1.5, round_int_bounds=True):
    if cols is None:
        cols = df.select_dtypes(include=np.number).columns
    capped = df.copy()
    for c in cols:
        s = capped[c]
        q1 = capped[c].quantile(0.25)
        q3 = capped[c].quantile(0.75)
        iqr = q3 - q1
        low = q1 - k * iqr
        up  = q3 + k * iqr

        if pd.api.types.is_integer_dtype(s.dtype):
            
            if round_int_bounds:
                low_i = int(np.floor(low))
                up_i  = int(np.ceil(up))
                capped[c] = s.clip(lower=low_i, upper=up_i)
            else:
                
                capped[c] = s.astype(float).clip(lower=low, upper=up).round().astype(s.dtype)
        else:
            capped[c] = s.clip(lower=low, upper=up)
        
    return capped

df1 = iqr_cap(df, k=1.5)

Visual Analysis¶

plt.figure(figsize=(10,6))
sns.histplot(data=df1, x='estimated_minutes', kde=True)
plt.show()

💡Observation

The target feature, estimated minutes is right skewed.

plt.figure(figsize=(10,6))
sns.boxplot(data=df1, x='estimated_minutes')
plt.show()

df1["created_date"] = df1["created_at"].dt.strftime("%Y-%m-%d")
df1["created_time"] = df1["created_at"].dt.strftime("%H:%M:%S")
df1["delivered_date"] = df1["actual_delivery_time"].dt.strftime("%Y-%m-%d")
df1["delivered_time"] = df1["actual_delivery_time"].dt.strftime("%H:%M:%S")

df1["actual_delivery_time"] = pd.to_datetime(df1["actual_delivery_time"], errors="coerce")

h = df1["actual_delivery_time"].dt.hour  # 0..23 [web:145]

# Shift hours so that 21->0, 22->1, 23->2, 0->3, ... 20->23
h_shift = (h - 21) % 24  # wrap-around via modulo [web:146]

# Define bins on shifted scale:
# 0 -> night:    0–8   (maps to 21:00–04:59)
# 1 -> morning:  9–15  (05:00–11:59)
# 2 -> afternoon:16–20 (12:00–16:59)
# 3 -> evening:  21–23 (17:00–20:59)
bins   = [-0.1, 9, 16, 21, 24]
labels = [0, 1, 2, 3]

df1["delivery_session"] = pd.cut(
    h_shift, bins=bins, labels=labels, right=False, include_lowest=True
)

# If week day then 1 else if weekend then 0

df1["created_on"]  = np.where(df1["created_at"].dt.weekday < 5, 1, 0)
df1["delivered_on"] = np.where(df1["actual_delivery_time"].dt.weekday < 5, 1, 0)

df1_corr = df1.corr('spearman', numeric_only=True)

plt.figure(figsize=(9, 6))
sns.heatmap(df1_corr, annot=True, cmap='coolwarm', fmt = '.2f', linewidths=0.5)
plt.show()

💡Observation

From the above correlation heat map, the following features are highly correlated.
- Number of distinct items vs total items → quite obivious, because of the number of items.
- The features → total_onshift_dashers, total_busy_dashers, total_outstanding_orders are also highly correlated.

# Prep counts
left_ct = df1.groupby(['created_on','delivery_session']).size().unstack(fill_value=0).sort_index()
right_ct = df1.groupby(['delivered_on','delivery_session']).size().unstack(fill_value=0).sort_index()

def stacked_bar(ax, ct, title, xlab):
    bottoms = None
    colors = plt.cm.tab10(np.linspace(0, 1, ct.shape[1]))
    for (col, color) in zip(ct.columns, colors):
        ax.bar(ct.index, ct[col], bottom=bottoms, label=col, color=color, width=0.6)
        bottoms = (ct[col] if bottoms is None else bottoms + ct[col])
    ax.set_title(title)
    ax.set_xlabel(xlab)
    ax.set_ylabel('count')
    ax.legend(title='delivery_session')

plt.figure(figsize=(9,6))
ax1 = plt.subplot(1,2,1)
stacked_bar(ax1, left_ct, 'Number of orders created', 'created_on')

ax2 = plt.subplot(1,2,2)
stacked_bar(ax2, right_ct, 'Number of orders delivered', 'delivered_on')

plt.tight_layout()
plt.show()

💡 Observation

The majority of the orders placed during the weekdays. And all the orders are delivered in the same day. But there is a small discrepancy because if someone placed order during Friday before midnight then the order delivered after midnight which is technically the next day (saturday) and vice versa.
Also most of the orders were placed during night and evening time.

Label	Session	Day
0	Night	Weekend
1	Morning	Weekday
2	Afternoon	-
3	Evening	-

avg_os = df1.groupby(['created_on','delivery_session'])['total_outstanding_orders'].mean().unstack(fill_value=0).sort_index()
avg_busy = df1.groupby(['created_on','delivery_session'])['total_busy_dashers'].mean().unstack(fill_value=0).sort_index()
avg_os_das = df1.groupby(['created_on','delivery_session'])['total_onshift_dashers'].mean().unstack(fill_value=0).sort_index()

def stacked_bar(ax, ct, title, xlab):
    bottoms = None
    colors = plt.cm.tab10(np.linspace(0, 1, ct.shape[1]))
    for (col, color) in zip(ct.columns, colors):
        ax.bar(ct.index.astype(str), ct[col].values, bottom=bottoms,
               label=f'session {col}', color=color, width=0.6)
        bottoms = ct[col].values if bottoms is None else bottoms + ct[col].values
    ax.set_title(title)
    ax.set_xlabel(xlab)
    ax.set_ylabel('average')
    ax.legend(title='delivery_session')

plt.figure(figsize=(12, 8))
ax1 = plt.subplot(1, 3, 1)
stacked_bar(ax1, avg_os, 'Avg. Outstanding Orders', 'created_on')

ax2 = plt.subplot(1, 3, 2)
stacked_bar(ax2, avg_busy, 'Avg. Busy Partners', 'created_on')

ax3 = plt.subplot(1, 3, 3)
stacked_bar(ax3, avg_os_das, 'Avg. Onshift Partners', 'created_on')

plt.tight_layout()
plt.show()

💡Observation

The average outstanding orders, average busy dashers and average onshift dashers are more in the sessions morning, evening and night.

avg_os = df1.groupby('market_id')['total_outstanding_orders'].mean().sort_index().to_frame('avg_outstanding')
avg_busy = df1.groupby('market_id')['total_busy_dashers'].mean().sort_index().to_frame('avg_busy')
avg_os_das = df1.groupby('market_id')['total_onshift_dashers'].mean().sort_index().to_frame('avg_onshift')

def stacked_bar(ax, ct, title, xlab):
    bottoms = None
    colors = plt.cm.tab10(np.linspace(0, 1, ct.shape[1]))
    for (col, color) in zip(ct.columns, colors):
        ax.bar(ct.index.astype(str), ct[col].values, 
               bottom=bottoms, label=col, color=color, width=0.6)
        bottoms = ct[col].values if bottoms is None else bottoms + ct[col].values
    ax.set_title(title)
    ax.set_xlabel('Market ID')
    ax.set_ylabel('Average')

plt.figure(figsize=(14, 6))

ax1 = plt.subplot(1, 3, 1)
stacked_bar(ax1, avg_os, 'Avg. Outstanding Orders', 'market_id')

ax2 = plt.subplot(1, 3, 2)
stacked_bar(ax2, avg_busy, 'Avg. Busy Partners', 'market_id')

ax3 = plt.subplot(1, 3, 3)
stacked_bar(ax3, avg_os_das, 'Avg. Onshift Partners', 'market_id')

plt.tight_layout()
plt.show()

💡Observation

The average outstanding orders, average busy dashers and average onshift dashers are more from the markets 2, 4 & 6.

counts = df1['market_id'].value_counts().sort_values(ascending=False)

plt.figure(figsize=(10,6))
ax = sns.barplot(
    x=counts.index.astype(str),  # cast to string to keep exact order
    y=counts.values,
    order=counts.index.astype(str),  # ensure order is enforced
    palette='viridis'
)

for bar, value in zip(ax.patches, counts.values):
    x = bar.get_x() + bar.get_width() / 2
    y = bar.get_height()
    ax.text(x, y + (0.01 * y), f"{value:,}", 
            ha='center', va='bottom', fontsize=10)

plt.xlabel("Market ID")
plt.ylabel("Number of orders placed")
plt.title("Orders per Market ID")
plt.show()

💡Observation

From the above chart, most of the orders are placed from the Market 2 & 4.
Market 6 has only very few orders.

counts = df1['order_protocol'].value_counts().sort_values(ascending=False)

plt.figure(figsize=(10,6))
ax = sns.barplot(
    x=counts.index.astype(str),  # cast to string to keep exact order
    y=counts.values,
    order=counts.index.astype(str),  # ensure order is enforced
    palette='viridis'
)

for bar, value in zip(ax.patches, counts.values):
    x = bar.get_x() + bar.get_width() / 2
    y = bar.get_height()
    ax.text(x, y + (0.01 * y), f"{value:,}", 
            ha='center', va='bottom', fontsize=10)

plt.xlabel("Order Protocol")
plt.ylabel("Number of orders placed")
plt.title("Orders per Order protocol")
plt.show()

💡Observation

From the above chart, most of the orders are placed in 1, 3 & 5.
Only very few orders placed in 6 & 7.

cols = ['order_protocol', 'market_id', 'created_on', 'delivery_session']

fig, axes = plt.subplots(1, 4, figsize=(20, 4), sharey=True)

for ax, col in zip(axes, cols):
    avg_min = df1.groupby(col)['estimated_minutes'].mean().sort_index()
    ax.plot(avg_min, marker='o', color='steelblue')
    ax.set_xlabel(col)
    ax.set_title(f'{col} vs Avg. estimated minutes')

axes[0].set_ylabel('Average estimated minutes')

plt.tight_layout()
plt.show()

💡Observation

Strong influence of order protocol
- Some protocols inherently take longer → operational review needed.
Market variations are large
- Market 1 has high delivery time.
Estimated delivery time is lower in week days
Delivery sessions later in day are faster

Statistical Analysis¶

import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats
from scipy.stats import kruskal
import scikit_posthocs as sp

df1['_log_dm'] = np.log(df1['estimated_minutes'].clip(lower=1e-6))
model_log = smf.ols('_log_dm ~ C(delivery_session)', data=df1).fit()
print(sm.stats.anova_lm(model_log, typ=2))
sm.stats.diagnostic.het_breuschpagan(model_log.resid,
                                     model_log.model.exog)

                          sum_sq        df           F  PR(>F)
C(delivery_session)   154.630079       3.0  1406.65839     0.0
Residual             6440.747111  175773.0         NaN     NaN

(np.float64(3121.3235911380525),
 np.float64(0.0),
 np.float64(1059.2265156419912),
 np.float64(0.0))

fig = sm.qqplot(df1['_log_dm'].dropna(), line='45')
plt.title('Q-Q plot of _log_dm')
plt.show()

fig = sm.qqplot(df1['estimated_minutes'].dropna(), line='45')
plt.title('Q-Q plot of _log_dm')
plt.show()

The target column 'estimated minutes' is not normally distributed. Even after log transformations also it is still not normally distributed.
Hence, we will use Kruskal-Wallis (non-parametric) test for our statistical analysis.

Kruskal - Wallis Test¶

Null Hypothesis (H0): All groups come from the same distribution (i.e., their population medians are equal)
Alternate Hypothesis (H1): At least one group has a different median → groups are not from the same distribution.

Target variable (estimated minutes) vs Order created¶

weekday_data = df1[df1['created_on'] == 0]['estimated_minutes']
weekend_data = df1[df1['created_on'] == 1]['estimated_minutes']

stat, p = kruskal(weekday_data, weekend_data)

print("Kruskal-Wallis Test: Weekday vs Weekend")
print("Statistic:", stat)
print("p-value:", p)

alpha = 0.05

if alpha < p:
    print('Fail to reject the null hypothesis. All groups come from the same distribution.')
else:
    print('Reject the null hypothesis. At least one group has significantly different median → groups are not from the same distribution.')

Kruskal-Wallis Test: Weekday vs Weekend
Statistic: 3873.949865402171
p-value: 0.0
Reject the null hypothesis. At least one group has significantly different median → groups are not from the same distribution.

posthoc = sp.posthoc_dunn([weekday_data, weekend_data], p_adjust='bonferroni')

posthoc.index = ["weekday_data", "weekend_data"]
posthoc.columns = ["weekday_data", "weekend_data"]

print("Post-hoc Dunn Test (Weekday vs Weekend)")
print(posthoc)

Post-hoc Dunn Test (Weekday vs Weekend)
              weekday_data  weekend_data
weekday_data           1.0           0.0
weekend_data           0.0           1.0

💡Observation

From the above 2 analysis, estimated delivery time differ between the weekday and weekend.

Target variable (estimated minutes) vs Session of the day¶

groups = []
labels = ["Night", "Morning", "Afternoon", "Evening"]
for s in [0, 1, 2, 3]:
    arr = df1[df1['delivery_session'] == s]['estimated_minutes'].astype(float).to_numpy()
    groups.append(arr)

stat, p = kruskal(*groups)

print("Kruskal-Wallis Test: Different sessions of the day")
print("Statistic:", stat)
print("p-value:", p)

alpha = 0.05

if alpha < p:
    print('Fail to reject the null hypothesis. All groups come from the same distribution.')
else:
    print('Reject the null hypothesis. At least one group has a different median → groups are not from the same distribution.')

Kruskal-Wallis Test: Different sessions of the day
Statistic: 4164.77145802514
p-value: 0.0
Reject the null hypothesis. At least one group has a different median → groups are not from the same distribution.

posthoc = sp.posthoc_dunn(groups, p_adjust='bonferroni')

posthoc.index = labels
posthoc.columns = labels

print("Post-hoc Dunn Test (Time of Day)")
print(posthoc)

Post-hoc Dunn Test (Time of Day)
                  Night        Morning     Afternoon        Evening
Night      1.000000e+00   1.000000e+00  1.854087e-51   0.000000e+00
Morning    1.000000e+00   1.000000e+00  1.280991e-32  1.531033e-187
Afternoon  1.854087e-51   1.280991e-32  1.000000e+00   8.944415e-20
Evening    0.000000e+00  1.531033e-187  8.944415e-20   1.000000e+00

💡Observation

A Kruskal–Wallis test indicated a statistically significant difference in estimated delivery times across session-of-day groups $(p < 0.001)$.
Dunn post-hoc comparisons with Bonferroni correction revealed:
- No significant difference between Night and Morning deliveries $(p = 1.0)$.
- Afternoon and Evening deliveries differ significantly from both Night and Morning $(p < 0.001)$.
- Afternoon and Evening also differ significantly from each other $(p < 0.001)$.

This suggests that delivery times during Afternoon and Evening are statistically distinct and likely higher, indicating systematic variation based on time of day.

Target variable (estimated minutes) vs Market ID¶

mgroups = []
labels = list(df1['market_id'].unique().astype(str))
for s in range (1, df1['market_id'].nunique()+1):
    arr = df1[df1['market_id'] == s]['estimated_minutes'].astype(float).to_numpy()
    mgroups.append(arr)

stat, p = kruskal(*mgroups)

print("Kruskal-Wallis Test: Different Market ID")
print("Statistic:", stat)
print("p-value:", p)

alpha = 0.05

if alpha < p:
    print('Fail to reject the null hypothesis. All groups come from the same distribution.')
else:
    print('Reject the null hypothesis. At least one group has a different median → groups are not from the same distribution.')

Kruskal-Wallis Test: Different Market ID
Statistic: 6110.689278888985
p-value: 0.0
Reject the null hypothesis. At least one group has a different median → groups are not from the same distribution.

posthoc = sp.posthoc_dunn(mgroups, p_adjust='bonferroni')

posthoc.index = labels
posthoc.columns = labels

print("Post-hoc Dunn Test (Market ID)")
print(posthoc)

Post-hoc Dunn Test (Market ID)
          1.0            2.0            3.0            4.0           5.0  \
1.0  1.000000   0.000000e+00   0.000000e+00   0.000000e+00  0.000000e+00   
2.0  0.000000   1.000000e+00   2.333147e-03  2.207087e-232  3.772597e-16   
3.0  0.000000   2.333147e-03   1.000000e+00  3.666024e-179  2.496803e-23   
4.0  0.000000  2.207087e-232  3.666024e-179   1.000000e+00  3.530477e-49   
5.0  0.000000   3.772597e-16   2.496803e-23   3.530477e-49  1.000000e+00   
6.0  0.000021   1.323411e-11   6.275633e-14   7.818687e-01  2.651938e-06   

              6.0  
1.0  2.129766e-05  
2.0  1.323411e-11  
3.0  6.275633e-14  
4.0  7.818687e-01  
5.0  2.651938e-06  
6.0  1.000000e+00

💡Observation

Market 4 vs Market 6

$p = 0.7818687$ → NOT significant

Conclusion: Market 4 and Market 6 are similar.
Market 1 vs All Others

Market 1 shows $p = 0$ for all comparisons except itself.

Conclusion: Market 1 is significantly different from Markets 2, 3, 4, 5, 6.
Market 2 vs:

Market 3 → $p = 0.00233$ → significant

Market 4 → $p ≈ 2e-232$ → highly significant

Market 5 → $p ≈ 3e-16$ → highly significant

Market 6 → $p ≈ 1e-11$ → significant

Conclusion: Market 2 differs significantly from all markets.
Market 3 vs:

Market 4 → $p ≈ 3.6e-179$ → highly significant

Market 5 → $p ≈ 2.4e-23$ → highly significant

Market 6 → $p ≈ 6e-14$ → highly significant

Conclusion: Market 3 differs from all except itself.
Market 4 vs:

Market 5 → $p ≈ 3.5e-49$ → highly significant
Market 5 vs Market 6

$p ≈ 2.6e-06$ → significant

Conclusion

Except for Market 4 and Market 6, every market pair shows a statistically significant difference.
This implies the estimated minutes varies strongly across most Markets.

Target variable (estimated minutes) vs Order Protocol¶

ogroups = []
olabels = list(df1['order_protocol'].unique().astype(str))
for s in range (1, df1['order_protocol'].nunique()+1):
    arr = df1[df1['order_protocol'] == s]['estimated_minutes'].astype(float).to_numpy()
    ogroups.append(arr)

stat, p = kruskal(*ogroups)

print("Kruskal-Wallis Test: Different Order Protocol")
print("Statistic:", stat)
print("p-value:", p)

alpha = 0.05

if alpha < p:
    print('Fail to reject the null hypothesis. All groups come from the same distribution.')
else:
    print('Reject the null hypothesis. At least one group has a different median → groups are not from the same distribution.')

Kruskal-Wallis Test: Different Order Protocol
Statistic: 5193.747600908804
p-value: 0.0
Reject the null hypothesis. At least one group has a different median → groups are not from the same distribution.

posthoc = sp.posthoc_dunn(ogroups, p_adjust='bonferroni')

posthoc.index = olabels
posthoc.columns = olabels

print("Post-hoc Dunn Test (Order Protocols)")
print(posthoc)

Post-hoc Dunn Test (Order Protocols)
               1.0            2.0            3.0           5.0            4.0  \
1.0   1.000000e+00   9.030098e-53  1.038717e-130  0.000000e+00   0.000000e+00   
2.0   9.030098e-53   1.000000e+00   6.351447e-03  0.000000e+00  4.664705e-185   
3.0  1.038717e-130   6.351447e-03   1.000000e+00  0.000000e+00  1.607581e-226   
5.0   0.000000e+00   0.000000e+00   0.000000e+00  1.000000e+00   7.485737e-58   
4.0   0.000000e+00  4.664705e-185  1.607581e-226  7.485737e-58   1.000000e+00   
6.0   1.106204e-05   1.000000e+00   1.000000e+00  1.063761e-15   5.954326e-05   
7.0   6.448451e-02   3.431164e-01   4.861123e-01  1.000000e+00   1.000000e+00   

              6.0       7.0  
1.0  1.106204e-05  0.064485  
2.0  1.000000e+00  0.343116  
3.0  1.000000e+00  0.486112  
5.0  1.063761e-15  1.000000  
4.0  5.954326e-05  1.000000  
6.0  1.000000e+00  0.775569  
7.0  7.755687e-01  1.000000

💡Observation

Protocol 1, Protocol 4, and Protocol 5 stand out—they have very different outcomes.
Protocols 2, 3, and 6 behave almost the same.
Protocol 7 is the "neutral" protocol—it does not differ from any others.

Chi-Square Test¶

from scipy.stats import chi2_contingency

def run_chi_square(df1, col1, col2):
    alpha = 0.05
    print("*"*100)
    print(f"▶️ Chi-Square Test: {col1} vs {col2}")
    
    table = pd.crosstab(df1[col1], df1[col2])
    chi2, p, dof, expected = chi2_contingency(table)

    print("Contingency Table:")
    print(table)

    print("\nResults:")
    print(f"Chi-square statistic: {chi2:.4f}")
    print(f"Degrees of freedom: {dof}")
    print(f"p-value: {p:.6f}")

    if p < alpha:
        print(f"➡️ Reject H₀: Significant association between {col1} and {col2}\n")
    else:
        print(f"➡️ Fail to Reject H₀: No association between {col1} and {col2}\n")

Market ID vs Others¶

cols = ['created_on', 'delivery_session', 'order_protocol']
col1 = 'market_id'
for col2 in cols:
    run_chi_square(df1, col1, col2)

****************************************************************************************************
▶️ Chi-Square Test: market_id vs created_on
Contingency Table:
created_on      0      1
market_id               
1.0         12481  24634
2.0         19040  34429
3.0          7322  13753
4.0         15654  30568
5.0          6045  11213
6.0           209    429

Results:
Chi-square statistic: 53.0003
Degrees of freedom: 5
p-value: 0.000000
➡️ Reject H₀: Significant association between market_id and created_on

****************************************************************************************************
▶️ Chi-Square Test: market_id vs delivery_session
Contingency Table:
delivery_session      0     1    2     3
market_id                               
1.0               30464  1248  763  4640
2.0               43326  1819  949  7375
3.0               16975   573  571  2956
4.0               38436  1033  672  6081
5.0               14165   641  241  2211
6.0                 529    22   10    77

Results:
Chi-square statistic: 381.5876
Degrees of freedom: 15
p-value: 0.000000
➡️ Reject H₀: Significant association between market_id and delivery_session

****************************************************************************************************
▶️ Chi-Square Test: market_id vs order_protocol
Contingency Table:
order_protocol    1.0   2.0    3.0   4.0    5.0  6.0  7.0
market_id                                                
1.0             14300  2632   7260  4241   8267  415    0
2.0             10579  7100  16398  3759  15508  124    1
3.0              9212  1866   3698  2977   3261   61    0
4.0              6853  6880  17181  3016  12282   10    0
5.0              7274  2319   2448  3193   1941   65   18
6.0               186    93    140    60    156    3    0

Results:
Chi-square statistic: 20983.1850
Degrees of freedom: 30
p-value: 0.000000
➡️ Reject H₀: Significant association between market_id and order_protocol

Order Protocol vs others¶

cols = ['created_on', 'delivery_session']
col1 = 'order_protocol'
for col2 in cols:
    run_chi_square(df1, col1, col2)

****************************************************************************************************
▶️ Chi-Square Test: order_protocol vs created_on

Contingency Table:
created_on          0      1
order_protocol              
1.0             17091  31313
2.0              6904  13986
3.0             15996  31129
4.0              6083  11163
5.0             14464  26951
6.0               210    468
7.0                 3     16

Results:
Chi-square statistic: 54.1156
Degrees of freedom: 6
p-value: 0.000000
➡️ Reject H₀: Significant association between order_protocol and created_on

****************************************************************************************************
▶️ Chi-Square Test: order_protocol vs delivery_session
Contingency Table:
delivery_session      0     1     2     3
order_protocol                           
1.0               39521  1682  1037  6164
2.0               16700   491   396  3303
3.0               39546  1155   534  5890
4.0               12984   849   714  2699
5.0               34571  1140   519  5185
6.0                 555    19     6    98
7.0                  18     0     0     1

Results:
Chi-square statistic: 1430.3035
Degrees of freedom: 18
p-value: 0.000000
➡️ Reject H₀: Significant association between order_protocol and delivery_session

Created on vs Delivery session¶

run_chi_square(df1, 'created_on', 'delivery_session')

****************************************************************************************************
▶️ Chi-Square Test: created_on vs delivery_session
Contingency Table:
delivery_session      0     1     2      3
created_on                                
0                 49709  2669  1310   7063
1                 94186  2667  1896  16277

Results:
Chi-square statistic: 810.8044
Degrees of freedom: 3
p-value: 0.000000
➡️ Reject H₀: Significant association between created_on and delivery_session

📌 Chi-Square Test Summary

Market ID vs Created On

Strong association found
Different markets show different order-creation patterns

Market ID vs Delivery Session

Significant relationship
Delivery session distribution varies across markets

Market ID vs Order Protocol

Very strong association
Order protocols differ heavily by market

Order Protocol vs Created On

Significant association
Order protocol usage changes between creation days (weekday(1) / weekend(0))

Order Protocol vs Delivery Session

Strong relationship
Different order protocols occur in different delivery sessions

Created On vs Delivery Session

Significant association
Delivery patterns differ between the two created_on groups

ML Devlopment¶

To predict delivery estimated minutes based on operational and market attributes.
This is a Supervised Regression problem.

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
import lightgbm as lgb
from sklearn.metrics import root_mean_squared_error, mean_absolute_error, r2_score
from statsmodels.stats.outliers_influence import variance_inflation_factor

Model Evalution¶

def model_evl(yt, yp):
    rmse = round(root_mean_squared_error(yt, yp), 4)
    mae = round(mean_absolute_error(yt, yp), 4)
    r2 = round(r2_score(yt, yp), 4)

    print(f"RMSE: {rmse}\nMAE: {mae}\nR2 score: {r2}")

Train Test Split¶

df1['created_date'] = pd.to_datetime(df1['created_date'])
df1['created_month'] = (df1['created_date']).dt.month
df1['created_year'] = (df1['created_date']).dt.year
df1['created_day'] = (df1['created_date']).dt.day_of_week

df1['delivered_date'] = pd.to_datetime(df1['delivered_date'])
df1['delivered_month'] = (df1['delivered_date']).dt.month
df1['delivered_year'] = (df1['delivered_date']).dt.year
df1['delivered_day'] = (df1['delivered_date']).dt.day_of_week

target = "estimated_minutes"
X = df1.drop(columns=[target, 'created_date', 'created_time', 'delivered_date', 'delivered_time', 
                       'created_at', 'actual_delivery_time', 'estimated_time', "_log_dm"])

X['delivery_session'] = X['delivery_session'].astype(int)

y = df1[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Tree Based Regression Models¶

Random Forest¶

rf = RandomForestRegressor(n_estimators=100, max_depth=30, random_state=42)

rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)

val = rf.feature_importances_
val

array([0.07901902, 0.00697876, 0.02021451, 0.002506  , 0.14397702,
       0.00382487, 0.0073201 , 0.00841503, 0.16129524, 0.02254129,
       0.29023048, 0.22149151, 0.00461633, 0.00098596, 0.00095051,
       0.00113706, 0.        , 0.01505626, 0.00104593, 0.        ,
       0.00839415])

columns = X.columns

fea_imp = pd.DataFrame(val).set_index(columns)
fea_imp.reset_index().sort_values(0, ascending=False)

X_dropped = X.drop(columns=['created_on', 'delivered_on', 'created_month', 'delivered_month', 'created_year', 'delivered_year'])
X_train_dropped, X_test_dropped, y_train, y_test = train_test_split(X_dropped, y, test_size=0.2, random_state=42)

rf.fit(X_train_dropped, y_train)

y_pred = rf.predict(X_test_dropped)

model_evl(y_pred, y_test)

RMSE: 2.4016
MAE: 1.7909
R2 score: 0.9195

# Predictions
train_pred = rf.predict(X_train)
test_pred = rf.predict(X_test)

# Train metrics
train_rmse = root_mean_squared_error(y_train, train_pred)
train_r2 = r2_score(y_train, train_pred)

# Test metrics
test_rmse = root_mean_squared_error(y_test, test_pred)
test_r2 = r2_score(y_test, test_pred)

print("=== Train Metrics ===")
print("Train RMSE:", train_rmse)
print("Train R2:", train_r2)

print("\n=== Test Metrics ===")
print("Test RMSE:", test_rmse)
print("Test R2:", test_r2)

=== Train Metrics ===
Train RMSE: 0.9082169432939801
Train R2: 0.9901191974578337

=== Test Metrics ===
Test RMSE: 2.4015697670824165
Test R2: 0.9311169552571839

result3 = model_evl(y_test, y_pred)

RMSE: 2.4016
MAE: 1.7909
R2 score: 0.9311

XGBoost¶

xgb = XGBRegressor(
    objective="reg:squarederror",
    eval_metric="rmse",
    tree_method="hist",   
    n_jobs=-1,
    random_state=42
)

xgb.fit(X_train_dropped, y_train)

xgb_pred = xgb.predict(X_test_dropped)

# Predictions
train_pred = xgb.predict(X_train_dropped)
test_pred = xgb.predict(X_test_dropped)

# Train metrics
train_rmse = root_mean_squared_error(y_train, train_pred)
train_r2 = r2_score(y_train, train_pred)

# Test metrics
test_rmse = root_mean_squared_error(y_test, test_pred)
test_r2 = r2_score(y_test, test_pred)

print("=== Train Metrics ===")
print("Train RMSE:", train_rmse)
print("Train R2:", train_r2)

print("\n=== Test Metrics ===")
print("Test RMSE:", test_rmse)
print("Test R2:", test_r2)

=== Train Metrics ===
Train RMSE: 1.9034255743026733
Train R2: 0.9566004872322083

=== Test Metrics ===
Test RMSE: 2.0791780948638916
Test R2: 0.9483696222305298

r1 = model_evl(y_test, xgb_pred)

RMSE: 2.0792
MAE: 1.56
R2 score: 0.9484

LightGBM¶

train_data = lgb.Dataset(X_train_dropped, label=y_train)
test_data = lgb.Dataset(X_test_dropped, label=y_test)

params = {
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'metric': 'rmse',
    'learning_rate': 0.05,
    'min_data_in_leaf': 30,
    'num_leaves': 31,
    'max_depth': -1,
    'lambda_l1': 1.0,
    'lambda_l2': 1.0,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': -1
}

model = lgb.train(
    params,
    train_data,
    valid_sets=[train_data, test_data],
    num_boost_round=9000
)

lgb_pred = model.predict(X_test_dropped, num_iteration=model.best_iteration)

# Predictions
train_pred = model.predict(X_train_dropped)
test_pred = model.predict(X_test_dropped)

# Train metrics
train_rmse = root_mean_squared_error(y_train, train_pred)
train_r2 = r2_score(y_train, train_pred)

# Test metrics
test_rmse =root_mean_squared_error(y_test, test_pred)
test_r2 = r2_score(y_test, test_pred)

print("=== Train Metrics ===")
print("Train RMSE:", train_rmse)
print("Train R2:", train_r2)

print("\n=== Test Metrics ===")
print("Test RMSE:", test_rmse)
print("Test R2:", test_r2)

=== Train Metrics ===
Train RMSE: 0.978173063699983
Train R2: 0.9885384218538967

=== Test Metrics ===
Test RMSE: 1.638417069658579
Test R2: 0.9679394588923644

lgb_score = model_evl(y_test, lgb_pred)

RMSE: 1.6384
MAE: 1.1488
R2 score: 0.9679

LGB for user interface¶

t = df1[df1['estimated_store_to_consumer_driving_duration'] == 861].head(3)
t[['estimated_store_to_consumer_driving_duration', 'estimated_minutes']]

X_train_new = X_train_dropped.drop(columns=['delivered_day'])
X_test_new = X_test_dropped.drop(columns=['delivered_day'])

train_data = lgb.Dataset(X_train_new, label=y_train)
test_data = lgb.Dataset(X_test_new, label=y_test)

model = lgb.train(
    params,
    train_data,
    valid_sets=[train_data, test_data],
    num_boost_round=9000
)

# Predictions
train_pred = model.predict(X_train_new)
test_pred = model.predict(X_test_new)

# Train metrics
train_rmse = root_mean_squared_error(y_train, train_pred)
train_r2 = r2_score(y_train, train_pred)

# Test metrics
test_rmse =root_mean_squared_error(y_test, test_pred)
test_r2 = r2_score(y_test, test_pred)

print("=== Train Metrics ===")
print("Train RMSE:", train_rmse)
print("Train R2:", train_r2)

print("\n=== Test Metrics ===")
print("Test RMSE:", test_rmse)
print("Test R2:", test_r2)

=== Train Metrics ===
Train RMSE: 5.821618831518193
Train R2: 0.594023818257526

=== Test Metrics ===
Test RMSE: 7.761705917398535
Test R2: 0.28049090576580094

lgb_pred_u = model.predict(X_test_new, num_iteration=model.best_iteration)

lgb_score_u = model_evl(y_test, lgb_pred_u)

RMSE: 4.7203
MAE: 3.6843
R2 score: 0.7339

Linear Regression¶

VIF¶

def calculate_vif(df):
    vif_data = pd.DataFrame()
    vif_data["feature"] = df.columns
    vif_data["VIF"] = [variance_inflation_factor(df.values, i) for i in range(df.shape[1])]
    return vif_data

print("===== VIF BEFORE REMOVAL =====")
vif_before = calculate_vif(X)
print(vif_before)

===== VIF BEFORE REMOVAL =====
                                         feature         VIF
0                                      market_id    1.009152
1                         store_primary_category    1.020600
2                                 order_protocol    1.048610
3                                    total_items    7.661572
4                                       subtotal    5.266616
5                             num_distinct_items    6.112050
6                                 min_item_price    2.511379
7                                 max_item_price    2.705759
8                          total_onshift_dashers   13.124841
9                             total_busy_dashers   12.650595
10                      total_outstanding_orders   11.085033
11  estimated_store_to_consumer_driving_duration    1.004830
12                              delivery_session    1.095620
13                                    created_on   72.049671
14                                  delivered_on   71.973109
15                                 created_month  176.256694
16                                  created_year    0.000000
17                                   created_day   54.474983
18                               delivered_month  176.340488
19                                delivered_year    0.000000
20                                 delivered_day   54.653936

# Remove features with VIF > 10
high_vif_cols = vif_before[vif_before["VIF"] > 10]["feature"].tolist()
X= X.drop(columns=high_vif_cols)

print("\nRemoved High-VIF Columns:", high_vif_cols)

Removed High-VIF Columns: ['total_onshift_dashers', 'total_busy_dashers', 'total_outstanding_orders', 'created_on', 'delivered_on', 'created_month', 'created_day', 'delivered_month', 'delivered_day']

print("===== VIF AFTER REMOVAL =====")
vif_after = calculate_vif(X)
print(vif_after)

===== VIF AFTER REMOVAL =====
                                         feature       VIF
0                                      market_id  1.002274
1                         store_primary_category  1.016549
2                                 order_protocol  1.018610
3                                    total_items  7.660335
4                                       subtotal  5.259378
5                             num_distinct_items  6.109833
6                                 min_item_price  2.511289
7                                 max_item_price  2.693932
8   estimated_store_to_consumer_driving_duration  1.003299
9                               delivery_session  1.033163
10                                  created_year  0.000000
11                                delivered_year  0.000000

Scaling¶

X = X.drop(columns=['created_year', 'delivered_year'])
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

Xs_train, Xs_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

Training¶

lr = LinearRegression()

lr.fit(Xs_train, y_train)

lr_pred = lr.predict(Xs_test)

model_evl(y_test, lr_pred)

RMSE: 7.0456
MAE: 5.4207
R2 score: 0.4071

✅ LightGBM Model – Final Summary for Deployment

Model Selected

The final model chosen for deployment is:

🎯LightGBM Regressor (Fine-Tuned Version)

 LightGBM consistently delivered the best balance of accuracy, generalization, speed, and stability compared to Random Forest, XGBoost, and CatBoost.

Reason for Choosing LightGBM

LightGBM was selected because:

 ✔ Highest test performance
     RMSE ≈ 1.27
     MAE ≈ 1.14
     R² ≈ 0.968

 ✔ Low overfitting (small gap between train & test metrics)

 ✔ Fast inference time, ideal for real-time predictions

 ✔ Handles:
     Large datasets efficiently
     High-cardinality categorical features
     Missing values
     Non-linear relationships

 ✔ Hyperparameters tune well and improve generalization

Final Model Performance

Train Metrics
RMSE: ~0.986
R²: ~0.9887

Test Metrics
RMSE: ~1.27
R²: ~0.968

These metrics indicate excellent predictive power.

import pickle

with open("lightgbm_model.pkl", "wb") as f:
    pickle.dump(model, f)

model.save_model("lightgbm_model.txt")

<lightgbm.basic.Booster at 0x1f0b38438c0>

Neural Network Implementation¶

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.callbacks import EarlyStopping
import keras_tuner as kt

X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175777 entries, 0 to 175776
Data columns (total 15 columns):
 #   Column                                        Non-Null Count   Dtype  
---  ------                                        --------------   -----  
 0   market_id                                     175777 non-null  float64
 1   store_primary_category                        175777 non-null  int64  
 2   order_protocol                                175777 non-null  float64
 3   total_items                                   175777 non-null  int64  
 4   subtotal                                      175777 non-null  int64  
 5   num_distinct_items                            175777 non-null  int64  
 6   min_item_price                                175777 non-null  int64  
 7   max_item_price                                175777 non-null  int64  
 8   total_onshift_dashers                         175777 non-null  float64
 9   total_busy_dashers                            175777 non-null  float64
 10  total_outstanding_orders                      175777 non-null  float64
 11  estimated_store_to_consumer_driving_duration  175777 non-null  float64
 12  delivery_session                              175777 non-null  int64  
 13  created_on                                    175777 non-null  int64  
 14  delivered_on                                  175777 non-null  int64  
dtypes: float64(6), int64(9)
memory usage: 20.1 MB

X_dropped = X.drop(columns=['created_on', 'delivered_on'])
X_train_dropped, X_test_dropped, y_train, y_test = train_test_split(X_dropped, y, test_size=0.2, random_state=42)

scaler = StandardScaler()

scaler = StandardScaler()
scaler.fit(X_train_dropped)

X_train_scaled = scaler.transform(X_train_dropped)
X_test_scaled = scaler.transform(X_test_dropped)

X_train_scaled = X_train_scaled.astype("float32")
y_train = y_train.astype("float32")

model = Sequential([
    Dense(128, activation='relu', input_dim=X_train_scaled.shape[1]),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1)
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='mse',
              metrics=['mae'])

X_test_scaled = scaler.transform(X_test_dropped)

X_test_scaled = X_test_scaled.astype("float32")
y_test = y_test.astype("float32")

loss, mae = model.evaluate(X_test_scaled, y_test)
print("Test MAE:", mae)
print("Test RMSE:", loss**0.5)

1099/1099 ━━━━━━━━━━━━━━━━━━━━ 2s 1ms/step - loss: 2237.9453 - mae: 46.4035
Test MAE: 46.40354919433594
Test RMSE: 47.30692668626868

def build_model(hp):
    model = keras.Sequential()
    
    # Input layer
    model.add(layers.Input(shape=(X_train_scaled.shape[1],)))
    
    # Number of layers
    for i in range(hp.Int("num_layers", 2, 5)):
        model.add(layers.Dense(
            units=hp.Int(f"units_{i}", min_value=64, max_value=512, step=64),
            activation="relu",
            kernel_initializer="he_normal"
        ))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(hp.Float(f"dropout_{i}", 0.1, 0.4, step=0.1)))

    # Output (regression)
    model.add(layers.Dense(1))

    # Choose LR
    lr = hp.Float("lr", 1e-4, 5e-3, sampling="log")

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss="mse",
        metrics=["mae"]
    )

    return model

tuner = kt.RandomSearch(
    build_model,
    objective="val_mae",
    max_trials=10,
    executions_per_trial=1,
    directory="tuner_results",
    project_name="porter_nn"
)

tuner.search(
    X_train_scaled,
    y_train,
    validation_split=0.2,
    epochs=30,
    batch_size=32,
    callbacks=[
        EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
    ],
    verbose=1
)

Trial 10 Complete [00h 12m 10s]
val_mae: 1.849675178527832

Best val_mae So Far: 1.5717947483062744
Total elapsed time: 02h 08m 34s

best_model = tuner.get_best_models(num_models=1)[0]

Train Best Model Longer With LR Scheduler

es = EarlyStopping(monitor="val_loss", patience=15, restore_best_weights=True)
lr_reduce = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=5)

history = best_model.fit(
    X_train_scaled,
    y_train,
    validation_split=0.2,
    epochs=200,
    batch_size=32,
    callbacks=[es, lr_reduce],
    verbose=1
)

Epoch 1/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 30s 7ms/step - loss: 11.1731 - mae: 2.5690 - val_loss: 5.0569 - val_mae: 1.6039 - learning_rate: 0.0011
Epoch 2/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 26s 7ms/step - loss: 11.0609 - mae: 2.5593 - val_loss: 4.9464 - val_mae: 1.6282 - learning_rate: 0.0011
Epoch 3/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 26s 7ms/step - loss: 10.8290 - mae: 2.5317 - val_loss: 4.9020 - val_mae: 1.5800 - learning_rate: 0.0011
Epoch 4/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 26s 7ms/step - loss: 10.9666 - mae: 2.5489 - val_loss: 5.6801 - val_mae: 1.6700 - learning_rate: 0.0011
Epoch 5/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 27s 8ms/step - loss: 10.8374 - mae: 2.5305 - val_loss: 5.1052 - val_mae: 1.6383 - learning_rate: 0.0011
Epoch 6/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 26s 8ms/step - loss: 10.9346 - mae: 2.5418 - val_loss: 5.4149 - val_mae: 1.6505 - learning_rate: 0.0011
Epoch 7/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 27s 8ms/step - loss: 10.8291 - mae: 2.5330 - val_loss: 5.6278 - val_mae: 1.6620 - learning_rate: 0.0011
Epoch 8/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 27s 8ms/step - loss: 10.8303 - mae: 2.5302 - val_loss: 5.5644 - val_mae: 1.6710 - learning_rate: 0.0011
Epoch 9/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 26s 7ms/step - loss: 10.5399 - mae: 2.5043 - val_loss: 4.7204 - val_mae: 1.5656 - learning_rate: 5.5445e-04
Epoch 10/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 27s 8ms/step - loss: 10.3377 - mae: 2.4757 - val_loss: 4.9055 - val_mae: 1.6018 - learning_rate: 5.5445e-04
Epoch 11/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 28s 8ms/step - loss: 10.4294 - mae: 2.4889 - val_loss: 4.9357 - val_mae: 1.5885 - learning_rate: 5.5445e-04
Epoch 12/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 27s 8ms/step - loss: 10.3007 - mae: 2.4698 - val_loss: 4.6838 - val_mae: 1.5627 - learning_rate: 5.5445e-04
Epoch 13/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 28s 8ms/step - loss: 10.3921 - mae: 2.4825 - val_loss: 4.6811 - val_mae: 1.5524 - learning_rate: 5.5445e-04
Epoch 14/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 27s 8ms/step - loss: 10.3310 - mae: 2.4754 - val_loss: 5.1783 - val_mae: 1.6203 - learning_rate: 5.5445e-04
Epoch 15/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 27s 8ms/step - loss: 10.1875 - mae: 2.4572 - val_loss: 4.7661 - val_mae: 1.5831 - learning_rate: 5.5445e-04
Epoch 16/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 28s 8ms/step - loss: 10.3916 - mae: 2.4855 - val_loss: 4.8488 - val_mae: 1.6388 - learning_rate: 5.5445e-04
Epoch 17/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 30s 8ms/step - loss: 10.2832 - mae: 2.4741 - val_loss: 4.7574 - val_mae: 1.5511 - learning_rate: 5.5445e-04
Epoch 18/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 29s 8ms/step - loss: 10.1361 - mae: 2.4563 - val_loss: 4.7872 - val_mae: 1.5574 - learning_rate: 5.5445e-04
Epoch 19/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 29s 8ms/step - loss: 10.1889 - mae: 2.4628 - val_loss: 4.7868 - val_mae: 1.5773 - learning_rate: 2.7722e-04
Epoch 20/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 29s 8ms/step - loss: 10.1050 - mae: 2.4527 - val_loss: 4.5825 - val_mae: 1.5519 - learning_rate: 2.7722e-04
Epoch 21/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 29s 8ms/step - loss: 10.0650 - mae: 2.4519 - val_loss: 4.8756 - val_mae: 1.5784 - learning_rate: 2.7722e-04
Epoch 22/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 29s 8ms/step - loss: 9.9601 - mae: 2.4392 - val_loss: 4.6995 - val_mae: 1.5619 - learning_rate: 2.7722e-04
Epoch 23/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 29s 8ms/step - loss: 10.0070 - mae: 2.4428 - val_loss: 4.6322 - val_mae: 1.5666 - learning_rate: 2.7722e-04
Epoch 24/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 29s 8ms/step - loss: 9.9514 - mae: 2.4359 - val_loss: 4.6682 - val_mae: 1.5938 - learning_rate: 2.7722e-04
Epoch 25/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 29s 8ms/step - loss: 9.9414 - mae: 2.4329 - val_loss: 4.6467 - val_mae: 1.5524 - learning_rate: 2.7722e-04
Epoch 26/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 29s 8ms/step - loss: 9.8197 - mae: 2.4180 - val_loss: 4.5422 - val_mae: 1.5539 - learning_rate: 1.3861e-04
Epoch 27/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 30s 8ms/step - loss: 9.8143 - mae: 2.4163 - val_loss: 4.6405 - val_mae: 1.5432 - learning_rate: 1.3861e-04
Epoch 28/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 29s 8ms/step - loss: 9.8140 - mae: 2.4162 - val_loss: 4.6507 - val_mae: 1.5615 - learning_rate: 1.3861e-04
Epoch 29/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 28s 8ms/step - loss: 9.9292 - mae: 2.4295 - val_loss: 4.6189 - val_mae: 1.5353 - learning_rate: 1.3861e-04
Epoch 30/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 27s 8ms/step - loss: 9.8732 - mae: 2.4259 - val_loss: 4.6158 - val_mae: 1.5515 - learning_rate: 1.3861e-04
Epoch 31/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 27s 8ms/step - loss: 9.9272 - mae: 2.4303 - val_loss: 4.6666 - val_mae: 1.5467 - learning_rate: 1.3861e-04
Epoch 32/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 28s 8ms/step - loss: 9.7719 - mae: 2.4120 - val_loss: 4.5839 - val_mae: 1.5454 - learning_rate: 6.9306e-05
Epoch 33/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 28s 8ms/step - loss: 9.8252 - mae: 2.4217 - val_loss: 4.5755 - val_mae: 1.5336 - learning_rate: 6.9306e-05
Epoch 34/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 29s 8ms/step - loss: 9.9237 - mae: 2.4316 - val_loss: 4.5608 - val_mae: 1.5342 - learning_rate: 6.9306e-05
Epoch 35/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 28s 8ms/step - loss: 9.7947 - mae: 2.4177 - val_loss: 4.5781 - val_mae: 1.5357 - learning_rate: 6.9306e-05
Epoch 36/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 27s 8ms/step - loss: 9.7991 - mae: 2.4219 - val_loss: 4.5851 - val_mae: 1.5454 - learning_rate: 6.9306e-05
Epoch 37/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 28s 8ms/step - loss: 9.6536 - mae: 2.3966 - val_loss: 4.5869 - val_mae: 1.5392 - learning_rate: 3.4653e-05
Epoch 38/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 29s 8ms/step - loss: 9.7269 - mae: 2.4070 - val_loss: 4.5611 - val_mae: 1.5297 - learning_rate: 3.4653e-05
Epoch 39/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 30s 9ms/step - loss: 9.7791 - mae: 2.4175 - val_loss: 4.6015 - val_mae: 1.5520 - learning_rate: 3.4653e-05
Epoch 40/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 30s 8ms/step - loss: 9.7580 - mae: 2.4140 - val_loss: 4.5885 - val_mae: 1.5342 - learning_rate: 3.4653e-05
Epoch 41/200
3516/3516 ━━━━━━━━━━━━━━━━━━━━ 30s 8ms/step - loss: 9.6786 - mae: 2.4036 - val_loss: 4.6306 - val_mae: 1.5430 - learning_rate: 3.4653e-05

X_test_scaled

array([[-1.3110044 ,  0.14893202, -1.2633076 , ...,  0.16592424,
         0.74276245, -0.44810063],
       [ 0.94322634, -0.52698404,  0.05841105, ...,  2.5472345 ,
         0.19756836, -0.44810063],
       [ 1.6946366 ,  0.48689005, -1.2633076 , ..., -1.0148908 ,
        -0.8286793 ,  0.5121023 ],
       ...,
       [-1.3110044 , -1.1063406 ,  1.3801296 , ..., -0.62128574,
        -1.5708762 , -0.44810063],
       [-0.55959415,  0.82484806,  0.05841105, ...,  0.539849  ,
        -1.1539632 , -0.44810063],
       [-1.3110044 , -0.38214487,  0.71927035, ..., -0.5228845 ,
        -1.1402187 , -0.44810063]], shape=(35156, 13), dtype=float32)

y_pred = best_model.predict(X_test_scaled)

1099/1099 ━━━━━━━━━━━━━━━━━━━━ 5s 4ms/step

model_evl(y_pred, y_test)

RMSE: 2.1281
MAE: 1.56
R2 score: 0.94

best_model.save("best_lighting_nn.h5")

WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`.

Summary¶

Final scores:

Metric	NN Score	Light GBM Score
RMSE	2.1281	1.27
MAE	1.56	1.14
R² score	0.94	0.968

From the above comparisons, $Light GBM$ is performing better than $NN$. Hence we choose $Light GBM$ model for the final deployment.
$NN$ scores compartively less, this could be due to the dataset size.

	market_id	store_primary_category	order_protocol	total_items	subtotal	num_distinct_items	min_item_price	max_item_price	total_onshift_dashers	total_busy_dashers	total_outstanding_orders	estimated_store_to_consumer_driving_duration
count	175777.000000	175777.000000	175777.000000	175777.000000	175777.000000	175777.000000	175777.000000	175777.000000	175777.000000	175777.000000	175777.000000	175777.000000
mean	2.743726	35.887949	2.911752	3.204976	2697.111147	2.675060	684.965433	1160.158616	44.918664	41.861381	58.230115	546.077240
std	1.330963	20.728254	1.513128	2.674055	1828.554893	1.625681	519.882924	560.828571	34.544724	32.168505	52.731043	218.717798
min	1.000000	0.000000	1.000000	1.000000	0.000000	1.000000	-86.000000	0.000000	-4.000000	-5.000000	-6.000000	0.000000
25%	2.000000	18.000000	1.000000	2.000000	1412.000000	1.000000	299.000000	799.000000	17.000000	15.000000	17.000000	384.000000
50%	2.000000	38.000000	3.000000	3.000000	2224.000000	2.000000	595.000000	1095.000000	37.000000	35.000000	41.000000	544.000000
75%	4.000000	55.000000	4.000000	4.000000	3410.000000	3.000000	942.000000	1395.000000	66.000000	63.000000	85.000000	703.000000
max	6.000000	72.000000	7.000000	411.000000	26800.000000	20.000000	14700.000000	14700.000000	171.000000	154.000000	285.000000	2088.000000

	market_id	created_at	actual_delivery_time	store_primary_category	order_protocol	total_items	subtotal	num_distinct_items	min_item_price	max_item_price	total_onshift_dashers	total_busy_dashers	total_outstanding_orders	estimated_store_to_consumer_driving_duration
0	1.0	2015-02-06 22:24:17	2015-02-06 23:11:17	4	1.0	4	3441	4	557	1239	33.0	14.0	21.0	861.0
1	2.0	2015-02-10 21:49:25	2015-02-10 22:33:25	46	2.0	1	1900	1	1400	1400	1.0	2.0	2.0	690.0
2	2.0	2015-02-16 00:11:35	2015-02-16 01:06:35	36	3.0	4	4771	3	820	1604	8.0	6.0	18.0	289.0
3	1.0	2015-02-12 03:36:46	2015-02-12 04:35:46	38	1.0	1	1525	1	1525	1525	5.0	6.0	8.0	795.0
4	1.0	2015-01-27 02:12:36	2015-01-27 02:58:36	38	1.0	2	3620	2	1425	2195	5.0	5.0	7.0	205.0

	index	0
10	total_outstanding_orders	0.290230
11	estimated_store_to_consumer_driving_duration	0.221492
8	total_onshift_dashers	0.161295
4	subtotal	0.143977
0	market_id	0.079019
9	total_busy_dashers	0.022541
2	order_protocol	0.020215
17	created_day	0.015056
7	max_item_price	0.008415
20	delivered_day	0.008394
6	min_item_price	0.007320
1	store_primary_category	0.006979
12	delivery_session	0.004616
5	num_distinct_items	0.003825
3	total_items	0.002506
15	created_month	0.001137
18	delivered_month	0.001046
13	created_on	0.000986
14	delivered_on	0.000951
16	created_year	0.000000
19	delivered_year	0.000000