import pandas as pd
import sqlite3
from sqlalchemy import create_engine
import os
import numpy as np
import sys
from IPython.display import display, Markdown, Image
from pathlib import Path
import chart_studio.plotly as py
import plotly.express as px
import plotly.graph_objects as go
import cufflinks as cf
import matplotlib.pyplot as plt
%matplotlib inline
import math
import warnings
warnings.filterwarnings("ignore")
import importlib
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
cf.go_offline()
# Add the project root to the system path:
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)
# Import custom modules:
from src.rfm_analysis import rfm_analysis_metrics, rfm_analysis_summary
from plots import rfm_analysis_plots
from utils import helper
# Create a connection to the ecommerce database which is required for RFM metric function calls:
conn = sqlite3.connect("../data/ecommerce.db")
# A helper function to reload the package to pick up necessary changes:
def reload_package():
    importlib.reload(rfm_analysis_metrics)
    importlib.reload(rfm_analysis_summary)
    importlib.reload(rfm_analysis_plots)
    importlib.reload(helper)
    print("Packages reloaded!!")
# Create a global path for all SQL files related to the RFM analysis:
SQL_BASE_PATH = Path("../sql/03_Customer_Segmentation_(RFM_Analysis)")
# Get the list of the available tables:
tables = helper.extract_table_names(conn=conn)
print(f"The list of the tables:\n{tables}")
The list of the tables:
name
0 customers
1 geolocation
2 orders
3 order_items
4 order_payments
5 order_reviews
6 products
7 sellers
8 product_category_translation
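The table listing above can be reproduced with a minimal query against SQLite's schema catalog. The sketch below uses a toy in-memory database, not the project's `ecommerce.db`, and the real `helper.extract_table_names` may differ in details:

```python
import sqlite3

import pandas as pd

# Toy in-memory database standing in for ecommerce.db (assumption: the real
# helper may filter or order differently).
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE customers (customer_id TEXT)")
conn.execute("CREATE TABLE orders (order_id TEXT)")

# sqlite_master is SQLite's built-in catalog of schema objects.
tables = pd.read_sql_query(
    "SELECT name FROM sqlite_master WHERE type = 'table' ORDER BY name",
    conn)
print(tables["name"].tolist())  # → ['customers', 'orders']
```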
**Objective:** Segment customers based on Recency, Frequency, and Monetary value in order to identify high-value, loyal, at-risk, and churn-prone customer groups, enabling targeted retention strategies, personalized marketing, and revenue optimization.

For the RFM analysis, only orders with `order_status = 'delivered'` are considered. This ensures that the revenue was actually realized by the firm.

NOTE: A temporary table `customer_order_info` is created for further analysis:
rfm_analysis_metrics.create_temp_table_customer_order_info(conn=conn, sql_base_path=SQL_BASE_PATH)
Temporary table `customer_order_info` successfully created !!
Recency measures the number of days since a customer's most recent delivered purchase:

- Low Recency (recent purchases): active or engaged customers
- High Recency (purchased long ago): inactive, churn-risk customers

Since this dataset is historical, using the current date would not make sense; instead, the latest `order_purchase_timestamp` + 10 days is used as the reference date (threshold).
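The reference-date logic described above can be sketched in pandas. This is a toy illustration with made-up rows; the actual computation happens in SQL inside `get_recency_analysis()`:

```python
import pandas as pd

# Toy orders table (assumed data, for illustration only).
orders = pd.DataFrame({
    "customer_id": ["a", "a", "b"],
    "order_purchase_timestamp": pd.to_datetime(
        ["2018-08-01", "2018-08-29", "2017-11-14"]),
})

# Reference date = latest purchase in the dataset + 10 days.
date_threshold = orders["order_purchase_timestamp"].max() + pd.Timedelta(days=10)

# Recency per customer = days between their latest purchase and the threshold.
latest = orders.groupby("customer_id")["order_purchase_timestamp"].max()
days_since_last_purchase = (date_threshold - latest).dt.days
print(days_since_last_purchase)
```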
Call the `get_recency_analysis()` function from the `rfm_analysis_metrics` module:

recency_table_full = rfm_analysis_metrics.get_recency_analysis(conn=conn, sql_base_path=SQL_BASE_PATH)
recency_table_full.head(3)
| | customer_id | latest_purchase_timestamp | date_threshold | days_since_last_purchase | bins |
|---|---|---|---|---|---|
| 0 | 00012a2ce6f8dcda20d059ce98491703 | 2017-11-14 | 2018-09-08 | 297 | 50 |
| 1 | 000161a058600d5901f007fab4c27140 | 2017-07-16 | 2018-09-08 | 418 | 80 |
| 2 | 0001fd6190edaaf884bcaf3d49edf079 | 2017-02-28 | 2018-09-08 | 556 | 110 |
print(f"The average recency day is: {np.floor(recency_table_full['days_since_last_purchase'].mean())}")
The average recency day is: 248.0
recency_table_full[recency_table_full['bins'].isna()]
| customer_id | latest_purchase_timestamp | date_threshold | days_since_last_purchase | bins |
|---|---|---|---|---|

(Empty result: no customer has a missing `bins` value.)
Use the `recency_table_full` table to group the customers by bin and count the unique IDs:

customers_per_bins = recency_table_full.groupby(['bins'])['customer_id'].nunique()
customers_per_bins
bins
0       9196
10      9545
20     11213
30     11553
40     11276
50     11198
60      7076
70      6895
80      5844
90      5591
100     3960
110     2747
120      118
130       90
140      176
Name: customer_id, dtype: int64
Map the bin values (each bin representing a 50-day recency interval) to day ranges for visualization purposes:

# Keep a backup copy in case something goes wrong:
customers_per_bins_labeled = customers_per_bins.copy()
# Mapping dictionary:
bin_mapping = {
0: '0-50',
10: '50-100',
20: '100-150',
30: '150-200',
40: '200-250',
50: '250-300',
60: '300-350',
70: '350-400',
80: '400-450',
90: '450-500',
100: '500-550',
110: '550-600',
120: '600-650',
130: '650-700',
140: '700+'
}
customers_per_bins_labeled.index = customers_per_bins_labeled.index.map(bin_mapping)
customers_per_bins_labeled
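As an alternative to the bin-then-map two-step above, the day values could be labeled directly with `pd.cut`. This is a minimal sketch on toy values, assuming 50-day edges that match the mapping dictionary:

```python
import pandas as pd

# Toy recency values (days since last purchase), for illustration only.
days = pd.Series([12, 73, 297, 418, 556, 720])

# 50-day interval edges up to 700, with an open-ended final bucket.
edges = list(range(0, 701, 50)) + [float("inf")]
labels = [f"{lo}-{lo + 50}" for lo in range(0, 700, 50)] + ["700+"]

# right=False makes intervals left-closed: [0, 50), [50, 100), ...
binned = pd.cut(days, bins=edges, labels=labels, right=False)
print(binned.tolist())
```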
customers_per_bins_labeled_df = customers_per_bins_labeled.to_frame().reset_index().rename(columns={'bins': 'Days Interval',
'customer_id': 'Total Customers'})
This is how the well-formatted result looks for the customer counts per bin, using a 50-day recency interval:
customers_per_bins_labeled_df
| | Days Interval | Total Customers |
|---|---|---|
| 0 | 0-50 | 9196 |
| 1 | 50-100 | 9545 |
| 2 | 100-150 | 11213 |
| 3 | 150-200 | 11553 |
| 4 | 200-250 | 11276 |
| 5 | 250-300 | 11198 |
| 6 | 300-350 | 7076 |
| 7 | 350-400 | 6895 |
| 8 | 400-450 | 5844 |
| 9 | 450-500 | 5591 |
| 10 | 500-550 | 3960 |
| 11 | 550-600 | 2747 |
| 12 | 600-650 | 118 |
| 13 | 650-700 | 90 |
| 14 | 700+ | 176 |
Call the `plot_customer_distribution_recency_bar_chart()` function from the `rfm_analysis_plots` module:

fig = rfm_analysis_plots.plot_customer_distribution_recency_bar_chart(df=customers_per_bins_labeled_df)
Call the `get_recency_analysis_summary()` function from the `rfm_analysis_summary` module to summarize the Recency Analysis section:

reload_package()
Packages reloaded!!
rfm_analysis_summary.get_recency_analysis_summary(recency_table_full, customers_per_bins_labeled_df, fig)
This analysis evaluates customer recency by measuring the number of days since a customer’s most recent delivered purchase.
As the dataset is historical and not up to date, a reference date of 2018-09-08 (10 days after the last recorded purchase) was
used instead of the current date to avoid distorting recency values. Each customer appears only once, using a window function to capture
their latest completed purchase.
On average, customers take approximately 248 days to place their second purchase, indicating a long repurchase cycle.
Out of 96,478 customers, only 18,741 return within 1–100 days, while the majority (45,240 customers) make a second purchase between 100–300 days.
A significant portion of customers show very long gaps between purchases, with 25,406 customers returning after 300–500 days, and 7,091 customers taking more than 500 days.
Overall, repeat purchasing behavior is heavily concentrated in the 100–300 day window, suggesting low short-term retention and a slow repeat-purchase cycle.
The bar chart below visualizes customer counts across 50-day recency bins, highlighting the delayed repurchase behavior.
Frequency measures how often a customer purchases:

- High Frequency: loyal customers
- Low Frequency: one-time customers or customers at churn risk

Call the `get_customer_order_frequency_analysis()` function from the `rfm_analysis_metrics` module. To make the results interpretable from a business perspective, customers were segmented as follows:
customer_order_frequency_interpretation = rfm_analysis_metrics.get_customer_order_frequency_analysis(conn=conn,
sql_base_path=SQL_BASE_PATH)
# The returned dataframe consists of the information below:
customer_order_frequency_interpretation.head()
| | customer_unique_id | total_orders | Interpretation |
|---|---|---|---|
| 0 | 0000366f3b9a7992bf8c76cfdf3221e2 | 1 | One-Time Buyer |
| 1 | 0000b849f77a49e4a4ce2b2a4ca5be3f | 1 | One-Time Buyer |
| 2 | 0000f46a3911fa3c0805444483337064 | 1 | One-Time Buyer |
| 3 | 0000f6ccb0745a6a4b88665a16c9f078 | 1 | One-Time Buyer |
| 4 | 0004aac84e0df4da2b147fca70cf8255 | 1 | One-Time Buyer |
Use the `Interpretation` feature to count the customers in each segment and their share (%) of the total customer base:

customer_order_frequency_interpretation_counts = customer_order_frequency_interpretation['Interpretation'].value_counts()
customer_order_frequency_interpretation_counts = customer_order_frequency_interpretation_counts.to_frame().reset_index()
customer_order_frequency_interpretation_counts.rename(columns={'count':'Total Counts'}, inplace=True)
customer_order_frequency_interpretation_counts['Customer Share (%)'] = np.round((customer_order_frequency_interpretation_counts['Total Counts']
/ customer_order_frequency_interpretation_counts['Total Counts'].sum()) * 100, 3)
customer_order_frequency_interpretation_counts
| | Interpretation | Total Counts | Customer Share (%) |
|---|---|---|---|
| 0 | One-Time Buyer | 90557 | 97.000 |
| 1 | Returning Customer | 2573 | 2.756 |
| 2 | Loyal Customer | 181 | 0.194 |
| 3 | Very Loyal | 28 | 0.030 |
| 4 | VIP Customers | 19 | 0.020 |
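The share computation above can also be done in one step with `value_counts(normalize=True)`, which returns fractions directly. A toy sketch with an assumed distribution:

```python
import numpy as np
import pandas as pd

# Toy interpretation labels (assumed 97/3 split, not the real data).
interp = pd.Series(["One-Time Buyer"] * 97 + ["Returning Customer"] * 3)

# normalize=True yields fractions, avoiding the manual division by the sum.
share = np.round(interp.value_counts(normalize=True) * 100, 3)
print(share)
```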
Call the `plot_customer_purchase_frequency_bar_chart()` function from the `rfm_analysis_plots` module to get the purchase frequency bar chart:

reload_package()
Packages reloaded!!
fig = rfm_analysis_plots.plot_customer_purchase_frequency_bar_chart(df=customer_order_frequency_interpretation_counts)
Call the `get_frequency_analysis_summary()` function from the `rfm_analysis_summary` module to summarize the Frequency Analysis section:

rfm_analysis_summary.get_frequency_analysis_summary(customer_order_frequency_interpretation_counts, fig)
Analyze customers based on their buying-frequency pattern to identify VIP customers and customers at churn risk. The number of distinct orders per customer is used to calculate the frequency metric.

For better interpretability from a business perspective, customers are segmented as follows:

- One-Time Buyer: 1 order
- Returning Customer: 2 orders
- Loyal Customer: 3 orders
- Very Loyal Customer: 4 orders
- VIP Customer: 5 or more orders

One-Time Buyers dominate the customer base, accounting for 97.0% (90,557 customers). To properly visualize this imbalance, a logarithmic scale was used in the frequency distribution chart. This approach prevents one-time buyers from visually overwhelming the chart and allows meaningful comparison between smaller yet valuable customer segments. The bar chart below visualizes the customer purchase frequency (%) across the five segments:
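The segmentation rule above can be sketched as a simple mapping. The labels follow the notebook's, but the real logic lives in the SQL/metrics module, so this is only an illustration:

```python
import pandas as pd

def interpret_frequency(total_orders: int) -> str:
    """Map a distinct-order count to the notebook's frequency segment."""
    if total_orders == 1:
        return "One-Time Buyer"
    if total_orders == 2:
        return "Returning Customer"
    if total_orders == 3:
        return "Loyal Customer"
    if total_orders == 4:
        return "Very Loyal"
    return "VIP Customers"  # 5 or more orders

orders_per_customer = pd.Series([1, 2, 3, 4, 7])
print(orders_per_customer.map(interpret_frequency).tolist())
```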
Call the `get_total_spending_per_customer()` function from the `rfm_analysis_metrics` module to get the total spending per customer:

total_spendings_per_customer = rfm_analysis_metrics.get_total_spending_per_customer(conn=conn,
sql_base_path=SQL_BASE_PATH)
print(f"The `total_spendings_per_customer` table has a total {total_spendings_per_customer.shape[0]} rows and "
f"{total_spendings_per_customer.shape[1]} features.")
The `total_spendings_per_customer` table has a total 93358 rows and 2 features.
# Below is the information the table holds:
total_spendings_per_customer.head()
| | customer_unique_id | total_amount |
|---|---|---|
| 0 | 0a0a92112bd4c708ca5fde585afaa872 | 13664.08 |
| 1 | da122df9eeddfedc1dc1f5349a1a690c | 7571.63 |
| 2 | 763c8b1c9c68a0229c42c9fc6f662b93 | 7274.88 |
| 3 | dc4802a71eae9be1dd28f5d788ceb526 | 6929.31 |
| 4 | 459bef486812aa25204be022145caa62 | 6922.21 |
Merge the `customer_order_frequency_interpretation` and `total_spendings_per_customer` dataframes:

The code below merges these two dataframes to combine each customer's order frequency with their total spending:
total_frequency_and_total_amount = pd.merge(customer_order_frequency_interpretation,
total_spendings_per_customer, on='customer_unique_id' , how='inner')
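As a sanity check, the same join can be run with `validate='one_to_one'`, which raises a `MergeError` if a `customer_unique_id` is duplicated on either side. A toy sketch with assumed data:

```python
import pandas as pd

# Toy frames standing in for the two dataframes being merged above.
freq = pd.DataFrame({"customer_unique_id": ["c1", "c2"],
                     "total_orders": [1, 2]})
spend = pd.DataFrame({"customer_unique_id": ["c1", "c2"],
                      "total_amount": [141.90, 27.19]})

# validate='one_to_one' guards against accidental duplicate keys.
merged = pd.merge(freq, spend, on="customer_unique_id", how="inner",
                  validate="one_to_one")
print(merged.shape)  # → (2, 3)
```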
The merged dataframe total_frequency_and_total_amount holds the information as displayed below:
total_frequency_and_total_amount.head()
| | customer_unique_id | total_orders | Interpretation | total_amount |
|---|---|---|---|---|
| 0 | 0000366f3b9a7992bf8c76cfdf3221e2 | 1 | One-Time Buyer | 141.90 |
| 1 | 0000b849f77a49e4a4ce2b2a4ca5be3f | 1 | One-Time Buyer | 27.19 |
| 2 | 0000f46a3911fa3c0805444483337064 | 1 | One-Time Buyer | 86.22 |
| 3 | 0000f6ccb0745a6a4b88665a16c9f078 | 1 | One-Time Buyer | 43.62 |
| 4 | 0004aac84e0df4da2b147fca70cf8255 | 1 | One-Time Buyer | 196.89 |
Group the merged dataframe by the `Interpretation` feature:

segment_monetary_summary = (total_frequency_and_total_amount.groupby('Interpretation')
.agg(total_customers=('customer_unique_id', 'nunique'),
total_revenue=('total_amount', 'sum'),
avg_revenue_per_customer=('total_amount', 'mean'))
.reset_index()
.sort_values(by='total_revenue', ascending=False))
The dataframe was grouped by the `Interpretation` feature into the five segments to analyze customer behavior based on buying frequency:
segment_monetary_summary
| | Interpretation | total_customers | total_revenue | avg_revenue_per_customer |
|---|---|---|---|---|
| 1 | One-Time Buyer | 90557 | 14555586.29 | 160.733972 |
| 2 | Returning Customer | 2573 | 748811.57 | 291.026650 |
| 0 | Loyal Customer | 181 | 78333.95 | 432.784254 |
| 4 | Very Loyal | 28 | 22086.25 | 788.794643 |
| 3 | VIP Customers | 19 | 14955.69 | 787.141579 |
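The named-aggregation pattern used above can be illustrated on toy data (assumed values, not the real tables):

```python
import pandas as pd

# Toy merged frame with two segments, for illustration only.
df = pd.DataFrame({"Interpretation": ["A", "A", "B"],
                   "customer_unique_id": ["c1", "c2", "c3"],
                   "total_amount": [10.0, 30.0, 50.0]})

# Named aggregation: each keyword argument becomes an output column.
summary = (df.groupby("Interpretation")
             .agg(total_customers=("customer_unique_id", "nunique"),
                  total_revenue=("total_amount", "sum"),
                  avg_revenue_per_customer=("total_amount", "mean"))
             .reset_index())
print(summary)
```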
Merge `segment_monetary_summary` and `customer_order_frequency_interpretation_counts` to get the full table for visual comparison:

segment_monetary_summary_w_customer_share = pd.merge(segment_monetary_summary,
customer_order_frequency_interpretation_counts,
on='Interpretation',
how='inner').drop(columns=['Total Counts'])
segment_monetary_summary_w_customer_share = (segment_monetary_summary_w_customer_share
.rename(columns={'total_customers':'Total Customers',
'total_revenue': 'Total Revenue',
'avg_revenue_per_customer': 'Average Revenue Per Customer',
'Customer Share (%)':'Revenue Contribution (%)'}))
The resulting merged table segment_monetary_summary_w_customer_share will be used to compare the customer segments visually, where the key idea is to compare VOLUME vs. VALUE. The table is displayed below:
segment_monetary_summary_w_customer_share
| | Interpretation | Total Customers | Total Revenue | Average Revenue Per Customer | Revenue Contribution (%) |
|---|---|---|---|---|---|
| 0 | One-Time Buyer | 90557 | 14555586.29 | 160.733972 | 97.000 |
| 1 | Returning Customer | 2573 | 748811.57 | 291.026650 | 2.756 |
| 2 | Loyal Customer | 181 | 78333.95 | 432.784254 | 0.194 |
| 3 | Very Loyal | 28 | 22086.25 | 788.794643 | 0.030 |
| 4 | VIP Customers | 19 | 14955.69 | 787.141579 | 0.020 |
Call the `plot_volume_vs_value_chart()` function from the `rfm_analysis_plots` module to plot the VOLUME vs. VALUE chart:

fig = rfm_analysis_plots.plot_volume_vs_value_chart(df=segment_monetary_summary_w_customer_share)
reload_package()
Packages reloaded!!
Call the `get_monetary_analysis_summary()` function from the `rfm_analysis_summary` module to summarize the Monetary Analysis section:

rfm_analysis_summary.get_monetary_analysis_summary(segment_monetary_summary_w_customer_share,
                                                   fig)
Analyze the total revenue, the average revenue per customer, and the revenue contribution (%) of the customers, based on the five segments created during the frequency analysis.

The key idea is to find the most valuable customer segment, supported by the calculated revenue metrics. Only orders with the delivered status are considered, so that the revenue was actually realized by the firm.
- One-Time Buyer is the largest segment, with a total of 90,557 customers, a total revenue of R\$ 14.55 M, an average revenue of R\$ 160.73 per customer, and a customer share of 97.00 %.
- The value of a customer rises as segment size shrinks: although every segment except One-Time Buyer accounts for less than 2.8 % of customers, the company earns on average between R\$ 291.02 and R\$ 788.79 per customer in the remaining four segments, while the average revenue per customer for One-Time Buyer is just R\$ 160.73.

The chart below displays the comparison between Volume and Value, where the bar chart represents the volume and the line chart represents the value:
reload_package()
Packages reloaded!!
Call the `get_key_findings_and_interp()` function from the `rfm_analysis_summary` module:

rfm_analysis_summary.get_key_findings_and_interp()
The final visualization effectively demonstrates this dual (volume vs. value) perspective.
This analysis reinforces the importance of balancing customer acquisition with customer retention and value growth to build a sustainable revenue strategy.