Merge branch 'main' into generalization

commit 6ac62d9957
@@ -30,7 +30,7 @@ def export_dataset(df, output_name):
     df.to_csv(file_out, index = False)
 
 ## 1 - Cleaning of the datasets
 
-for tenant_id in ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "101"]:
+for tenant_id in ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14"]:#, "101"
 
 # Timer
 start = time.time()
@@ -22,52 +22,12 @@ exec(open('0_KPI_functions.py').read())
 warnings.filterwarnings('ignore')
 
 
-def display_covering_time(df, company, datecover):
-    """
-    This function draws the time coverage of each company
-    """
-    min_date = df['purchase_date'].min().strftime("%Y-%m-%d")
-    max_date = df['purchase_date'].max().strftime("%Y-%m-%d")
-    datecover[company] = [datetime.strptime(min_date, "%Y-%m-%d") + timedelta(days=x) for x in range((datetime.strptime(max_date, "%Y-%m-%d") - datetime.strptime(min_date, "%Y-%m-%d")).days)]
-    print(f'Coverage Company {company} : {min_date} - {max_date}')
-    return datecover
-
-
-def compute_time_intersection(datecover):
-    """
-    This function returns the time coverage for all companies
-    """
-    timestamps_sets = [set(timestamps) for timestamps in datecover.values()]
-    intersection = set.intersection(*timestamps_sets)
-    intersection_list = list(intersection)
-    formated_dates = [dt.strftime("%Y-%m-%d") for dt in intersection_list]
-    return sorted(formated_dates)
-
-
-def df_coverage_modelization(sport, coverage_features = 0.7):
-    """
-    This function returns start_date, end_of_features and final dates
-    that help to construct train and test datasets
-    """
-    datecover = {}
-    for company in sport:
-        df_products_purchased_reduced = display_databases(company, file_name = "products_purchased_reduced",
-                                                          datetime_col = ['purchase_date'])
-        datecover = display_covering_time(df_products_purchased_reduced, company, datecover)
-        #print(datecover.keys())
-    dt_coverage = compute_time_intersection(datecover)
-    start_date = dt_coverage[0]
-    end_of_features = dt_coverage[int(0.7 * len(dt_coverage))]
-    final_date = dt_coverage[-1]
-    return start_date, end_of_features, final_date
-
-
 def dataset_construction(min_date, end_features_date, max_date, directory_path):
 
     # Import customerplus
-    df_customerplus_clean_0 = display_databases(directory_path, file_name = "customerplus_cleaned")
-    df_campaigns_information = display_databases(directory_path, file_name = "campaigns_information", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])
-    df_products_purchased_reduced = display_databases(directory_path, file_name = "products_purchased_reduced", datetime_col = ['purchase_date'])
+    df_customerplus_clean_0 = display_input_databases(directory_path, file_name = "customerplus_cleaned")
+    df_campaigns_information = display_input_databases(directory_path, file_name = "campaigns_information", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])
+    df_products_purchased_reduced = display_input_databases(directory_path, file_name = "products_purchased_reduced", datetime_col = ['purchase_date'])
 
     # if directory_path == "101":
     #     df_products_purchased_reduced_1 = display_databases(directory_path, file_name = "products_purchased_reduced_1", datetime_col = ['purchase_date'])
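The three deleted helpers reappear verbatim as a notebook cell in the final hunk of this diff. For readers skimming the diff, here is a minimal, self-contained sketch of what they compute, on two synthetic companies (all names and dates are illustrative; the s3-backed `display_input_databases` is replaced by inline DataFrames):

```python
from datetime import timedelta

import pandas as pd

# Stand-ins for the per-company "products_purchased_reduced" files
dfs = {
    "1": pd.DataFrame({"purchase_date": pd.to_datetime(["2021-01-01", "2022-06-30"])}),
    "2": pd.DataFrame({"purchase_date": pd.to_datetime(["2021-03-15", "2022-12-31"])}),
}

# display_covering_time: one entry per day covered by each company
datecover = {}
for company, df in dfs.items():
    min_date, max_date = df["purchase_date"].min(), df["purchase_date"].max()
    datecover[company] = [min_date + timedelta(days=x) for x in range((max_date - min_date).days)]

# compute_time_intersection: the days covered by every company, sorted
common = sorted(set.intersection(*map(set, datecover.values())))

# df_coverage_modelization: 70/30 split of the common coverage window
start_date, end_of_features, final_date = common[0], common[int(0.7 * len(common))], common[-1]
print(start_date.date(), end_of_features.date(), final_date.date())
# 2021-03-15 2022-02-08 2022-06-29
```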
@@ -90,7 +50,7 @@ def dataset_construction(min_date, end_features_date, max_date, directory_path):
     # Merge everything and create the KPIs
 
     # KPIs on the advertising campaigns
-    df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information)
+    df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information, max_date = end_features_date)
 
     # KPIs on purchasing behaviour
     df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_features)
@@ -74,7 +74,7 @@ def preprocessing_customerplus(directory_path):
     cleaning_date(customerplus_copy, 'last_visiting_date')
 
     # Variable selection
-    customerplus_copy.drop(['lastname', 'firstname', 'birthdate', 'profession', 'language', 'age', 'email', 'civility', 'note', 'extra', 'reference', 'extra_field', 'need_reload', 'preferred_category', 'preferred_supplier', 'preferred_formula', 'zipcode', 'last_visiting_date'], axis = 1, inplace=True)
+    customerplus_copy.drop(['lastname', 'firstname', 'birthdate', 'language', 'email', 'civility', 'note', 'extra', 'reference', 'extra_field', 'need_reload', 'preferred_category', 'preferred_supplier', 'preferred_formula', 'mcp_contact_id', 'last_visiting_date', 'deleted_at'], axis = 1, inplace=True)
     customerplus_copy.rename(columns = {'id' : 'customer_id'}, inplace = True)
 
     return customerplus_copy
@@ -3,7 +3,7 @@
 def custom_date_parser(date_string):
     return pd.to_datetime(date_string, utc = True, format = 'ISO8601')
 
-def display_databases(directory_path, file_name, datetime_col = None):
+def display_input_databases(directory_path, file_name, datetime_col = None):
     """
     This function returns the file from s3 storage
     """
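One caveat worth knowing around this rename: `format = 'ISO8601'` requires pandas >= 2.0, and on those versions the `date_parser=` argument of `read_csv` (still used below) is deprecated in favour of `date_format=`. A sketch of an equivalent reader under that assumption, with an illustrative inline CSV standing in for the s3 file:

```python
import io

import pandas as pd

csv = io.StringIO("customer_id,purchase_date\n1,2023-05-01T10:00:00Z\n")

# Same effect as custom_date_parser, without the deprecated date_parser= hook
df = pd.read_csv(csv, sep=",", parse_dates=["purchase_date"], date_format="ISO8601")
print(df["purchase_date"].dtype)  # datetime64[ns, UTC] on pandas 2.x
```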
@@ -13,14 +13,16 @@ def display_databases(directory_path, file_name, datetime_col = None):
     df = pd.read_csv(file_in, sep=",", parse_dates = datetime_col, date_parser=custom_date_parser)
     return df
 
-def campaigns_kpi_function(campaigns_information = None):
+def campaigns_kpi_function(campaigns_information = None, max_date = None):
 
     # Number of mail campaigns
     nb_campaigns = campaigns_information[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()
     nb_campaigns.rename(columns = {'campaign_name' : 'nb_campaigns'}, inplace = True)
 
     # Average opening time (in minutes)
-    campaigns_information['time_to_open'] = pd.to_datetime(campaigns_information['opened_at'], utc = True, format = 'ISO8601') - pd.to_datetime(campaigns_information['delivered_at'], utc = True, format = 'ISO8601')
+    campaigns_information['time_to_open'] = (pd.to_datetime(campaigns_information['opened_at'], utc = True, format = 'ISO8601') - pd.to_datetime(campaigns_information['delivered_at'], utc = True, format = 'ISO8601')) / np.timedelta64(1, 'h')
+    campaigns_information['time_to_open'] = campaigns_information['time_to_open'].fillna((pd.to_datetime(campaigns_information['delivered_at'], utc = True, format = 'ISO8601') - pd.to_datetime(max_date, utc = True, format = 'ISO8601')) / np.timedelta64(1, 'h'))
 
     time_to_open = campaigns_information[['customer_id', 'time_to_open']].groupby('customer_id').mean().reset_index()
 
     # Number of opened mails
@@ -33,8 +35,11 @@ def campaigns_kpi_function(campaigns_information = None):
     campaigns_reduced = pd.merge(nb_campaigns, opened_campaign, on = 'customer_id', how = 'left')
     campaigns_reduced = pd.merge(campaigns_reduced, time_to_open, on = 'customer_id', how = 'left')
 
+    # Mail open rate
+    campaigns_reduced['taux_ouverture_mail'] = campaigns_reduced['nb_campaigns_opened'] / campaigns_reduced['nb_campaigns']
+
     # Fill NaN values
-    campaigns_reduced[['nb_campaigns', 'nb_campaigns_opened']] = campaigns_reduced[['nb_campaigns', 'nb_campaigns_opened']].fillna(0)
+    campaigns_reduced[['nb_campaigns', 'nb_campaigns_opened', 'taux_ouverture_mail']] = campaigns_reduced[['nb_campaigns', 'nb_campaigns_opened', 'taux_ouverture_mail']].fillna(0)
     # Fill the NaT values: time_to_open (??)
 
     return campaigns_reduced
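Sketched on synthetic data, the reworked `time_to_open` gives opened mails `(opened_at - delivered_at)` in hours, and falls back to `(delivered_at - max_date)` in hours for never-opened mails, i.e. a negative value whenever delivery precedes `max_date`:

```python
import numpy as np
import pandas as pd

c = pd.DataFrame({
    "delivered_at": pd.to_datetime(["2023-01-01 08:00", "2023-01-01 08:00"], utc=True),
    "opened_at": pd.to_datetime(["2023-01-01 10:30", None], utc=True),
})
max_date = "2023-01-03 08:00"  # illustrative end_features_date

# Opened mail: hours between delivery and opening; unopened mail: negative "age"
c["time_to_open"] = (c["opened_at"] - c["delivered_at"]) / np.timedelta64(1, "h")
c["time_to_open"] = c["time_to_open"].fillna(
    (c["delivered_at"] - pd.to_datetime(max_date, utc=True)) / np.timedelta64(1, "h")
)
print(c["time_to_open"].tolist())  # [2.5, -48.0]
```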
@@ -49,33 +54,20 @@ def tickets_kpi_function(tickets_information = None):
     tickets_information_copy['vente_internet'] = tickets_information_copy['supplier_name'].fillna('').str.contains('|'.join(liste_mots), case=False).astype(int)
 
     # Proportion of online sales
-    prop_vente_internet = tickets_information_copy[tickets_information_copy['vente_internet'] == 1].groupby(['customer_id'])['ticket_id'].count().reset_index()
-    prop_vente_internet.rename(columns = {'ticket_id' : 'nb_tickets_internet'}, inplace = True)
+    prop_vente_internet = tickets_information_copy[tickets_information_copy['vente_internet'] == 1].groupby(['customer_id'])['purchase_id'].nunique().reset_index()
+    prop_vente_internet.rename(columns = {'purchase_id' : 'nb_purchases_internet'}, inplace = True)
 
-    # Average amount
-    # avg_amount = (tickets_information_copy.groupby(["event_type_id", 'name_event_types'])
-    #               .agg({"amount" : "mean"}).reset_index()
-    #               .rename(columns = {'amount' : 'avg_amount'}))
-
+    # Mixed KPIs on purchasing behaviour
     tickets_kpi = (tickets_information_copy[['customer_id', 'purchase_id' ,'ticket_id','supplier_name', 'purchase_date', 'amount', 'vente_internet']]
                    .groupby(['customer_id'])
-                   .agg({'ticket_id': 'count',
-                         'purchase_id' : 'nunique',
-                         'amount' : 'sum',
-                         'supplier_name': 'nunique',
-                         'vente_internet' : 'max',
-                         'purchase_date' : ['min', 'max']})
-                   .reset_index()
-                   )
+                   .agg(nb_tickets=('ticket_id', 'nunique'),
+                        nb_purchases=('purchase_id', 'nunique'),
+                        total_amount=('amount', 'sum'),
+                        nb_suppliers=('supplier_name', 'nunique'),
+                        achat_internet=('vente_internet', 'max'),
+                        purchase_date_min=('purchase_date', 'min'),
+                        purchase_date_max=('purchase_date', 'max'))
+                   .reset_index())
 
-    tickets_kpi.columns = tickets_kpi.columns.map('_'.join)
-
-    tickets_kpi.rename(columns = {'ticket_id_count' : 'nb_tickets',
-                                  'purchase_id_nunique' : 'nb_purchases',
-                                  'amount_sum' : 'total_amount',
-                                  'supplier_name_nunique' : 'nb_suppliers',
-                                  'customer_id_' : 'customer_id'}, inplace = True)
-
     tickets_kpi['time_between_purchase'] = tickets_kpi['purchase_date_max'] - tickets_kpi['purchase_date_min']
     tickets_kpi['time_between_purchase'] = tickets_kpi['time_between_purchase'] / np.timedelta64(1, 'D')  # In number of days
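The `.agg` change above switches from dict-based aggregation to pandas named aggregation (available since pandas 0.25), which is why the `columns.map('_'.join)` flattening and the follow-up `rename` disappear: named aggregation emits flat, explicitly named columns. A small sketch of the difference:

```python
import pandas as pd

t = pd.DataFrame({
    "customer_id": [1, 1, 2],
    "ticket_id": [10, 11, 12],
    "amount": [5.0, 7.0, 3.0],
})

# Dict style: a ['min', 'max'] list yields MultiIndex columns that must be flattened
old = t.groupby("customer_id").agg({"ticket_id": "count", "amount": ["sum", "max"]})
print(list(old.columns))  # [('ticket_id', 'count'), ('amount', 'sum'), ('amount', 'max')]

# Named aggregation: one keyword per output column, nothing to flatten or rename
new = t.groupby("customer_id").agg(
    nb_tickets=("ticket_id", "nunique"),
    total_amount=("amount", "sum"),
).reset_index()
print(list(new.columns))  # ['customer_id', 'nb_tickets', 'total_amount']
```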
@@ -85,22 +77,17 @@ def tickets_kpi_function(tickets_information = None):
     tickets_kpi['purchase_date_max'] = (max_date - tickets_kpi['purchase_date_max']) / np.timedelta64(1, 'D')
     tickets_kpi['purchase_date_min'] = (max_date - tickets_kpi['purchase_date_min']) / np.timedelta64(1, 'D')
 
+    # Proportion of internet tickets
     tickets_kpi = tickets_kpi.merge(prop_vente_internet, on = ['customer_id'], how = 'left')
-    tickets_kpi['nb_tickets_internet'] = tickets_kpi['nb_tickets_internet'].fillna(0)
-
-    # tickets_kpi = tickets_kpi.merge(avg_amount, how='left', on= 'event_type_id')
-
-    # Share of tickets paid online, per company
-    # tickets_kpi["Taux_ticket_internet"] = tickets_kpi["nb_tickets_internet"]*100 / tickets_kpi["nb_tickets"]
-    # tickets_kpi['Taux_ticket_internet'] = tickets_kpi['Taux_ticket_internet'].fillna(0)
+    tickets_kpi['nb_purchases_internet'] = tickets_kpi['nb_purchases_internet'].fillna(0)
+    tickets_kpi['prop_purchases_internet'] = tickets_kpi['nb_purchases_internet'] / tickets_kpi['nb_purchases']
 
     return tickets_kpi
 
 def customerplus_kpi_function(customerplus_clean = None):
     # KPIs on socio-demographic data
-    ## Gender
+    # Gender
     customerplus_clean["gender_label"] = customerplus_clean["gender"].map({
         0: 'female',
         1: 'male',
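The new internet-purchase share relies on the left merge leaving NaN for customers with no online purchase, which the `fillna(0)` then turns into a zero share; `nb_purchases` should be positive for every row, since each customer in `tickets_kpi` comes from at least one ticket. A minimal sketch:

```python
import pandas as pd

tickets_kpi = pd.DataFrame({"customer_id": [1, 2], "nb_purchases": [4, 2]})
prop_vente_internet = pd.DataFrame({"customer_id": [1], "nb_purchases_internet": [1]})

# Customer 2 has no online purchase, so the left merge leaves NaN for them
tickets_kpi = tickets_kpi.merge(prop_vente_internet, on=["customer_id"], how="left")
tickets_kpi["nb_purchases_internet"] = tickets_kpi["nb_purchases_internet"].fillna(0)
tickets_kpi["prop_purchases_internet"] = tickets_kpi["nb_purchases_internet"] / tickets_kpi["nb_purchases"]
print(tickets_kpi["prop_purchases_internet"].tolist())  # [0.25, 0.0]
```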
@@ -109,9 +96,28 @@ def customerplus_kpi_function(customerplus_clean = None):
     gender_dummies = pd.get_dummies(customerplus_clean["gender_label"], prefix='gender').astype(int)
     customerplus_clean = pd.concat([customerplus_clean, gender_dummies], axis=1)
 
-    ## Indicator for whether the individual lives in France
+    # Age
+    customerplus_clean['categorie_age_0_10'] = ((customerplus_clean['age'] >= 0) & (customerplus_clean['age'] < 10)).astype(int)
+    customerplus_clean['categorie_age_10_20'] = ((customerplus_clean['age'] >= 10) & (customerplus_clean['age'] < 20)).astype(int)
+    customerplus_clean['categorie_age_20_30'] = ((customerplus_clean['age'] >= 20) & (customerplus_clean['age'] < 30)).astype(int)
+    customerplus_clean['categorie_age_30_40'] = ((customerplus_clean['age'] >= 30) & (customerplus_clean['age'] < 40)).astype(int)
+    customerplus_clean['categorie_age_40_50'] = ((customerplus_clean['age'] >= 40) & (customerplus_clean['age'] < 50)).astype(int)
+    customerplus_clean['categorie_age_50_60'] = ((customerplus_clean['age'] >= 50) & (customerplus_clean['age'] < 60)).astype(int)
+    customerplus_clean['categorie_age_60_70'] = ((customerplus_clean['age'] >= 60) & (customerplus_clean['age'] < 70)).astype(int)
+    customerplus_clean['categorie_age_70_80'] = ((customerplus_clean['age'] >= 70) & (customerplus_clean['age'] < 80)).astype(int)
+    customerplus_clean['categorie_age_plus_80'] = (customerplus_clean['age'] >= 80).astype(int)
+    customerplus_clean['categorie_age_inconnue'] = customerplus_clean['age'].apply(lambda x: 1 if pd.isna(x) else 0)
+
+    # Mailing consent
+    customerplus_clean['opt_in'] = customerplus_clean['opt_in'].astype(int)
+
+    # Indicator for whether the individual lives in France
     customerplus_clean["country_fr"] = customerplus_clean["country"].apply(lambda x : int(x=="fr") if pd.notna(x) else np.nan)
 
+    customerplus_clean['is_profession_known'] = customerplus_clean['profession'].notna().astype(int)
+
+    customerplus_clean['is_zipcode_known'] = customerplus_clean['zipcode'].notna().astype(int)
+
     # Dummy if the customer has a structure id (tags)
     # customerplus_clean['has_tags'] = customerplus_clean['structure_id'].apply(lambda x: 1 if not pd.isna(x) else 0)
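The age bands above are hand-built one-hot indicators; a NaN age compares as False against every bound, so such rows land only in `categorie_age_inconnue`. A sketch on synthetic data, with a `pd.cut` formulation shown purely as a more compact alternative (not what this commit uses):

```python
import pandas as pd

c = pd.DataFrame({"age": [25, 71, None]})

# Hand-built band, as in the commit: comparisons against NaN are False, hence 0
c["categorie_age_20_30"] = ((c["age"] >= 20) & (c["age"] < 30)).astype(int)
c["categorie_age_inconnue"] = c["age"].isna().astype(int)
print(c["categorie_age_20_30"].tolist(), c["categorie_age_inconnue"].tolist())  # [1, 0, 0] [0, 0, 1]

# Alternative: the same bands via pd.cut + get_dummies (NaN rows get all zeros)
bands = pd.cut(c["age"], bins=[0, 10, 20, 30, 40, 50, 60, 70, 80, float("inf")], right=False)
dummies = pd.get_dummies(bands, prefix="categorie_age").astype(int)
print(dummies.iloc[0].sum(), dummies.iloc[2].sum())  # 1 0
```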
@@ -524,6 +524,65 @@
     "export_in_temporary(target_agg, 'Target_kpi_concatenate')"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cb6f06e6-78de-4b8d-a103-8366eff0493a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "v"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c5e864b1-adad-4267-b956-3f7ef371d677",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "def display_covering_time(df, company, datecover):\n",
+    "    \"\"\"\n",
+    "    This function draws the time coverage of each company\n",
+    "    \"\"\"\n",
+    "    min_date = df['purchase_date'].min().strftime(\"%Y-%m-%d\")\n",
+    "    max_date = df['purchase_date'].max().strftime(\"%Y-%m-%d\")\n",
+    "    datecover[company] = [datetime.strptime(min_date, \"%Y-%m-%d\") + timedelta(days=x) for x in range((datetime.strptime(max_date, \"%Y-%m-%d\") - datetime.strptime(min_date, \"%Y-%m-%d\")).days)]\n",
+    "    print(f'Coverage Company {company} : {min_date} - {max_date}')\n",
+    "    return datecover\n",
+    "\n",
+    "\n",
+    "def compute_time_intersection(datecover):\n",
+    "    \"\"\"\n",
+    "    This function returns the time coverage for all companies\n",
+    "    \"\"\"\n",
+    "    timestamps_sets = [set(timestamps) for timestamps in datecover.values()]\n",
+    "    intersection = set.intersection(*timestamps_sets)\n",
+    "    intersection_list = list(intersection)\n",
+    "    formated_dates = [dt.strftime(\"%Y-%m-%d\") for dt in intersection_list]\n",
+    "    return sorted(formated_dates)\n",
+    "\n",
+    "\n",
+    "def df_coverage_modelization(sport, coverage_features = 0.7):\n",
+    "    \"\"\"\n",
+    "    This function returns start_date, end_of_features and final dates\n",
+    "    that help to construct train and test datasets\n",
+    "    \"\"\"\n",
+    "    datecover = {}\n",
+    "    for company in sport:\n",
+    "        df_products_purchased_reduced = display_input_databases(company, file_name = \"products_purchased_reduced\",\n",
+    "                                                                datetime_col = ['purchase_date'])\n",
+    "        datecover = display_covering_time(df_products_purchased_reduced, company, datecover)\n",
+    "        #print(datecover.keys())\n",
+    "    dt_coverage = compute_time_intersection(datecover)\n",
+    "    start_date = dt_coverage[0]\n",
+    "    end_of_features = dt_coverage[int(0.7 * len(dt_coverage))]\n",
+    "    final_date = dt_coverage[-1]\n",
+    "    return start_date, end_of_features, final_date\n",
+    "    "
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "2435097a-95a5-43e1-84d0-7f6b701441ba",
File diff suppressed because one or more lines are too long

Sport/Modelization/3_logit_cross_val_sport.ipynb (new file, 8910 lines)
File diff suppressed because one or more lines are too long