import pandas as pd
import numpy as np
import os
import calplot
import matplotlib.pyplot as plt


# Load the per-lot prediction CSVs. `res` keeps the plain concatenation of
# both lots, with each CSV's original row labels.
lot_frames = [
    pd.read_csv(csv_path)
    for csv_path in ("Lot1_carimam.csv", "Lot2_carimam.csv")
]
res = pd.concat(lot_frames)

# Work on every lot, not only the last CSV read: previously `df` still
# pointed at the Lot2 frame alone, so the Lot1 rows were concatenated into
# `res` and then never used.
# reset_index gives unique row labels (concat repeats each CSV's own labels)
# and returns a new frame, so adding the 'file' column leaves `res` untouched.
df = res.reset_index(drop=True)
df['file'] = df['filename']


# Summarise the predictions of each file: highest score, mean score, and the
# number of predictions strictly above the 0.5 / 0.8 / 0.9 thresholds.
# A grouped aggregation replaces the former per-group Python loop, which
# called `tqdm.tqdm` although this file never imports `tqdm`.
scores = df[['file', 'prediction']].copy()

count_thresholds = {
    "nb_positif_50": 0.5,
    "nb_positif_80": 0.8,
    "nb_positif_90": 0.9,
}
for count_col, threshold in count_thresholds.items():
    # Boolean flag per prediction; summing it per file yields the count.
    scores[count_col] = scores['prediction'] > threshold

aggregations = {
    "max_pred": ("prediction", "max"),
    "mean_pred": ("prediction", "mean"),
}
aggregations.update(
    {count_col: (count_col, "sum") for count_col in count_thresholds}
)

# One row per file, with "file" restored as a regular first column.
df = scores.groupby('file').agg(**aggregations).reset_index()

# Split each file path on "/" once and reuse the pieces; 'lot' and 'sess' are
# read from path components 7 and 8.
# NOTE(review): these positions assume a fixed directory depth — confirm
# against the paths stored in the CSVs.
path_parts = df['file'].str.split('/', expand=True)
df['lot'] = path_parts[7]
# Keep the text of component 8 that precedes the first "_" + two digits.
df['sess'] = path_parts[8].str.split('_[0-9]{2}', expand=True)[0]

# Series.map calls the os.path helper once per path string, without building
# a row Series per file as the former row-wise apply did.
df['path'] = df['file'].map(os.path.dirname)
df['fn'] = df['file'].map(os.path.basename)

# Year prefixes (found right after a "/") that are rewritten before the
# timestamp is parsed. Applied in the listed order.
# NOTE(review): presumably these correct wrong recorder clocks — confirm the
# mapping with whoever owns the data.
year_fixes = {
    '/2122': '/2021',
    '/2050': '/2022',
    '/2132': '/2021',
    '/2035': '/2021',
    '/2002': '/2021',
    '/2131': '/2021',
    '/2136': '/2021',
    '/2005': '/2021',
    '/2001': '/2021',
    '/2121': '/2021',
    '/2133': '/2021',
    '/2036': '/2021',
    '/2076': '/2021',
    '/2026': '/2021',
    '/2019': '/2021',
}

raw_paths = df['file']
for wrong_year, fixed_year in year_fixes.items():
    # Literal substring replacement: the patterns contain no regex syntax, so
    # regex=False keeps the result independent of the pandas default.
    raw_paths = raw_paths.str.replace(wrong_year, fixed_year, regex=False)
df['raw'] = raw_paths

# The timestamp is the part of path component 9 that precedes "UTC_V";
# values that do not match the format become NaT (errors='coerce').
stamp_text = (
    df['raw']
    .str.split('/', expand=True)[9]
    .str.split('UTC_V', expand=True)[0]
)
df['date'] = pd.to_datetime(stamp_text, format="%Y%m%d_%H%M%S", errors='coerce')

# Drops every row holding a missing value in ANY column, which includes the
# rows whose date could not be parsed.
df = df.dropna()

# Sanity checks on the cleaned table: which years and which lots are present.
# Written as bare expressions their results were discarded when this file
# runs as a script, so they are printed instead.
print(df['date'].dt.year.unique())
print(df['lot'].unique())

# Keep only the columns of the final table, in this order, and persist it.
output_columns = [
    'path', 'fn', 'lot', 'sess', 'date',
    'max_pred', 'mean_pred',
    'nb_positif_50', 'nb_positif_80', 'nb_positif_90',
]
df = df[output_columns]

df.to_pickle("clean_res.pkl")