"""Summarise per-window predictions into one row per file.

Reads the prediction tables of both lots, reduces them to per-file
statistics (max / mean score and number of windows above the 0.5, 0.8 and
0.9 thresholds), derives lot / session / timestamp metadata from the file
path, and pickles the cleaned table to ``clean_res.pkl``.
"""
import os

import calplot
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

LOT_FILES = ("Lot1_carimam.csv", "Lot2_carimam.csv")
OUTPUT_PICKLE = "clean_res.pkl"

# Year prefixes found at the start of a path component that are rewritten
# before the timestamp is parsed.
# NOTE(review): presumably these come from mis-set recorder clocks - confirm
# that every one of them really maps to the year given here.
YEAR_FIXES = {
    '/2122': '/2021',
    '/2050': '/2022',
    '/2132': '/2021',
    '/2035': '/2021',
    '/2002': '/2021',
    '/2131': '/2021',
    '/2136': '/2021',
    '/2005': '/2021',
    '/2001': '/2021',
    '/2121': '/2021',
    '/2133': '/2021',
    '/2036': '/2021',
    '/2076': '/2021',
    '/2026': '/2021',
    '/2019': '/2021',
}

# Aggregate BOTH lots. The previous version concatenated them into a
# variable that was never read again, so only the second CSV was processed.
predictions = pd.concat(
    [pd.read_csv(csv_path) for csv_path in LOT_FILES],
    ignore_index=True,
)

# Vectorised per-file reduction: threshold once over the whole table, then
# let groupby do the reductions. This replaces a Python-level loop that
# depended on `tqdm`, which this file never imports.
scores = predictions['prediction']
windows = pd.DataFrame({
    'file': predictions['filename'],
    'prediction': scores,
    'nb_positif_50': (scores > 0.5).astype(int),
    'nb_positif_80': (scores > 0.8).astype(int),
    'nb_positif_90': (scores > 0.9).astype(int),
})
per_file = windows.groupby('file')
df = pd.DataFrame({
    'max_pred': per_file['prediction'].max(),
    'mean_pred': per_file['prediction'].mean(),
    'nb_positif_50': per_file['nb_positif_50'].sum(),
    'nb_positif_80': per_file['nb_positif_80'].sum(),
    'nb_positif_90': per_file['nb_positif_90'].sum(),
}).reset_index()

# Path-derived metadata. The component indices (7 = lot, 8 = session,
# 9 = file name carrying the timestamp) are tied to the directory depth of
# the paths stored in the CSVs.
path_parts = df['file'].str.split('/', expand=True)
df['lot'] = path_parts[7]
# Session name is whatever precedes the first "_<two digits>" in component 8.
df['sess'] = path_parts[8].str.split(r'_[0-9]{2}', expand=True)[0]
df['path'] = df['file'].map(os.path.dirname)
df['fn'] = df['file'].map(os.path.basename)

# Patch the year prefixes. The patterns are plain substrings, so regex
# matching is switched off explicitly (the pandas default changed in 2.0).
raw = df['file']
for wrong_year, fixed_year in YEAR_FIXES.items():
    raw = raw.str.replace(wrong_year, fixed_year, regex=False)
df['raw'] = raw

# The timestamp is the part of path component 9 that precedes "UTC_V";
# values that do not match the format become NaT.
timestamp = raw.str.split('/', expand=True)[9].str.split('UTC_V', expand=True)[0]
df['date'] = pd.to_datetime(timestamp, format="%Y%m%d_%H%M%S", errors='coerce')

# Drops every row with a missing value in any column, which includes the
# rows whose timestamp could not be parsed.
df = df.dropna()

# Interactive sanity checks kept from the original; their results are only
# displayed in a REPL / notebook and are discarded when run as a script.
df['date'].dt.year.unique()
df.lot.unique()

df = df[['path', 'fn', 'lot', 'sess', 'date', 'max_pred', 'mean_pred',
         'nb_positif_50', 'nb_positif_80', 'nb_positif_90']]
df.to_pickle(OUTPUT_PICKLE)