"""Export short audio clips around annotated clicks for the held-out test sites.

The annotation spreadsheet is filtered, split into train/test by recording
site prefix, and for every test annotation a window centred on the click is
cut from the source recording and written under ``OUTPUT_DIR``.  The test
annotation table is saved next to the clips as ``df_test.csv``.
"""
import os

import numpy as np
import pandas as pd

ANNOTATION_FILE = "Annotation_CARIMAM_apo_2023_07_06.xlsx"
ANNOTATION_SHEET = "annot_click_apo"
DATA_ROOT = "/nfs/NAS6/SABIOD/SITE/CARIMAM/DATA/"
OUTPUT_DIR = "audio_test_article"

# Annotations whose file lives under these roots are discarded entirely.
EXCLUDED_PREFIXES = ("/nfs/NAS4/", "/nfs/NAS3/")
# Recordings under these prefixes form the test set; everything else is train.
HELD_OUT_PREFIXES = ("LOT2/BERMUDE", "LOT2/GUYANNE", "LOT2/ANG")

# Half-width of the exported window, in milliseconds.
NB_CTXT_FRAME = 512


def file_prefix_mask(df, prefixes):
    """Return a boolean Series marking rows whose 'File' starts with any prefix.

    Args:
        df: DataFrame with a string column named 'File'.
        prefixes: iterable of path prefixes to match.

    Returns:
        Boolean Series aligned on ``df.index``.
    """
    files = df["File"]
    mask = pd.Series(False, index=df.index)
    for prefix in prefixes:
        mask = mask | files.str.startswith(prefix)
    return mask


def resolve_audio_path(file: str, data_root: str = DATA_ROOT) -> str:
    """Return the path to read: '/nfs...' paths are kept, others go under data_root."""
    return file if file.startswith("/nfs") else os.path.join(data_root, file)


def pick_click_time(click_start, click_end):
    """Return the click time in seconds.

    When the annotation is a span rather than a point, a position is drawn
    uniformly at random inside the span.
    """
    if click_start == click_end:
        return click_start
    return np.random.uniform(click_start, click_end)


def click_window(pos_click_s, samplerate: int, nb_ctxt_frame: int = NB_CTXT_FRAME):
    """Return the ``(start, stop)`` sample indices of the window around a click.

    Args:
        pos_click_s: click position in seconds.
        samplerate: sampling rate of the recording in Hz.
        nb_ctxt_frame: half-width of the window in milliseconds.

    Returns:
        Tuple ``(start, stop)``; ``start`` is clamped at 0 so a click near the
        beginning of the file never produces a negative offset.
    """
    pos_click = int(samplerate * pos_click_s)
    # Integer division avoids float truncation surprises of int(sr * 0.001).
    samples_per_ms = int(samplerate) // 1000
    click_margin = nb_ctxt_frame * samples_per_ms
    return max(0, pos_click - click_margin), pos_click + click_margin


def clip_output_path(file: str, out_dir: str = OUTPUT_DIR) -> str:
    """Return where the clip for annotation file ``file`` is written.

    Leading slashes are stripped so an absolute source path can never make
    ``os.path.join`` escape ``out_dir`` and overwrite the source recording.
    """
    return os.path.join(out_dir, file.lstrip("/"))


if __name__ == "__main__":
    # Imported here so the pure helpers above stay importable on machines
    # without the audio backend installed.
    import soundfile as sf

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    df = pd.read_excel(
        ANNOTATION_FILE, sheet_name=ANNOTATION_SHEET, usecols="A:E"
    ).dropna()
    df = df[~file_prefix_mask(df, EXCLUDED_PREFIXES)]

    mask = file_prefix_mask(df, HELD_OUT_PREFIXES)
    df_test = df[mask]
    df_train = df[~mask]

    for _, row in df_test.iterrows():
        file = resolve_audio_path(row["File"])
        # Read the header once per row instead of once per use.
        samplerate = sf.info(file).samplerate
        pos_click = pick_click_time(row["pos_start"], row["pos_end"])
        start, stop = click_window(pos_click, samplerate)
        sig, sr = sf.read(file, start=start, stop=stop)

        # NOTE(review): the output name is derived from 'File' only, so several
        # annotations on the same recording overwrite each other - confirm
        # whether one clip per recording is intended.
        out_path = clip_output_path(row["File"])
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        sf.write(out_path, sig, sr)

    # The table keeps the 'positif_negatif' column ('n' marks a negative), so
    # the labels travel with the exported clips.
    df_test.to_csv("df_test.csv")