"""Export fixed-length audio clips around annotated clicks for the test split.

The script reads the click annotation sheet, drops the recordings stored under
the excluded NAS prefixes, selects the test split by path prefix, and for each
test annotation writes a clip centred on the click to ``audio_test_article/``.
The selected annotations are finally saved to ``df_test.csv``.
"""
import os

import numpy as np
import pandas as pd

ANNOTATION_FILE = "Annotation_CARIMAM_apo_2023_07_06.xlsx"
ANNOTATION_SHEET = "annot_click_apo"
DATA_ROOT = "/nfs/NAS6/SABIOD/SITE/CARIMAM/DATA/"
OUTPUT_DIR = "audio_test_article"
TEST_CSV = "df_test.csv"

# Recordings under these roots are discarded before the split.
EXCLUDED_PREFIXES = ("/nfs/NAS4/", "/nfs/NAS3/")
# Recordings whose relative path starts with one of these form the test split.
TEST_PREFIXES = ("LOT2/BERMUDE", "LOT2/GUYANNE", "LOT2/ANG")

# The half-window around a click is NB_CTXT_FRAME * 2 times the number of
# samples in one millisecond (see ``extract_click_clip``).
# NOTE(review): presumably the context size expected downstream -- confirm.
NB_CTXT_FRAME = 512


def starts_with_any(paths, prefixes):
    """Return a boolean Series that is True where a path starts with any prefix.

    Args:
        paths: pandas Series of path strings.
        prefixes: iterable of string prefixes.
    """
    mask = pd.Series(False, index=paths.index)
    for prefix in prefixes:
        mask = mask | paths.str.startswith(prefix)
    return mask


def split_train_test(df):
    """Split annotations into ``(df_train, df_test)`` using ``TEST_PREFIXES``."""
    is_test = starts_with_any(df["File"], TEST_PREFIXES)
    return df[~is_test], df[is_test]


def resolve_audio_path(file):
    """Return *file* unchanged if it starts with ``/nfs``, else join it to ``DATA_ROOT``."""
    return file if file.startswith("/nfs") else os.path.join(DATA_ROOT, file)


def pick_click_time(click_start, click_end):
    """Return the click time in seconds.

    When the annotation is a single instant it is returned as is; otherwise a
    time is drawn uniformly between the two bounds.
    NOTE: the draw is unseeded, so clips differ from one run to the next.
    """
    if click_start == click_end:
        return click_start
    return np.random.uniform(click_start, click_end)


def pad_to_window(sig, pad_before, target_len):
    """Zero-pad *sig* along its first (time) axis up to *target_len* samples.

    Args:
        sig: array whose first axis is time; further axes are left untouched.
        pad_before: number of zero samples to prepend (window started before
            the beginning of the file).
        target_len: desired length; any remaining shortfall is appended as
            zeros (window ran past the end of the file).

    Returns:
        The padded array, or *sig* itself when no padding is needed.
    """
    pad_after = max(0, target_len - pad_before - len(sig))
    if pad_before == 0 and pad_after == 0:
        return sig
    # Pad only the time axis; a bare ``[before, after]`` would be applied to
    # every axis and add spurious channels to multi-channel audio.
    pad_width = [(pad_before, pad_after)] + [(0, 0)] * (sig.ndim - 1)
    return np.pad(sig, pad_width)


def extract_click_clip(file, click_start, click_end):
    """Read a window centred on a click and return ``(sig, sr)``.

    Args:
        file: path of the audio file to read.
        click_start: start of the annotated click, in seconds.
        click_end: end of the annotated click, in seconds.

    The window spans ``click_margin`` samples on each side of the click and is
    zero-padded wherever it extends beyond the file, so every clip is
    ``2 * click_margin`` samples long.
    """
    # Imported lazily so the pure helpers stay importable without soundfile.
    import soundfile as sf

    samplerate = sf.info(file).samplerate
    pos_click = int(samplerate * pick_click_time(click_start, click_end))
    click_margin = NB_CTXT_FRAME * 2 * int(samplerate * 0.001)

    start = pos_click - click_margin
    stop = pos_click + click_margin
    sig, sr = sf.read(file, start=max(0, start), stop=stop)
    sig = pad_to_window(sig, max(0, -start), 2 * click_margin)
    return sig, sr


def main():
    """Build the test split, export one clip per annotation, save the CSV."""
    import soundfile as sf

    df = pd.read_excel(
        ANNOTATION_FILE, sheet_name=ANNOTATION_SHEET, usecols="A:E"
    ).dropna()
    df = df[~starts_with_any(df["File"], EXCLUDED_PREFIXES)]
    _df_train, df_test = split_train_test(df)

    for _, row in df_test.iterrows():
        # Label is 1 unless the annotation is marked 'n'.
        label = int(row["positif_negatif"] != "n")
        click_start, click_end = row["pos_start"], row["pos_end"]

        sig, sr = extract_click_clip(
            resolve_audio_path(row["File"]), click_start, click_end
        )
        print(len(sig) / sr)

        os.makedirs(
            os.path.dirname("%s/%s" % (OUTPUT_DIR, row["File"])), exist_ok=True
        )
        out_path = "%s/%s-%s-%s.WAV" % (OUTPUT_DIR, row["File"], click_start, label)
        sf.write(out_path, sig, sr)

    df_test.to_csv(TEST_CSV)


if __name__ == "__main__":
    main()