import argparse
import glob
import os

import numpy as np
import pandas as pd

'''
Updates the fLoc events.tsv files to include the image shown for each trial,
and specifies which version was run (default or alternative stimuli).
The script then relabels the events files to match the bold.nii.gz data
(runs labelled as run 1 and run 2, chronologically, rather than with
task-flocdef / task-flocalt labels).
Default paths assume the script is run on elm.
'''


def get_arguments():
    parser = argparse.ArgumentParser(
        description="Update fLoc events.tsv files to include task version and shown stimuli")
    parser.add_argument('--source_dir', default='/unf/eyetracker/neuromod/floc/sourcedata',
                        type=str, help='absolute path to source dataset')
    parser.add_argument('--bids_dir', default='/data/neuromod/DATA/cneuromod/floc',
                        type=str, help='absolute path to bids dataset')
    parser.add_argument('--out_dir', default='/data/neuromod/temp_marie/floc',
                        type=str, help='absolute path to output directory')
    args = parser.parse_args()
    return args


def process_log(log_path):
    '''
    Parses a session .log file and returns a dict that maps each task name
    to the list of stimulus image paths shown during that task.
    '''
    log_dict = {}
    with open(log_path) as f:
        lines = f.readlines()

    stim_list = []
    for line in lines:
        if "stimulus: image = '/scratch/neuromod/src/task_stimuli/data/fLoc" in line:
            # strip the surrounding quotes and trailing newline from the logged path
            image_path = line.split(' ')[-1]
            stim_list.append(image_path[1:-2])
        elif "task - " in line:
            # store the accumulated stimuli under the task name found on this line
            taskname = line.split(':')[-2][1:]
            if len(stim_list) > 0:
                log_dict[taskname] = stim_list
                stim_list = []

    return log_dict


# Stand-in image names and subcategory labels, used when the real file name
# cannot be recovered from the logged path (i.e., it points into .git/annex)
name_dict = {
    'def': {
        'bodies': 'body-xx.jpg',
        'characters': 'word-xx.jpg',
        'faces': 'adult-xx.jpg',
        'objects': 'car-xx.jpg',
        'places': 'house-xx.jpg',
    },
    'alt': {
        'bodies': 'limb-xx.jpg',
        'characters': 'word-xx.jpg',
        'faces': 'adult-xx.jpg',
        'objects': 'instrument-xx.jpg',
        'places': 'corridor-xx.jpg',
    },
}

cat_dict = {
    'def': {
        'bodies': 'body',
        'characters': 'word',
        'faces': 'adult',
        'objects': 'car',
        'places': 'house',
    },
    'alt': {
        'bodies': 'limb',
        'characters': 'word',
        'faces': 'adult',
        'objects': 'instrument',
        'places': 'corridor',
    },
}


def get_name(row, task_label):
    '''
    Returns the image file name from the image path for a pandas DF row.
    Falls back to a generic name when the path points into .git/annex.
    '''
    im_path = row['image_path']
    if '.git/annex' in im_path:
        cat = row['category']
        return name_dict[task_label][cat]
    else:
        return im_path.split('/')[-1]


def get_cat(row, task_label):
    '''
    Returns the image subcategory from the image path for a pandas DF row.
    Falls back to a generic label when the path points into .git/annex.
    '''
    im_path = row['image_path']
    if '.git/annex' in im_path:
        cat = row['category']
        return cat_dict[task_label][cat]
    else:
        return im_path.split('/')[-2]


def update_event(source_list, source_path, bids_path, out_path):
    for s in source_list:
        sub, ses, fnum, task, suffix = os.path.basename(s).split('_')
        out_dir = os.path.join(out_path, sub, ses)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        try:
            log_path = glob.glob(os.path.join(source_path, sub, ses, '*' + fnum + '.log'))
            assert len(log_path) == 1
            log_path = log_path[0]

            log_dict = process_log(log_path)
            img_list = log_dict[task]

            df = pd.read_csv(s, sep='\t')
            df.insert(loc=4, column='task_version', value=task[-3:], allow_duplicates=True)

            # split stimulus and non-stimulus rows so the image info can be filled in separately;
            # .copy() ensures the inserts below modify real frames, not views of df
            df_stim = df[df['trial_type'] == 'stimuli'].copy()
            df_nostim = df[df['trial_type'] != 'stimuli'].copy()

            df_stim.insert(loc=1, column='image_path', value=np.array(img_list), allow_duplicates=True)
            sub_cat = df_stim.apply(lambda row: get_cat(row, task[-3:]), axis=1)
            df_stim.insert(loc=7, column='subcategory', value=sub_cat, allow_duplicates=True)
            img_num = df_stim.apply(lambda row: get_name(row, task[-3:]), axis=1)
            df_stim.insert(loc=8, column='image_num', value=img_num, allow_duplicates=True)

            df_nostim.insert(loc=1, column='image_path', value=np.nan, allow_duplicates=True)
            df_nostim.insert(loc=7, column='subcategory', value=np.nan, allow_duplicates=True)
            df_nostim.insert(loc=8, column='image_num', value=np.nan, allow_duplicates=True)

            # re-interleave the rows in their original (chronological) order
            df_merged = pd.concat([df_stim, df_nostim]).sort_index()

            # Get the run number to match the bold files (run 1 and 2).
            # A bit of a hack: onset times only match exactly with the file they came from;
            # matching on first and last trial categories instead proved unreliable.
            on_first = df_merged['onset'].iloc[0]
            on_last = df_merged['onset'].iloc[-1]

            ev_list = glob.glob(os.path.join(bids_path, sub, ses, 'func',
                                             '*_task-fLoc_run-*_events.tsv'))
            assert len(ev_list) == 2

            out_names = []
            for ev in ev_list:
                ev_df = pd.read_csv(ev, sep='\t')
                if (ev_df['onset'][0] == on_first
                        and ev_df['onset'][df_merged.shape[0] - 1] == on_last):
                    out_names.append(os.path.basename(ev))
            assert len(out_names) == 1

            out_name = os.path.join(out_dir, out_names[0])
            df_merged.to_csv(out_name, sep='\t', header=True, index=False)

        except Exception as e:
            print('problems with ' + sub + ', ' + ses + ': ' + str(e))


if __name__ == '__main__':
    args = get_arguments()
    source_path = args.source_dir
    bids_path = args.bids_dir
    out_path = args.out_dir

    source_list = sorted(glob.glob(os.path.join(source_path, 'sub-*/ses-0*/*task-floc*events.tsv')))
    update_event(source_list, source_path, bids_path, out_path)
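
# Example invocation (a sketch: the script filename below is assumed, and the
# paths shown are simply the argparse defaults defined above):
#
#   python update_floc_events.py \
#       --source_dir /unf/eyetracker/neuromod/floc/sourcedata \
#       --bids_dir /data/neuromod/DATA/cneuromod/floc \
#       --out_dir /data/neuromod/temp_marie/floc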