From b1f6c919e53f2a807913eaabfc5e3c2d6d93aafd Mon Sep 17 00:00:00 2001
From: Ross Cutler <46252169+rosscutler@users.noreply.github.com>
Date: Wed, 4 Jun 2025 14:45:44 -0700
Subject: [PATCH] Add aggregation analysis

---
 docs/results.md      |  3 ++-
 src/result_parser.py | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/docs/results.md b/docs/results.md
index 0bf0127..dec27a4 100644
--- a/docs/results.md
+++ b/docs/results.md
@@ -46,7 +46,8 @@ created in the first step ([preparation](preparation.md)).
 for each assignment which has a status of "submitted".
 * `[downloaded_batch_result]_votes_per_clip.csv`: Aggregated result per clip, including MOS, standard deviations, and 95% Confidence Intervals.
 * `[downloaded_batch_result]_votes_per_cond.csv`: Aggregated result per condition.
-* `[downloaded_batch_result]_votes_per_worker.csv`: Long format of rating per clip, includes: HITId, workerid, file, vote and condition.
+* `[downloaded_batch_result]_votes_per_worker.csv`: Long format of rating per clip, includes: HITId, workerid, file, vote and condition.
+* `[downloaded_batch_result]_aggregated_results.csv`: Aggregated MOS statistics per condition key, with 95% confidence intervals.
 * `[downloaded_batch_result]_quantity_bonus_report.csv`: List of workers who are eligible for quantity bonus with the amount of bonus (to be used with the mturk_utils.py).
 * `[downloaded_batch_result]_quality_bonus_report.csv`: List of workers who are eligible for quality bonus with the amount of bonus (to be used with the mturk_utils.py).
 * `[downloaded_batch_result]_extending.csv`: List of HITIds with number of assignment per each which are needed to reach a specific number of votes per clip.
diff --git a/src/result_parser.py b/src/result_parser.py
index e9e0112..b5261d2 100644
--- a/src/result_parser.py
+++ b/src/result_parser.py
@@ -1659,6 +1659,36 @@ def number_of_uniqe_workers(answers):
     return len(df)
 
 
+def aggregate_condition_results(votes, cfg, out_path):
+    """Aggregate votes by condition keys and question type.
+
+    Parameters
+    ----------
+    votes : list[dict]
+        List of vote entries generated during parsing.
+    cfg : configparser.ConfigParser
+        Configuration to read ``condition_keys``.
+    out_path : str
+        Path of the CSV file to create.
+    """
+
+    if len(votes) == 0:
+        return
+
+    df = pd.DataFrame(votes)
+    group_keys = []
+    if cfg.has_option('general', 'condition_keys'):
+        group_keys.extend([k.strip() for k in cfg['general']['condition_keys'].split(',')])
+
+    if 'question_type' in df.columns:
+        group_keys.append('question_type')
+
+    agg = df.groupby(group_keys)['vote'].agg(['count', 'mean', 'std']).reset_index()
+    agg.rename(columns={'count': 'n', 'mean': 'MOS', 'std': 'std'}, inplace=True)
+    agg['95%CI'] = 1.96 * agg['std'] / np.sqrt(agg['n'])
+    agg.to_csv(out_path, index=False)
+
+
 def get_ans_suffixes(test_method):
     if "p835" in test_method:
         question_name_suffix = p835_suffixes[2]
@@ -1900,6 +1930,10 @@ def analyze_results(config, test_method, answer_path, list_of_req, quality_bonus
         )
         write_dict_as_csv(all_data_per_worker, all_votes_per_file_path)
 
+    # aggregated analysis report
+    agg_path = os.path.splitext(answer_path)[0] + '_aggregated_results.csv'
+    aggregate_condition_results(all_data_per_worker, config, agg_path)
+
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
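
The 95%CI column written by the new function is the normal-approximation half-width 1.96 * std / sqrt(n) of each group. The sketch below mirrors aggregate_condition_results as a standalone script so the aggregation can be tried outside result_parser.py; the toy votes, the single 'condition' condition key, and the guard against an empty group-key list are illustrative assumptions here and not part of the patch.

import configparser

import numpy as np
import pandas as pd


def aggregate_condition_results(votes, cfg, out_path):
    """Group long-format votes by the configured condition keys (plus
    question_type, if present) and write n / MOS / std / 95% CI per group."""
    if len(votes) == 0:
        return

    df = pd.DataFrame(votes)
    group_keys = []
    if cfg.has_option('general', 'condition_keys'):
        group_keys.extend(k.strip() for k in cfg['general']['condition_keys'].split(','))
    if 'question_type' in df.columns:
        group_keys.append('question_type')
    if not group_keys:
        # illustrative guard (not in the patch): pandas raises on groupby([])
        return

    agg = df.groupby(group_keys)['vote'].agg(['count', 'mean', 'std']).reset_index()
    agg = agg.rename(columns={'count': 'n', 'mean': 'MOS'})
    # normal-approximation half-width of the 95% confidence interval
    agg['95%CI'] = 1.96 * agg['std'] / np.sqrt(agg['n'])
    agg.to_csv(out_path, index=False)


if __name__ == '__main__':
    # illustrative config and toy votes, shaped like the long-format
    # *_votes_per_worker.csv rows (workerid, file, vote, condition)
    cfg = configparser.ConfigParser()
    cfg.read_dict({'general': {'condition_keys': 'condition'}})
    toy_votes = [
        {'workerid': 'w1', 'file': 'c01_a.wav', 'condition': 'c01', 'vote': 4},
        {'workerid': 'w2', 'file': 'c01_a.wav', 'condition': 'c01', 'vote': 5},
        {'workerid': 'w1', 'file': 'c02_b.wav', 'condition': 'c02', 'vote': 3},
        {'workerid': 'w2', 'file': 'c02_b.wav', 'condition': 'c02', 'vote': 2},
    ]
    aggregate_condition_results(toy_votes, cfg, 'toy_aggregated_results.csv')
    print(pd.read_csv('toy_aggregated_results.csv'))

Running the sketch writes toy_aggregated_results.csv with one row per condition and the columns condition, n, MOS, std and 95%CI, matching the per-group statistics the patch reports.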