15 changes: 10 additions & 5 deletions docs/results.md
@@ -11,11 +11,16 @@ created in the first step ([preparation](preparation.md)).
**Note**: In case it is possible to have condition-level aggregation in your dataset, uncomment the
`condition_pattern` and `condition_keys`.

-**Note**: The `condition_pattern` specifies which part of the clip URL refers to the condition name/number that they are
-representing. Clips with the same value on that position, are considered to belong to the same condition and votes
-assigned to them will be aggregated to create the `per_condition` report. Example: Assuming `D501_C03_M2_S02.wav` is
-a file name,and `03` is the condition name. The pattern should be set to `.*_c(?P<condition_num>\d{1,2})_.*.wav` ,
-and the `condition_keys` to `condition_num`.
+**Note**: The `condition_pattern` specifies which part of the clip URL refers to the condition name/number it
+represents. Clips with the same value in that position are considered to belong to the same condition, and the
+votes assigned to them are aggregated to create the `per_condition` report.
+
+The pattern follows Python regular expression syntax and should use **named
+capture groups**. The names of the groups become column names in the reports.
+Example: Assuming `D501_C03_M2_S02.wav` is a file name and `03` is the
+condition identifier, set
+`condition_pattern: .*_c(?P<condition_num>\d{1,2})_.*\.wav` and
+`condition_keys` to `condition_num`.

**Note**: You can activate the automatic outlier detection method per condition. To do so
open `YOUR_PROJECT_NAME_ccr_result_parser.cfg`, in section `[accept_and_use]` add `outlier_removal: true`. The [z-score
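To make the named-capture-group mechanics above concrete, here is a small self-contained Python sketch (not part of the toolkit): it applies the documented example pattern to the example file name. The `re.IGNORECASE` flag is an assumption made here so that the lowercase `c` in the pattern matches the `C03` part of the file name.

```python
import re

# Example pattern from the documentation above; in practice the value comes from
# the condition_pattern option in the project's result-parser configuration file.
condition_pattern = r".*_c(?P<condition_num>\d{1,2})_.*\.wav"

file_name = "D501_C03_M2_S02.wav"

# Assumption: matching is case-insensitive so that "_C03_" satisfies the "_c" in the pattern.
match = re.match(condition_pattern, file_name, flags=re.IGNORECASE)
if match:
    # The named groups become columns in the per_condition report.
    print(match.groupdict())  # {'condition_num': '03'}
```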
41 changes: 22 additions & 19 deletions src/result_parser.py
@@ -1,9 +1,15 @@
"""
-/*---------------------------------------------------------------------------------------------
+/*-------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
-*--------------------------------------------------------------------------------------------*/
+*-------------------------------------------------------------------------------*/
@author: Babak Naderi

+This module parses MTurk result files and aggregates the votes per clip or per
+condition. Condition names can be automatically derived from the clip URL or file
+name using regular expressions. Set the desired pattern via the ``condition_pattern``
+option in the configuration file. The pattern must contain named capture groups
+whose values will appear in the generated reports.
"""

import csv
@@ -1283,10 +1289,19 @@ def write_dict_as_csv(dic_to_write, file_name, *args, **kwargs):


def conv_filename_to_condition(f_name):
"""
extract the condition name from filename given the mask in the config
:param f_name:
:return:
"""Return condition information extracted from ``f_name``.

The regular expression defined by ``condition_pattern`` in the configuration
file is applied to the file name. The pattern should contain named capture
groups so that their values can be used as columns in the final reports. If
the pattern does not match the given file name the dictionary
``{"Unknown": "NoCondition"}`` is returned.

Example
-------
>>> config['general']['condition_pattern'] = r".*_c(?P<cond>\\d{2})_.*\\.wav"
>>> conv_filename_to_condition("D501_C03_M2_S02.wav")
{'cond': '03'}
"""
if f_name in file_to_condition_map:
return file_to_condition_map[f_name]
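As a quick illustration of the behaviour described in the new docstring, the following is a simplified, hypothetical sketch rather than the module's actual implementation: it caches results per file name and falls back to `{"Unknown": "NoCondition"}` when the pattern does not match. The pattern value and the case-insensitive flag are assumptions carried over from the sketch above.

```python
import re

# Hypothetical module-level state for this sketch only; the real parser reads the
# pattern from its configuration file and maintains file_to_condition_map itself.
condition_pattern = r".*_c(?P<condition_num>\d{1,2})_.*\.wav"
file_to_condition_map = {}

def conv_filename_to_condition_sketch(f_name):
    """Return the named-group values for f_name, caching and falling back on no match."""
    if f_name in file_to_condition_map:
        return file_to_condition_map[f_name]
    match = re.match(condition_pattern, f_name, flags=re.IGNORECASE)
    result = match.groupdict() if match else {"Unknown": "NoCondition"}
    file_to_condition_map[f_name] = result
    return result

print(conv_filename_to_condition_sketch("D501_C03_M2_S02.wav"))  # {'condition_num': '03'}
print(conv_filename_to_condition_sketch("no_match_here.mp3"))    # {'Unknown': 'NoCondition'}
```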
@@ -1577,7 +1592,7 @@ def calc_payment_stat(df):
median_time_in_sec = df[word_duration_col].median()

payment_text = df['Reward'].values[0]
-paymnet = re.findall("\d+\.\d+", payment_text)
+paymnet = re.findall(r"\d+\.\d+", payment_text)

avg_pay = 3600*float(paymnet[0])/median_time_in_sec

@@ -1591,18 +1606,6 @@ def calc_stats(input_file):
:param input_file:
:return:
"""
"""
df = pd.read_csv(input_file, low_memory=False)
median_time_in_sec = df["WorkTimeInSeconds"].median()
payment_text = df['Reward'].values[0]
paymnet = re.findall("\d+\.\d+", payment_text)

avg_pay = 3600*float(paymnet[0])/median_time_in_sec
formatted_time = time.strftime("%M:%S", time.gmtime(median_time_in_sec))
print(
f"Stats: work duration (median) {formatted_time} (MM:SS), payment per hour: ${avg_pay:.2f}"
)
"""
df = pd.read_csv(input_file, low_memory=False)
df_full = df.copy()
overall_time, overall_pay = calc_payment_stat(df)
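For context on the single-line regex change in `calc_payment_stat`, here is a minimal standalone sketch of the calculation that function performs. The reward string and median duration are invented inputs; only the hourly-pay formula and the output format are taken from the code shown in the diff.

```python
import re
import time

# Invented example inputs; in the parser they come from the MTurk results CSV
# (the "Reward" column and the median of a work-duration column).
payment_text = "$0.50"
median_time_in_sec = 900  # assumed median work time in seconds

# Raw-string regex, as in the corrected line: extracts the decimal amount, e.g. "0.50".
payment = re.findall(r"\d+\.\d+", payment_text)

# Hourly pay implied by the median work duration.
avg_pay = 3600 * float(payment[0]) / median_time_in_sec

formatted_time = time.strftime("%M:%S", time.gmtime(median_time_in_sec))
print(f"Stats: work duration (median) {formatted_time} (MM:SS), payment per hour: ${avg_pay:.2f}")
```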