Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .github/workflows/testing-code.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# CI: run the pytest suite with coverage on every push, across all
# Python versions the app supports.
name: Run Unit Test via Pytest

on: [push]

jobs:
  build:
    permissions:
      contents: read
      pull-requests: write
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Keep in sync with the interpreter versions the app supports.
        python-version: ["3.9", "3.10", "3.11"]

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # pytest and coverage are invoked below, so install them
          # explicitly instead of relying on requirements.txt to list them.
          pip install pytest coverage
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Test with pytest
        # No continue-on-error here: a failing test suite must fail the job,
        # otherwise CI silently passes on broken code.
        run: |
          coverage run -m pytest tests
      - name: Generate Coverage Report
        # Run even when the test step fails so partial coverage is reported.
        if: always()
        run: |
          coverage report -m
Empty file added app/__init_.py (NOTE(review): filename looks like a typo — a package marker should be app/__init__.py with a double trailing underscore; as written the directory is not a regular importable package)
Empty file.
80 changes: 80 additions & 0 deletions app/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Test file names
# Input fixtures used by the test suite / example runs.
EXAMPLE_POS_FILENAME = "1a_MZmine3_pos.csv"
EXAMPLE_NEG_FILENAME = "1b_MZmine3_neg.csv"
EXAMPLE_TRACER_FILENAME = "WW2DW_Tracers_Amenable.csv"
EXAMPLE_RUN_SEQUENCE_POS_FILENAME = "WW2DW_sequence_cal.csv"
EXAMPLE_RUN_SEQUENCE_NEG_FILENAME = "WW2DW_sequence_cal.csv"
EXAMPLE_SURROGATE_FILENAME = "qNTA_Surrogate_Input_File_WW2DW.csv"

# Define pos/neg/neutral adduct lists
# Each entry is (adduct label, mass delta in Da).
# Proton added - we observe Mass-(H+) and Mass+(Adduct)
# NOTE(review): values appear to be monoisotopic adduct masses shifted by one
# proton (e.g. Cl 34.968853 + H 1.007825 = 35.976678) — confirm against the
# matching logic that consumes these deltas.
NEG_ADDUCT_LI: list[tuple[str, float]] = [
    ("Cl", 35.976678),
    ("Br", 79.926161),
    ("HCO2", 46.005477),
    ("CH3CO2", 60.021127),
    ("CF3CO2", 113.992862),
]

# Proton subtracted - we observe Mass+(H+) and Mass+(Adduct)
# (e.g. Na 22.989770 - H 1.007825 = 21.981945)
POS_ADDUCT_LI: list[tuple[str, float]] = [
    ("Na", 21.981942),
    ("K", 37.955882),
    ("NH4", 17.026547),
]

# Neutral losses (negative deltas) plus a few neutral additions.
NEUTRAL_LOSSES_LI: list[tuple[str, float]] = [
    ("H2O", -18.010565),
    ("2H2O", -36.02113),
    ("3H2O", -54.031695),
    ("4H2O", -72.04226),
    ("5H2O", -90.052825),
    ("NH3", -17.0265),
    ("O", -15.99490),
    ("CO", -29.00220),
    ("CO2", -43.989829),
    ("C2H4", -28.03130),
    # NOTE(review): the entries below have positive deltas, so they are neutral
    # additions rather than losses — confirm that downstream code treats the
    # sign correctly (original author flagged the same question).
    ("CH2O2", 46.00550),
    ("CH3COOH", 60.02110),
    ("CH3OH", 32.02620),
    ("CH3CN", 41.02650),
    ("(CH3)2CHOH", 60.05810),
]

# Set to tested memory capacity of WebApp for number of features in 'adduct_matrix'
MAX_NUM_ADDUCT_FEATURES = 12000

# Column names accessed throughout app
FEATURE_ID_COL = "Feature ID"
DASHBOARD_SEARCH_COL = "For_Dashboard_Search"
FORMULA_COL = "Formula"
MASS_COL = "Mass"
RETENTION_COL = "Retention_Time"
IONIZATION_COL = "Ionization_Mode"
MOLECULAR_FORMULA_COL = "MOLECULAR_FORMULA"

# Format lists to test values against
# Accepted spellings that mark a sample column as a method blank.
ALLOWED_BLANK_FORMATS_LIST: list[str] = ["Blank", "blank", "BLANK", "MB", "Mb", "mb", "mB"]
# Columns a feature row must carry to be considered active/complete.
ACTIVE_COLUMNS_LIST: list[str] = [
    "Retention_Time",
    "Mass",
    "Ionization_Mode",
    "Compound",
]

# Establish ordering of all possible front matter (tracer/no tracer, flags/no flags, etc.)
# Output columns are emitted in this order when present.
FRONT_MATTER_ORDERING: list[str] = [
    "Ionization_Mode",
    "Mass",
    "Retention_Time",
    "Compound",
    "Tracer Chemical Match?",
    "Duplicate Feature?",
    "Is Adduct or Loss?",
    "Has Adduct or Loss?",
    "Adduct or Loss Info",
    "Final Occurrence Count",
    "Final Occurrence Percentage",
    "Final Occurrence Count (with flags)",
    "Final Occurrence Percentage (with flags)",
]
2 changes: 1 addition & 1 deletion app/feature/tests/test_feature.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
import unittest
from Feature import Feature_MS2 as ms2
from feature import Feature_MS2 as ms2
from test_data import parsedMGF as mgfData

#Note about test mgfData
Expand Down
Empty file added app/ms1/__init__.py
Empty file.
53 changes: 27 additions & 26 deletions app/ms1/nta_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import traceback
import shutil
import json
from typing import Union
from datetime import datetime
from dask.distributed import Client, LocalCluster, fire_and_forget
from zipfile import ZipFile, ZIP_DEFLATED
Expand Down Expand Up @@ -35,13 +36,13 @@

def run_nta_dask(
parameters,
input_dfs,
tracer_df=None,
run_sequence_pos_df=None,
run_sequence_neg_df=None,
qnta_df=None,
jobid="00000000",
verbose=True,
input_dfs: list[Union[pd.DataFrame, None]],
tracer_df: Union[pd.DataFrame, None] = None,
run_sequence_pos_df: Union[pd.DataFrame, None] = None,
run_sequence_neg_df: Union[pd.DataFrame, None] = None,
qnta_df: Union[pd.DataFrame, None] = None,
jobid = "00000000",
verbose = True,
):
in_docker = os.environ.get("IN_DOCKER") != "False"
mongo_address = os.environ.get("MONGO_SERVER")
Expand Down Expand Up @@ -96,14 +97,14 @@ def run_nta_dask(

def run_nta(
parameters,
input_dfs,
tracer_df=None,
run_sequence_pos_df=None,
run_sequence_neg_df=None,
qnta_df=None,
mongo_address=None,
jobid="00000000",
verbose=True,
input_dfs: list[Union[pd.DataFrame, None]],
tracer_df: Union[pd.DataFrame, None] = None,
run_sequence_pos_df: Union[pd.DataFrame, None] = None,
run_sequence_neg_df: Union[pd.DataFrame, None] = None,
qnta_df: Union[pd.DataFrame, None] = None,
jobid = "00000000",
verbose = True,
mongo_address: Union[str, None] = None,
in_docker=True,
):
nta_run = NtaRun(
Expand Down Expand Up @@ -140,7 +141,7 @@ def __init__(
run_sequence_pos_df=None,
run_sequence_neg_df=None,
qnta_df=None,
mongo_address=None,
mongo_address: Union[str, None] = None,
jobid="00000000",
verbose=True,
in_docker=True,
Expand Down Expand Up @@ -406,7 +407,7 @@ def check_existence_of_mass_column(self, input_dfs):

return

def check_retention_time_column(self, input_dfs):
def check_retention_time_column(self, input_dfs: list[Union[pd.DataFrame, None]]):
"""
Check for the existence of alternate spellings of 'Retention_Time' column in input dataframes and rename to "Retention_Time".

Expand Down Expand Up @@ -609,10 +610,10 @@ def pass_through_cols(self):
self.pass_through = [
task_fun.passthrucol(df, self.all_headers)[0] if df is not None else None for df in self.dfs
]
self.dfs = [task_fun.passthrucol(df, self.all_headers)[1] if df is not None else None for df in self.dfs]
self.dfs: list[Union[pd.DataFrame, None]] = [task_fun.passthrucol(df, self.all_headers)[1] if df is not None else None for df in self.dfs]
return

def filter_void_volume(self, min_rt):
def filter_void_volume(self, min_rt: float):
"""
Accesses self.dfs (list of dataframes) and self.parameters["minimum_rt"][1]
then removes all rows with a value below "minimum_rt" in the "Retention_Time"
Expand All @@ -624,7 +625,7 @@ def filter_void_volume(self, min_rt):
None
"""
# Iterate through dfs, removing rows where "Retention_Time" is below min_rt threshold
self.dfs = [df.loc[df["Retention_Time"] > min_rt].copy() if df is not None else None for df in self.dfs]
self.dfs: list[Union[pd.DataFrame, None]] = [df.loc[df["Retention_Time"] > min_rt].copy() if df is not None else None for df in self.dfs]
return

def filter_duplicates(self):
Expand All @@ -641,11 +642,11 @@ def filter_duplicates(self):
None
"""
# Get ppm, mass_accuracy, and rt_accuracy parameters
ppm = self.parameters["mass_accuracy_units"][1] == "ppm"
ppm: bool = self.parameters["mass_accuracy_units"][1] == "ppm"
mass_accuracy = float(self.parameters["mass_accuracy"][1])
rt_accuracy = float(self.parameters["rt_accuracy"][1])
# Perform duplicate flagging functions
self.dfs = [
self.dfs: list[Union[pd.DataFrame, None]] = [
task_fun.duplicates(df, mass_accuracy, rt_accuracy, ppm, self.blank_headers, self.sample_headers)
if df is not None
else None
Expand Down Expand Up @@ -677,7 +678,7 @@ def calc_statistics(self):
# Iterate through dfs, calling chunk_stats() function
# NTAW-49: Raises custom ValueError if blank columns are improperly named in the input dataframes
try:
self.dfs = [
self.dfs: list[Union[pd.DataFrame, None]] = [
task_fun.chunk_stats(
df,
min_blank_detection_percentage,
Expand Down Expand Up @@ -831,7 +832,7 @@ def check_tracers(self):
)
for df in self.dfs
]
self.dfs = [
self.dfs: list[Union[pd.DataFrame, None]] = [
(
task_fun.check_feature_tracers(
df,
Expand Down Expand Up @@ -1007,12 +1008,12 @@ def clean_features(self):
task_fun.clean_features(df, controls, tracer_df=tracer_df_bool)[2] if df is not None else None
for index, df in enumerate(self.dfs)
]
self.dfs = [
self.dfs: list[Union[pd.DataFrame, None]] = [
task_fun.clean_features(df, controls, tracer_df=tracer_df_bool)[0] if df is not None else None
for index, df in enumerate(self.dfs)
]
# subtract blanks from means
self.dfs = [task_fun.Blank_Subtract_Mean(df) if df is not None else None for index, df in enumerate(self.dfs)]
self.dfs: list[Union[pd.DataFrame, None]] = [task_fun.Blank_Subtract_Mean(df) if df is not None else None for index, df in enumerate(self.dfs)]
# subtract blanks from means
self.dfs_flagged = [
task_fun.Blank_Subtract_Mean(df) if df is not None else None for index, df in enumerate(self.dfs_flagged)
Expand Down
Loading
Loading