diff --git a/.github/workflows/testing-code.yml b/.github/workflows/testing-code.yml
new file mode 100644
index 00000000..1bfc49fb
--- /dev/null
+++ b/.github/workflows/testing-code.yml
@@ -0,0 +1,31 @@
+name: Run Unit Tests via Pytest
+
+on: [push]
+
+jobs:
+  build:
+    permissions:
+      contents: read
+      pull-requests: write
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.10", "3.11"]
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+      - name: Test with pytest
+        run: |
+          coverage run -m pytest tests
+        continue-on-error: true
+      - name: Generate Coverage Report
+        run: |
+          coverage report -m
\ No newline at end of file
diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/app/constants.py b/app/constants.py
new file mode 100644
index 00000000..3ad5ab5a
--- /dev/null
+++ b/app/constants.py
@@ -0,0 +1,80 @@
+# Test file names
+EXAMPLE_POS_FILENAME = "1a_MZmine3_pos.csv"
+EXAMPLE_NEG_FILENAME = "1b_MZmine3_neg.csv"
+EXAMPLE_TRACER_FILENAME = "WW2DW_Tracers_Amenable.csv"
+EXAMPLE_RUN_SEQUENCE_POS_FILENAME = "WW2DW_sequence_cal.csv"
+EXAMPLE_RUN_SEQUENCE_NEG_FILENAME = "WW2DW_sequence_cal.csv"
+EXAMPLE_SURROGATE_FILENAME = "qNTA_Surrogate_Input_File_WW2DW.csv"
+
+# Define pos/neg/neutral adduct lists
+# Proton added - we observe Mass-(H+) and Mass+(Adduct)
+NEG_ADDUCT_LI = [
+    ("Cl", 35.976678),
+    ("Br", 79.926161),
+    ("HCO2", 46.005477),
+    ("CH3CO2", 60.021127),
+    ("CF3CO2", 113.992862),
+]
+
+# Proton subtracted - we observe Mass+(H+) and Mass+(Adduct)
+POS_ADDUCT_LI = [
+    ("Na", 21.981942),
+    ("K", 37.955882),
+    ("NH4", 17.026547),
+]
+
+NEUTRAL_LOSSES_LI = [
+    ("H2O", -18.010565),
+    ("2H2O", -36.02113),
+    ("3H2O", -54.031695),
+    ("4H2O", -72.04226),
+    ("5H2O", -90.052825),
+    ("NH3", -17.0265),
+    ("O", -15.99490),
+    ("CO", -29.00220),
+    ("CO2", -43.989829),
+    ("C2H4", -28.03130),
+    ("CH2O2", 46.00550),  # note here and below - not losses? but still neutral?
+    ("CH3COOH", 60.02110),
+    ("CH3OH", 32.02620),
+    ("CH3CN", 41.02650),
+    ("(CH3)2CHOH", 60.05810),
+]
+
+# Set to tested memory capacity of WebApp for number of features in 'adduct_matrix'
+MAX_NUM_ADDUCT_FEATURES = 12000
+
+# Column names accessed throughout app
+FEATURE_ID_COL = "Feature ID"
+DASHBOARD_SEARCH_COL = "For_Dashboard_Search"
+FORMULA_COL = "Formula"
+MASS_COL = "Mass"
+RETENTION_COL = "Retention_Time"
+IONIZATION_COL = "Ionization_Mode"
+MOLECULAR_FORMULA_COL = "MOLECULAR_FORMULA"
+
+# Format lists to test values against
+ALLOWED_BLANK_FORMATS_LIST = ["Blank", "blank", "BLANK", "MB", "Mb", "mb", "mB"]
+ACTIVE_COLUMNS_LIST = [
+    "Retention_Time",
+    "Mass",
+    "Ionization_Mode",
+    "Compound",
+]
+
+# Establish ordering of all possible front matter (tracer/no tracer, flags/no flags, etc.)
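+# Consumed by MPP_Ready() in app/ms1/task_functions.py to select and order the output's front-matter columns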
+FRONT_MATTER_ORDERING = [
+    "Ionization_Mode",
+    "Mass",
+    "Retention_Time",
+    "Compound",
+    "Tracer Chemical Match?",
+    "Duplicate Feature?",
+    "Is Adduct or Loss?",
+    "Has Adduct or Loss?",
+    "Adduct or Loss Info",
+    "Final Occurrence Count",
+    "Final Occurrence Percentage",
+    "Final Occurrence Count (with flags)",
+    "Final Occurrence Percentage (with flags)",
+]
\ No newline at end of file
diff --git a/app/feature/tests/test_feature.py b/app/feature/tests/test_feature.py
index 2557a591..178cc035 100644
--- a/app/feature/tests/test_feature.py
+++ b/app/feature/tests/test_feature.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 import unittest
-from Feature import Feature_MS2 as ms2
+from feature import Feature_MS2 as ms2
 from test_data import parsedMGF as mgfData
 
 #Note about test mgfData
diff --git a/app/ms1/__init__.py b/app/ms1/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/app/ms1/nta_task.py b/app/ms1/nta_task.py
index 5c869880..592eae94 100644
--- a/app/ms1/nta_task.py
+++ b/app/ms1/nta_task.py
@@ -6,6 +6,7 @@
 import traceback
 import shutil
 import json
+from typing import Union
 from datetime import datetime
 from dask.distributed import Client, LocalCluster, fire_and_forget
 from zipfile import ZipFile, ZIP_DEFLATED
@@ -35,13 +36,13 @@ def run_nta_dask(
     parameters,
-    input_dfs,
-    tracer_df=None,
-    run_sequence_pos_df=None,
-    run_sequence_neg_df=None,
-    qnta_df=None,
-    jobid="00000000",
-    verbose=True,
+    input_dfs: list[Union[pd.DataFrame, None]],
+    tracer_df: Union[pd.DataFrame, None] = None,
+    run_sequence_pos_df: Union[pd.DataFrame, None] = None,
+    run_sequence_neg_df: Union[pd.DataFrame, None] = None,
+    qnta_df: Union[pd.DataFrame, None] = None,
+    jobid: str = "00000000",
+    verbose: bool = True,
 ):
     in_docker = os.environ.get("IN_DOCKER") != "False"
     mongo_address = os.environ.get("MONGO_SERVER")
@@ -96,14 +97,14 @@ def run_nta(
     parameters,
-    input_dfs,
-    tracer_df=None,
-    run_sequence_pos_df=None,
-    run_sequence_neg_df=None,
-    qnta_df=None,
-    mongo_address=None,
-    jobid="00000000",
-    verbose=True,
+    input_dfs: list[Union[pd.DataFrame, None]],
+    tracer_df: Union[pd.DataFrame, None] = None,
+    run_sequence_pos_df: Union[pd.DataFrame, None] = None,
+    run_sequence_neg_df: Union[pd.DataFrame, None] = None,
+    qnta_df: Union[pd.DataFrame, None] = None,
+    jobid: str = "00000000",
+    verbose: bool = True,
+    mongo_address: Union[str, None] = None,
     in_docker=True,
 ):
     nta_run = NtaRun(
@@ -140,7 +141,7 @@ def __init__(
         run_sequence_pos_df=None,
         run_sequence_neg_df=None,
         qnta_df=None,
-        mongo_address=None,
+        mongo_address: Union[str, None] = None,
         jobid="00000000",
         verbose=True,
         in_docker=True,
@@ -406,7 +407,7 @@ def check_existence_of_mass_column(self, input_dfs):
 
         return
 
-    def check_retention_time_column(self, input_dfs):
+    def check_retention_time_column(self, input_dfs: list[Union[pd.DataFrame, None]]):
         """
         Check for the existence of alternate spellings of 'Retention_Time' column in input dataframes and rename to "Retention_Time".
@@ -609,10 +610,10 @@ def pass_through_cols(self): self.pass_through = [ task_fun.passthrucol(df, self.all_headers)[0] if df is not None else None for df in self.dfs ] - self.dfs = [task_fun.passthrucol(df, self.all_headers)[1] if df is not None else None for df in self.dfs] + self.dfs: list[Union[pd.DataFrame, None]] = [task_fun.passthrucol(df, self.all_headers)[1] if df is not None else None for df in self.dfs] return - def filter_void_volume(self, min_rt): + def filter_void_volume(self, min_rt: float): """ Accesses self.dfs (list of dataframes) and self.parameters["minimum_rt"][1] then removes all rows with a value below "minimum_rt" in the "Retention_Time" @@ -624,7 +625,7 @@ def filter_void_volume(self, min_rt): None """ # Iterate through dfs, removing rows where "Retention_Time" is below min_rt threshold - self.dfs = [df.loc[df["Retention_Time"] > min_rt].copy() if df is not None else None for df in self.dfs] + self.dfs: list[Union[pd.DataFrame, None]] = [df.loc[df["Retention_Time"] > min_rt].copy() if df is not None else None for df in self.dfs] return def filter_duplicates(self): @@ -641,11 +642,11 @@ def filter_duplicates(self): None """ # Get ppm, mass_accuracy, and rt_accuracy parameters - ppm = self.parameters["mass_accuracy_units"][1] == "ppm" + ppm: bool = self.parameters["mass_accuracy_units"][1] == "ppm" mass_accuracy = float(self.parameters["mass_accuracy"][1]) rt_accuracy = float(self.parameters["rt_accuracy"][1]) # Perform duplicate flagging functions - self.dfs = [ + self.dfs: list[Union[pd.DataFrame, None]] = [ task_fun.duplicates(df, mass_accuracy, rt_accuracy, ppm, self.blank_headers, self.sample_headers) if df is not None else None @@ -677,7 +678,7 @@ def calc_statistics(self): # Iterate through dfs, calling chunk_stats() function # NTAW-49: Raises custom ValueError if blank columns are improperly named in the input dataframes try: - self.dfs = [ + self.dfs: list[Union[pd.DataFrame, None]] = [ task_fun.chunk_stats( df, min_blank_detection_percentage, @@ -831,7 +832,7 @@ def check_tracers(self): ) for df in self.dfs ] - self.dfs = [ + self.dfs: list[Union[pd.DataFrame, None]] = [ ( task_fun.check_feature_tracers( df, @@ -1007,12 +1008,12 @@ def clean_features(self): task_fun.clean_features(df, controls, tracer_df=tracer_df_bool)[2] if df is not None else None for index, df in enumerate(self.dfs) ] - self.dfs = [ + self.dfs: list[Union[pd.DataFrame, None]] = [ task_fun.clean_features(df, controls, tracer_df=tracer_df_bool)[0] if df is not None else None for index, df in enumerate(self.dfs) ] # subtract blanks from means - self.dfs = [task_fun.Blank_Subtract_Mean(df) if df is not None else None for index, df in enumerate(self.dfs)] + self.dfs: list[Union[pd.DataFrame, None]] = [task_fun.Blank_Subtract_Mean(df) if df is not None else None for index, df in enumerate(self.dfs)] # subtract blanks from means self.dfs_flagged = [ task_fun.Blank_Subtract_Mean(df) if df is not None else None for index, df in enumerate(self.dfs_flagged) diff --git a/app/ms1/task_functions.py b/app/ms1/task_functions.py index fe5d38f5..fe60f2e8 100644 --- a/app/ms1/task_functions.py +++ b/app/ms1/task_functions.py @@ -1,3 +1,4 @@ +from typing import Literal import pandas as pd import numpy as np from operator import itemgetter @@ -6,8 +7,10 @@ import os import re import logging +from typing import Union from openpyxl.utils import get_column_letter import io +from ..constants import * logger = logging.getLogger("nta_app.ms1") @@ -39,7 +42,7 @@ def assign_feature_id(df_in, start=1): # Adjust list 
based on start
     to_assign = [x + start for x in row_nums]
     # Insert column at the front of df
-    df.insert(0, "Feature ID", to_assign.copy())
+    df.insert(0, FEATURE_ID_COL, to_assign.copy())
     # Return df
     return df
@@ -54,7 +57,7 @@ def differences(s1, s2):
     Outputs:
         count (int, # of characters different between s1 and s2)
     """
-    # Replace special characters in s1 and s1 (not underscores or dashes)
+    # Replace special characters in s1 and s2 (not underscores or dashes)
     s1 = re.sub(re.compile(r"\([^)]*\)"), "", s1)
     s2 = re.sub(re.compile(r"\([^)]*\)"), "", s2)
     # Count up different characters between s1 and s2, plus difference in string length
@@ -62,7 +65,7 @@
     # count = sum(1 for a, b in zip(s1, s2) if a != b) + abs(len(s1) - len(s2))
     mytup = tuple(zip(s1, s2))
     count = abs(len(s1) - len(s2))
-    diff_index = None  # This value is only important if the final count ==1
+    diff_index = None  # This value is only important if the final count == 1
     for i in range(len(mytup)):
         if mytup[i][0] != mytup[i][1]:
             count += 1
@@ -79,7 +82,7 @@
     return count
 
 
-def formulas(df):
+def formulas(df: pd.DataFrame):
     """
     Return list of formulas tagged 'For_Dashboard_Search'
@@ -89,16 +92,16 @@
         formulas_list (list)
     """
     # Remove Formula duplicates, keeping the first
-    df.drop_duplicates(subset="Formula", keep="first", inplace=True)
+    df.drop_duplicates(subset=FORMULA_COL, keep="first", inplace=True)
     # Subset df by items selected for Dashboard search
-    formulas = df.loc[df["For_Dashboard_Search"] == "1", "Formula"].values
+    formulas = df.loc[df[DASHBOARD_SEARCH_COL] == "1", FORMULA_COL].values
     # Get formulas in list
     formulas_list = [str(i) for i in formulas]
     # Return list
     return formulas_list
 
 
-def masses(df):
+def masses(df: pd.DataFrame):
     """
     Return list of masses tagged 'For_Dashboard_Search'
@@ -108,7 +111,7 @@
         masses_list (list)
     """
     # Subset df by items selected for Dashboard search
-    masses = df.loc[df["For_Dashboard_Search"] == "1", "Mass"].values
+    masses = df.loc[df[DASHBOARD_SEARCH_COL] == "1", MASS_COL].values
     # Update logger
     logger.info("# of masses for dashboard search: {} out of {}".format(len(masses), len(df)))
     # Get masses in list
@@ -117,7 +120,7 @@
     return masses_list
 
 
-def parse_headers(df_in):
+def parse_headers(df_in: pd.DataFrame):
     """
     A function to group the dataframe's column headers into sets of similar names
     which represent replicates
@@ -138,7 +141,7 @@
     # Iterate through list of columns, calling differences() function
     # When differences() return is greater than some value, increase countD (group assigner)
     for s in range(0, len(headers) - 1):
-        if differences(str(headers[s]), str(headers[s + 1])) < 2:  # 2 is more common
+        if differences(str(headers[s]), str(headers[s + 1])) < 2:  # 2 is more common. TODO: this threshold may be a weak point of differences()
            countS += 1
        if differences(str(headers[s]), str(headers[s + 1])) >= 2:
            countD += 1
@@ -152,7 +155,7 @@
     # Group lists of columns by group assigner (countD)
     groups = groupby(new_headers, itemgetter(1))
     # Extract column names from group tuples
-    new_headers_list = [[item[0] for item in data] for (key, data) in groups]
+    new_headers_list: list[list[str]] = [[item[0] for item in data] for (key, data) in groups]
     # Check that replicate samples are present. Raise IndexError if no replicate samples are found.
     max_group_size = 0
     for item in new_headers_list:
@@ -167,17 +170,15 @@
 # NTAW-594
-def get_sample_and_blank_headers(dfs):
+def get_sample_and_blank_headers(dfs: tuple[Union[pd.DataFrame, None], Union[pd.DataFrame, None]]):
     if dfs[0] is not None:
         all_headers = parse_headers(dfs[0])
     else:
         all_headers = parse_headers(dfs[1])
     # get all header groups
     header_groups = [item for item in all_headers if (len(item) > 1)]
-    # get blank headers
-    allowed_blank_formats = ["Blank", "blank", "BLANK", "MB", "Mb", "mb", "mB"]
     # Should be more than one blank in group, so blank_headers uses header_groups
-    blank_headers = [item for item in header_groups if any(x in head for head in item for x in allowed_blank_formats)]
+    blank_headers = [item for item in header_groups if any(x in head for head in item for x in ALLOWED_BLANK_FORMATS_LIST)]
     # get sample headers
     sample_headers = [item for item in header_groups if not any(item == x for x in blank_headers)]
@@ -187,7 +188,7 @@
 """PASS-THROUGH COLUMNS FUNCTION"""
 
 
-def passthrucol(df_in, all_headers):
+def passthrucol(df_in: pd.DataFrame, all_headers: list[list[str]]):
     """
     Find all columns in dfs that aren't necessary (i.e., not Mass and RT) and
     store these columns to be later appended to the output -- TMF 11/20/23
@@ -200,21 +201,14 @@
     """
     # Make a copy of the input df
     df = df_in.copy()
-    # Define active_cols: Keep 'Feature ID' in pt_headers to merge later
-    active_cols = [
-        "Retention_Time",
-        "Mass",
-        "Ionization_Mode",
-        "Compound",
-    ]
     # Create list of pass through headers that are not in the active columns
-    pt_headers = ["Feature ID"] + [
+    pt_headers = [FEATURE_ID_COL] + [
         item
         for sublist in all_headers
         for item in sublist
-        if len(sublist) == 1 and not any(x in sublist for x in active_cols)
+        if len(sublist) == 1 and not any(x in sublist for x in ACTIVE_COLUMNS_LIST)
     ]
-    headers = ["Feature ID"] + [
+    headers = [FEATURE_ID_COL] + [
         item for sublist in all_headers for item in sublist if not any(x in item for x in pt_headers)
     ]
     # Save pass through columns in df
@@ -227,7 +221,7 @@
 """ADDUCT IDENTIFICATION FUNCTIONS"""
 
 
-def adduct_matrix(df, a_name, delta, Mass_Difference, Retention_Difference, ppm):
+def adduct_matrix(df: pd.DataFrame, a_name: str, delta: float, Mass_Difference: float, Retention_Difference: float, ppm: bool):
     """
     Modified version of Jeff's 'adduct_identifier' function.
     This function executes the matrix portion of the old function -- TMF 10/27/23
@@ -243,9 +237,9 @@
         df (dataframe, with adduct information added to columns)
     """
     # 'Mass' to matrix, 'Retention Time' to matrix, 'Feature ID' to matrix
-    mass = df["Mass"].to_numpy()
-    rts = df["Retention_Time"].to_numpy()
-    ids = df["Feature ID"].to_numpy()
+    mass = df[MASS_COL].to_numpy()
+    rts = df[RETENTION_COL].to_numpy()
+    ids = df[FEATURE_ID_COL].to_numpy()
     # Reshape 'masses', 'rts', and 'ids'
     masses_vector = np.reshape(mass, (len(mass), 1))
     rts_vector = np.reshape(rts, (len(rts), 1))
@@ -320,7 +314,7 @@
     return df
 
 
-def collapse_adduct_id_array(the_array, delta_name):
+def collapse_adduct_id_array(the_array: np.ndarray, delta_name: str):
     """
     Helper function that collapses each row of the adduct ID matrix into
     a string containing all matches
@@ -342,7 +336,7 @@
     return adduct_info_str
 
 
-def window_size(df_in, mass_diff_mass=112.985586):
+def window_size(df_in: pd.DataFrame, mass_diff_mass=112.985586):
     """
     # Estimate a sliding window size from the input data by finding the maximum
     distance between indices differing by 'mass_diff_mass' -- TMF 10/27/23
@@ -374,7 +368,7 @@
     return val
 
 
-def chunk_adducts(df_in, n, step, a_name, delta, Mass_Difference, Retention_Difference, ppm):
+def chunk_adducts(df_in: pd.DataFrame, n: int, step: int, a_name: str, delta: float, Mass_Difference: float, Retention_Difference: float, ppm: bool):
     """
     Function that takes the input data, chunks it based on window size, then loops
     through chunks and sends them to 'adduct_matrix' for calculation -- TMF 10/27/23
@@ -397,7 +391,7 @@
     to_test_list = [df[i : i + n] for i in range(0, df.shape[0], step)]
     to_test_list = [i for i in to_test_list if (i.shape[0] > n / 2)]
     # Create list, iterate through df chunks and append results to list
-    li = []
+    li: list[pd.DataFrame] = []
     for x in to_test_list:
         dum = adduct_matrix(x, a_name, delta, Mass_Difference, Retention_Difference, ppm)
         li.append(dum)
@@ -407,12 +401,14 @@
     return output
 
 
-def adduct_identifier(df_in, adduct_selections, Mass_Difference, Retention_Difference, ppm, ionization):
+def adduct_identifier(df_in: pd.DataFrame, adduct_selections: list[tuple[str, float]], Mass_Difference: float, Retention_Difference: float, ppm: bool, ionization: str):
     """
     Function that does the front-end of the old 'adduct_identifier'; we trim the input data
     by identifying features that are near to adduct distance from another feature. This
     shortened dataframe is used to calculate a window size, then loop through possible
     adducts, passing to 'chunk_adducts' -- TMF 10/27/23
+    TODO: Add 'ionization' to the Inputs list in this docstring.
+ Inputs: df_in (dataframe) adduct_selections (list of tuples, contains adduct names and masses selected by user) @@ -429,50 +425,17 @@ def adduct_identifier(df_in, adduct_selections, Mass_Difference, Retention_Diffe df["Rounded RT"] = df["Retention_Time"].round(1) # Create tuple of 'Rounded RT' and 'Rounded Mass' df["Rounded_RT_Mass_Pair"] = list(zip(df["Rounded RT"], df["Rounded Mass"])) - # Define pos/neg/neutral adduct lists - # Proton subtracted - we observe Mass+(H+) and Mass+(Adduct) - pos_adduct_li = [ - ("Na", 21.981942), - ("K", 37.955882), - ("NH4", 17.026547), - ] - # Proton added - we observe Mass-(H+) and Mass+(Adduct) - neg_adduct_li = [ - ("Cl", 35.976678), - ("Br", 79.926161), - ("HCO2", 46.005477), - ("CH3CO2", 60.021127), - ("CF3CO2", 113.992862), - ] - # no change to neutral losses - neutral_losses_li = [ - ("H2O", -18.010565), - ("2H2O", -36.02113), - ("3H2O", -54.031695), - ("4H2O", -72.04226), - ("5H2O", -90.052825), - ("NH3", -17.0265), - ("O", -15.99490), - ("CO", -29.00220), - ("CO2", -43.989829), - ("C2H4", -28.03130), - ("CH2O2", 46.00550), # note here and below - not losses? but still neutral? - ("CH3COOH", 60.02110), - ("CH3OH", 32.02620), - ("CH3CN", 41.02650), - ("(CH3)2CHOH", 60.05810), - ] # Determine possible adduct dictionary according to ionization if ionization == "positive": - possible_adduct_deltas = [item for item in pos_adduct_li if item[0] in adduct_selections[0]] + possible_adduct_deltas = [item for item in POS_ADDUCT_LI if item[0] in adduct_selections[0]] possible_adduct_deltas = possible_adduct_deltas + [ - item for item in neutral_losses_li if item[0] in adduct_selections[2] + item for item in NEUTRAL_LOSSES_LI if item[0] in adduct_selections[2] ] possible_adduct_deltas = dict(possible_adduct_deltas) else: - possible_adduct_deltas = [item for item in neg_adduct_li if item[0] in adduct_selections[1]] + possible_adduct_deltas = [item for item in NEG_ADDUCT_LI if item[0] in adduct_selections[1]] possible_adduct_deltas = possible_adduct_deltas + [ - item for item in neutral_losses_li if item[0] in adduct_selections[2] + item for item in NEUTRAL_LOSSES_LI if item[0] in adduct_selections[2] ] possible_adduct_deltas = dict(possible_adduct_deltas) # Create empty list to hold mass shift/RT tuples @@ -500,18 +463,16 @@ def adduct_identifier(df_in, adduct_selections, Mass_Difference, Retention_Diffe to_test["Has Adduct or Loss?"] = 0 to_test["Is Adduct or Loss?"] = 0 to_test["Adduct or Loss Info"] = "" - # Set 'n' to tested memory capacity of WebApp for number of features in 'adduct_matrix' - n = 12000 - # If 'to_test' is less than n, send it straight to 'adduct_matrix' - if to_test.shape[0] <= n: + # If 'to_test' is less than MAX_NUM_ADDUCT_FEATURES, send it straight to 'adduct_matrix' + if to_test.shape[0] <= MAX_NUM_ADDUCT_FEATURES: for a_name, delta in possible_adduct_deltas.items(): to_test = adduct_matrix(to_test, a_name, delta, Mass_Difference, Retention_Difference, ppm) # Else, calculate the moving window size and send 'to_test' to 'chunk_adducts' else: - step = n - window_size(to_test) + step = MAX_NUM_ADDUCT_FEATURES - window_size(to_test) # Loop through possible adducts, perform 'adduct_matrix' for a_name, delta in possible_adduct_deltas.items(): - to_test = chunk_adducts(to_test, n, step, a_name, delta, Mass_Difference, Retention_Difference, ppm) + to_test = chunk_adducts(to_test, MAX_NUM_ADDUCT_FEATURES, step, a_name, delta, Mass_Difference, Retention_Difference, ppm) # Concatenate 'Has Adduct or Loss?', 'Is Adduct or Loss?', 'Adduct or Loss 
Info' to df
     df_in = pd.merge(
         df_in,
@@ -618,7 +579,7 @@
     return output
 
 
-def duplicates(df_in, mass_cutoff, rt_cutoff, ppm, blank_headers, sample_headers):
+def duplicates(df_in: pd.DataFrame, mass_cutoff: float, rt_cutoff: float, ppm: bool, blank_headers: list[list[str]], sample_headers: list[list[str]]):
     """
     Drop duplicates from input dataframe, based on mass_cutoff and rt_cutoff.
     Includes logic statement for determining if the dataframe is too large to
@@ -632,7 +593,7 @@
         df_in (dataframe)
         mass_cutoff (float, value for determining if masses are close enough)
         rt_cutoff (float, value for determining if rts are close enough)
-        ppm (int, binary yes/no for using ppm as units)
+        ppm (bool, whether ppm is used as the mass accuracy units)
     Outputs:
         output (dataframe, dataframe with duplicate flag column added)
     """
@@ -667,7 +628,7 @@
 """CALCULATE STATISTICS FUNCTIONS"""
 
 
-def statistics(df_in, blank_headers, sample_headers):
+def statistics(df_in: pd.DataFrame, blank_headers: list[list[str]], sample_headers: list[list[str]]):
     """
     Calculates statistics (mean, median, std, CV, N_Abun, & Percent Abun) on
     the dataframe. Includes logic statement for determining if the dataframe is
@@ -1486,7 +1447,8 @@
     # Return df (data), df_flagged (data + flagged data)
     return df, df_flagged
-
+# TODO: tracer_df as defined in nta_task should be a DataFrame or None. If we want this to be a boolean, we should phrase the parameter name as
+# a yes-no question, e.g. has_tracer_df, which would be False if tracer_df was None.
 def clean_features(df_in, controls, tracer_df=False):
     """
     Function that removes (blanks out) observations at feature and occurrence level
@@ -1577,7 +1539,7 @@
     return df, docs, df_flagged
 
 
-def Blank_Subtract_Mean(df_in):
+def Blank_Subtract_Mean(df_in: pd.DataFrame):
     """
     Calculate the mean blank intensity for each feature and subtract that
     value from each sample's mean value for that feature.
@@ -1607,7 +1569,7 @@
 """FUNCTIONS FOR COMBINING DATAFRAMES / FILE PREPARATION"""
 
 
-def combine(df1, df2):
+def combine(df1: Union[pd.DataFrame, None], df2: Union[pd.DataFrame, None]):
     """
     Function to combine positive and negative mode dataframes into df_combined
@@ -1628,7 +1590,7 @@
     # Get column names
     columns = dfc.columns.values.tolist()
     # Drop duplicates (should not be any)
-    dfc = dfc.drop_duplicates(subset=["Mass", "Retention_Time"])
+    dfc = dfc.drop_duplicates(subset=[MASS_COL, RETENTION_COL])
     # Get sample Means
     Mean_list = dfc.columns[
         (dfc.columns.str.contains(pat="Mean ") == True)
@@ -1638,12 +1600,12 @@
     dfc["N_Abun_Samples"] = dfc[Mean_list].count(axis=1, numeric_only=True)
     dfc["Mean_Abun_Samples"] = dfc[Mean_list].median(axis=1, skipna=True).round(0)
     # Sort by 'Mass' and 'Retention_Time'
-    dfc = dfc[columns].sort_values(["Mass", "Retention_Time"], ascending=[True, True])
+    dfc: pd.DataFrame = dfc[columns].sort_values([MASS_COL, RETENTION_COL], ascending=[True, True])
     # Return combined dataframe
     return dfc
 
-
-def combine_doc(doc1, doc2, tracer_df=False):
+# TODO: Is tracer_df necessary if it is unused?
+def combine_doc(doc1: Union[pd.DataFrame, None], doc2: Union[pd.DataFrame, None], tracer_df=False): """ Function to combine positive and negative mode docs for filter_documentation sheet @@ -1654,27 +1616,25 @@ def combine_doc(doc1, doc2, tracer_df=False): Outputs: dfc (dataframe, doc1 and doc2 combined) """ - # Define blank sub-strings - blanks = ["MB", "mb", "mB", "Mb", "blank", "Blank", "BLANK"] # Recombine doc and dupe if doc1 is not None and doc2 is not None: # Get Mean columns for blanks and samples Mean = doc1.columns[doc1.columns.str.contains(pat="Mean ")].tolist() - Mean_Samples = [md for md in Mean if not any(x in md for x in blanks)] - Mean_MB = [md for md in Mean if any(x in md for x in blanks)] + Mean_Samples = [md for md in Mean if not any(x in md for x in ALLOWED_BLANK_FORMATS_LIST)] + Mean_MB = [md for md in Mean if any(x in md for x in ALLOWED_BLANK_FORMATS_LIST)] dfc = pd.concat([doc1, doc2], sort=True) # fixing pandas FutureWarning dfc = dfc.reindex(columns=doc1.columns) elif doc1 is not None: # Get Mean columns for blanks and samples Mean = doc1.columns[doc1.columns.str.contains(pat="Mean ")].tolist() - Mean_Samples = [md for md in Mean if not any(x in md for x in blanks)] - Mean_MB = [md for md in Mean if any(x in md for x in blanks)] + Mean_Samples = [md for md in Mean if not any(x in md for x in ALLOWED_BLANK_FORMATS_LIST)] + Mean_MB = [md for md in Mean if any(x in md for x in ALLOWED_BLANK_FORMATS_LIST)] dfc = doc1.copy() else: # Get Mean columns for blanks and samples Mean = doc2.columns[doc2.columns.str.contains(pat="Mean ")].tolist() - Mean_Samples = [md for md in Mean if not any(x in md for x in blanks)] - Mean_MB = [md for md in Mean if any(x in md for x in blanks)] + Mean_Samples = [md for md in Mean if not any(x in md for x in ALLOWED_BLANK_FORMATS_LIST)] + Mean_MB = [md for md in Mean if any(x in md for x in ALLOWED_BLANK_FORMATS_LIST)] dfc = doc2.copy() # Select columns for keeping, with tracer conditional @@ -1699,12 +1659,12 @@ def combine_doc(doc1, doc2, tracer_df=False): dfc = dfc[cols] dfc.rename({"BlkStd_cutoff": "Selected MRL"}, axis=1, inplace=True) # Sort by 'Mass' and 'Retention_Time' - dfc = dfc.sort_values(["Feature ID"], ascending=[True]) + dfc = dfc.sort_values([FEATURE_ID_COL], ascending=[True]) # Return filter_documentation dataframe with removed duplicates appended return dfc -def MPP_Ready(dfc, pts, blank_headers, sample_headers): +def MPP_Ready(dfc: pd.DataFrame, pts: list[Union[pd.DataFrame, None]], blank_headers: list[list[str]], sample_headers: list[list[str]]): """ Function that re-combines the pass-through columns with the processed dataframe plus some final column sorting. 
@@ -1719,13 +1679,13 @@
     # Assign pass through columns to pt_cols for re_org
     if pts[0] is not None and pts[1] is not None:
         pt_com = pd.concat([pts[0], pts[1]], axis=0)
-        dfc = pd.merge(dfc, pt_com, how="left", on=["Feature ID"])
+        dfc = pd.merge(dfc, pt_com, how="left", on=[FEATURE_ID_COL])
         pt_cols = pts[0].columns.tolist()
     elif pts[0] is not None:
-        dfc = pd.merge(dfc, pts[0], how="left", on=["Feature ID"])
+        dfc = pd.merge(dfc, pts[0], how="left", on=[FEATURE_ID_COL])
         pt_cols = pts[0].columns.tolist()
     else:
-        dfc = pd.merge(dfc, pts[1], how="left", on=["Feature ID"])
+        dfc = pd.merge(dfc, pts[1], how="left", on=[FEATURE_ID_COL])
         pt_cols = pts[1].columns.tolist()
 
     # Get raw sample headers
@@ -1733,35 +1693,19 @@
     raw_samples = [item for sublist in sample_groups for item in sublist] + ["MRL (3x)", "MRL (5x)", "MRL (10x)"]
     # Get blank subtracted means
     blank_subtracted_means = dfc.columns[dfc.columns.str.contains(pat="BlankSub")].tolist()
-    # Establish ordering of all possible front matter (tracer/no tracer, flags/no flags, etc.)
-    ordering = [
-        "Ionization_Mode",
-        "Mass",
-        "Retention_Time",
-        "Compound",
-        "Tracer Chemical Match?",
-        "Duplicate Feature?",
-        "Is Adduct or Loss?",
-        "Has Adduct or Loss?",
-        "Adduct or Loss Info",
-        "Final Occurrence Count",
-        "Final Occurrence Percentage",
-        "Final Occurrence Count (with flags)",
-        "Final Occurrence Percentage (with flags)",
-    ]
     # Get dfc columns in list
     all_cols = dfc.columns.tolist()
     # Front matter list comp
-    front_matter = [item for item in ordering if item in all_cols]
+    front_matter = [item for item in FRONT_MATTER_ORDERING if item in all_cols]
     # Generate full column list
     cols = pt_cols + front_matter + raw_samples + blank_subtracted_means
-    # Subset dft with correct columns / ordering
+    # Subset dfc with correct column ordering
     dfc = dfc[cols]
     # Rename columns
-    dfc["Ionization_Mode"] = dfc["Ionization_Mode"].replace("Esi+", "ESI+")
-    dfc["Ionization_Mode"] = dfc["Ionization_Mode"].replace("Esi-", "ESI-")
+    dfc[IONIZATION_COL] = dfc[IONIZATION_COL].replace("Esi+", "ESI+")
+    dfc[IONIZATION_COL] = dfc[IONIZATION_COL].replace("Esi-", "ESI-")
     dfc.rename(
-        {"Ionization_Mode": "Ionization Mode", "Retention_Time": "Retention Time"},
+        {IONIZATION_COL: "Ionization Mode", RETENTION_COL: "Retention Time"},
         axis=1,
         inplace=True,
     )
@@ -1769,7 +1713,7 @@
     return dfc
 
 
-def calc_toxcast_percent_active(df):
+def calc_toxcast_percent_active(df: pd.DataFrame):
     """
     Function that calculates toxcast percent active values.
@@ -1801,7 +1745,7 @@
     return dft
 
 
-def determine_string_width(input_string):
+def determine_string_width(input_string: str):
     """
     The following function calculates a "width" of a string based on the characters within, as some
     characters are large, medium or skinny. These widths are used to determine the spacing of the group
@@ -1885,6 +1829,7 @@
     for j in range(len(input_string)):
         if input_string[j] in big_letters:
             temp_increment = temp_increment + big_increment
+            # TODO: are these print statements still necessary?
print("big") elif input_string[j] in medium_letters: temp_increment = temp_increment + medium_increment @@ -1896,7 +1841,7 @@ def determine_string_width(input_string): return temp_increment -def chunk_dataframe(df, chunk_size): +def chunk_dataframe(df: pd.DataFrame, chunk_size: int): """ Function for splitting a dataframe into chunks for printing into separate sheets of an excel workbook. @@ -1914,7 +1859,7 @@ def chunk_dataframe(df, chunk_size): yield df[i * chunk_size : (i + 1) * chunk_size] -def create_excel_book(d, chem_res=False): +def create_excel_book(d: dict[str, pd.DataFrame], chem_res=False): """ Function for creating excel book from python dictionary, where dict keys are sheet names and dict items (dfs) are sheet contents. @@ -1961,7 +1906,7 @@ def create_excel_book(d, chem_res=False): return excel_data -def DSSTox_atom_filtering(df_in, atom_ranges): +def DSSTox_atom_filtering(df_in: pd.DataFrame, atom_ranges: list[dict]): """ Function that takes a dataframe of returned candidates from searching DSSTox and user submitted ranges for atoms (CHONPS, Halogens, and other potential elements). @@ -1977,7 +1922,7 @@ def DSSTox_atom_filtering(df_in, atom_ranges): # Copy input dataframe df = df_in.copy() # Drop candidates with no formula information - df = df.loc[~df["MOLECULAR_FORMULA"].isna(), :] + df = df.loc[~df[MOLECULAR_FORMULA_COL].isna(), :] # Create separate 'atom_ranges' into 'to_search' and 'to_exclude' to_search = [item for item in atom_ranges if (item["max"] - item["min"]) > 0] to_exclude = [item["element"] for item in atom_ranges if (item["max"] - item["min"]) <= 0] @@ -1986,11 +1931,11 @@ def DSSTox_atom_filtering(df_in, atom_ranges): # is stored in a new "{element} filter check" column (1 - pass, 0 - fail) for item in to_search: col = item["element"] + " filter check" - df[col] = df["MOLECULAR_FORMULA"].apply( + df[col] = df[MOLECULAR_FORMULA_COL].apply( lambda x: formula_atom_count(x, item["element"], item["min"], item["max"]) ) # Flag candidates with elements from 'to_exclude' - df["Pass excluded elements filter?"] = df["MOLECULAR_FORMULA"].apply(lambda x: formula_exclude(x, to_exclude)) + df["Pass excluded elements filter?"] = df[MOLECULAR_FORMULA_COL].apply(lambda x: formula_exclude(x, to_exclude)) # Get '{element} filter check' columns in list cols = [col for col in df.columns if " filter check" in col] + ["Pass excluded elements filter?"] # Keep rows that pass for all '{element} filter check' columns and excluded elements @@ -2002,11 +1947,11 @@ def DSSTox_atom_filtering(df_in, atom_ranges): def formula_atom_count( - formula, - element, - minimum, - maximum, -): + formula: str, + element: str, + minimum: int, + maximum: int, +) -> Literal[0, 1]: """ Function that takes in a chemical formula string, an element string, a minimum integer, and a maximum integer. The function finds the element string @@ -2046,9 +1991,9 @@ def formula_atom_count( def formula_exclude( - formula, - element_li, -): + formula: str, + element_li: list[str], +) -> Literal[0, 1]: """ Function that takes in a chemical formula string and an element string. 
The function searches for the element string in the chemical formula string, diff --git a/input/ms1/1a_MZmine3_pos.csv b/input/ms1/1a_MZmine3_pos.csv index 66d94d24..fff5d797 100644 --- a/input/ms1/1a_MZmine3_pos.csv +++ b/input/ms1/1a_MZmine3_pos.csv @@ -1,4 +1,4 @@ -MZmine_ID,MB1,MB2,MB3,MB4,MB5,D1S1_1,D1S1_2,D1S1_3,D1S2_1,D1S2_2,D1S2_3,D1S3_1,D1S3_2,D1S3_3,D1S4_1,D1S4_2,D1S4_3,D2S1_1,D2S1_2,D2S1_3,D2S2_1,D2S2_2,D2S2_3,D2S3_1,D2S3_2,D2S3_3,D2S4_1,D2S4_2,D2S4_3,D3S1_1,D3S1_2,D3S1_3,D3S2_1,D3S2_2,D3S2_3,D3S3_1,D3S3_2,D3S3_3,D3S4_1,D3S4_2,D3S4_3,Pooled_1,Pooled_2,Pooled_3,Pooled_4,10ppb_1,10ppb_2,100ppb_1,100ppb_2,250ppb_1,250ppb_2,250ppb_3,250ppb_4,500ppb_1,500ppb_2,1000ppb_1,1000ppb_2,m/z,Retention Time,Mass,Ionization mode +MZmine_ID,MB1,MB2,MB3,MB4,MB5,D1S1_1,D1S1_2,D1S1_3,D1S2_1,D1S2_2,D1S2_3,D1S3_1,D1S3_2,D1S3_3,D1S4_1,D1S4_2,D1S4_3,D2S1_1,D2S1_2,D2S1_3,D2S2_1,D2S2_2,D2S2_3,D2S3_1,D2S3_2,D2S3_3,D2S4_1,D2S4_2,D2S4_3,D3S1_1,D3S1_2,D3S1_3,D3S2_1,D3S2_2,D3S2_3,D3S3_1,D3S3_2,D3S3_3,D3S4_1,D3S4_2,D3S4_3,Pooled_1,Pooled_2,Pooled_3,Pooled_4,10ppb_1,10ppb_2,100ppb_1,100ppb_2,250ppb_1,250ppb_2,250ppb_3,250ppb_4,500ppb_1,500ppb_2,1000ppb_1,1000ppb_2,m/z,Retention Time,Mass,Ionization Mode 3268,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,195.77501,,,179.43219,,,,,,,,,,,168.7952,168.7952,58.06545151,9.240864,57.05817551,Esi+ 174,,,,,,,,,,,,,,,,,,,,,1091.0004,1212.4932,960.021,855.3134,,,,,,,,,,,,903.57697,,,,,,615.77826,666.35693,638.09827,522.0698,,,,,,,,,,,,,61.00808652,0.7318255,60.00081052,Esi+ 1656,301.16437,238.65865,,172.99602,124.241974,185.96703,,126.823105,172.96233,142.18542,108.34602,151.76581,,124.848045,158.36009,,154.1734,,152.28131,116.609276,,128.06255,,137.65839,,104.11684,,168.83298,114.57462,,,,,131.19362,,,124.75299,,,186.35593,116.801285,242.66692,,149.98123,110.991875,647.5779,647.5779,494.74332,494.74332,406.00397,171.5949,146.74963,114.577896,385.40845,385.40845,314.1869,314.1869,69.03398487,6.4335365,68.02670887,Esi+ diff --git a/input/ms1/1b_MZmine3_neg.csv b/input/ms1/1b_MZmine3_neg.csv index ae53889d..1bc7e737 100644 --- a/input/ms1/1b_MZmine3_neg.csv +++ b/input/ms1/1b_MZmine3_neg.csv @@ -1,4 +1,4 @@ -MZmine_ID,MB1,MB2,MB3,MB4,MB5,D1S1_1,D1S1_2,D1S1_3,D1S2_1,D1S2_2,D1S2_3,D1S3_1,D1S3_2,D1S3_3,D1S4_1,D1S4_2,D1S4_3,D2S1_1,D2S1_2,D2S1_3,D2S2_1,D2S2_2,D2S2_3,D2S3_1,D2S3_2,D2S3_3,D2S4_1,D2S4_2,D2S4_3,D3S1_1,D3S1_2,D3S1_3,D3S2_1,D3S2_2,D3S2_3,D3S3_1,D3S3_2,D3S3_3,D3S4_1,D3S4_2,D3S4_3,Pooled_1,Pooled_2,Pooled_3,Pooled_4,10ppb_1,10ppb_2,100ppb_1,100ppb_2,250ppb_1,250ppb_2,250ppb_3,250ppb_4,500ppb_1,500ppb_2,1000ppb_1,1000ppb_2,m/z,Retention Time,Mass,Ionization mode +MZmine_ID,MB1,MB2,MB3,MB4,MB5,D1S1_1,D1S1_2,D1S1_3,D1S2_1,D1S2_2,D1S2_3,D1S3_1,D1S3_2,D1S3_3,D1S4_1,D1S4_2,D1S4_3,D2S1_1,D2S1_2,D2S1_3,D2S2_1,D2S2_2,D2S2_3,D2S3_1,D2S3_2,D2S3_3,D2S4_1,D2S4_2,D2S4_3,D3S1_1,D3S1_2,D3S1_3,D3S2_1,D3S2_2,D3S2_3,D3S3_1,D3S3_2,D3S3_3,D3S4_1,D3S4_2,D3S4_3,Pooled_1,Pooled_2,Pooled_3,Pooled_4,10ppb_1,10ppb_2,100ppb_1,100ppb_2,250ppb_1,250ppb_2,250ppb_3,250ppb_4,500ppb_1,500ppb_2,1000ppb_1,1000ppb_2,m/z,Retention Time,Mass,Ionization Mode 16,,,,,,212.73552,219.34872,281.90924,221.2685,248.72163,214.2259,242.11006,318.33566,342.4138,,,,,253.82516,,287.05722,412.3387,347.5727,278.27313,353.55814,339.57358,,,,269.50406,305.74353,265.1876,238.98248,295.77087,280.3127,277.0126,316.07498,312.39313,,,,,,,,,,,,,,,,,,,,86.99327011,0.820615,88.00054611,Esi- 
 97,1060.3883,,1468.8896,2660.218,1198.4237,900.1573,1856.4471,,1629.8142,,3759.2708,2697.5913,1550.9102,2276.3447,3398.5586,2640.9841,2326.5679,1775.7096,1986.4924,,,1446.644,3586.179,,5742.782,1210.672,2446.036,2605.1956,1961.6882,1491.55,2004.7794,,1380.3123,1090.6395,1459.1161,1379.0665,,2324.6565,1118.1552,,2835.0273,1791.8285,3270.8782,3327.1106,3122.3787,1884.4099,1884.4099,3270.5608,3270.5608,2535.286,4900.734,1849.4922,6822.686,2279.2485,2279.2485,5719.1035,5719.1035,91.00316049,0.91303396,92.01043649,Esi-
 2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,209.75368,212.25703,180.59227,,,,,,,,,,,303.348,303.348,94.98040041,0.7934577,95.98767641,Esi-
diff --git a/requirements.txt b/requirements.txt
index a9e69a74..2427b872 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
 aiohttp==3.9.2
 bokeh
-blosc==1.10.2
+blosc==1.11.2
+coverage==7.8.0
 dask==2023.4.0
 distributed==2023.4.0
 django==4.1.13
@@ -12,6 +13,7 @@
 matplotlib==3.7.1
 numpy==1.24.2
 pandas==2.0.0
 pymongo==3.12.0
+pytest==8.3.5
 psycopg2-binary==2.9.2
 requests==2.31.0
 toolz==0.11.2
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 00000000..30b7a7dd
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,7 @@
+# Pytest
+To run the tests, run the following from the top-level directory:
+
+```
+pytest nta_app/tests/
+```
+
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/app_ms1_test_helpers.py b/tests/app_ms1_test_helpers.py
new file mode 100644
index 00000000..464ab931
--- /dev/null
+++ b/tests/app_ms1_test_helpers.py
@@ -0,0 +1,42 @@
+import datetime
+from nta_app.app.constants import EXAMPLE_POS_FILENAME, EXAMPLE_NEG_FILENAME, EXAMPLE_RUN_SEQUENCE_NEG_FILENAME, EXAMPLE_RUN_SEQUENCE_POS_FILENAME, EXAMPLE_SURROGATE_FILENAME, EXAMPLE_TRACER_FILENAME
+
+inputParameters = {
+    "project_name": ["Project name", "Example nta"],
+    "datetime": ["Date & time", str(datetime.datetime.now())],
+    "test_files": ["Run test files only (debugging)", "yes"],
+    "pos_input": ["Positive mode file", EXAMPLE_POS_FILENAME],
+    "neg_input": ["Negative mode file", EXAMPLE_NEG_FILENAME],
+    "pos_adducts": ["Positive mode adducts", ["Na", "K", "NH4"]],
+    "neg_adducts": ["Negative mode adducts", ["Cl", "HCO2", "CH3CO2", "FA"]],
+    "neutral_losses": ["Neutral losses (both modes)", ["H2O", "CO2"]],
+    "mass_accuracy_units": ["Adduct / duplicate mass accuracy units", "ppm"],
+    "mass_accuracy": ["Adduct / duplicate mass accuracy", 10],
+    "rt_accuracy": ["Adduct / duplicate retention time accuracy (mins)", 0.05],
+    "run_sequence_pos_file": [
+        "Run sequence positive mode file",
+        EXAMPLE_RUN_SEQUENCE_POS_FILENAME,
+    ],
+    "run_sequence_neg_file": [
+        "Run sequence negative mode file",
+        EXAMPLE_RUN_SEQUENCE_NEG_FILENAME,
+    ],
+    "tracer_input": ["Tracer file", EXAMPLE_TRACER_FILENAME],
+    "mass_accuracy_units_tr": ["Tracer mass accuracy units", "ppm"],
+    "mass_accuracy_tr": ["Tracer mass accuracy", 5],
+    "rt_accuracy_tr": ["Tracer retention time accuracy (mins)", 0.1],
+    "tracer_plot_yaxis_format": ["Tracer plot y-axis scaling", "log"],
+    "tracer_plot_trendline": ["Tracer plot trendlines shown", "yes"],
+    "min_replicate_hits": ["Min replicate hits (%)", 66],
+    "min_replicate_hits_blanks": ["Min replicate hits in blanks (%)", 66],
+    "max_replicate_cv": ["Max replicate CV", 0.8],
+    "mrl_std_multiplier": ["MRL standard deviation multiplier", "3"],
+    "parent_ion_mass_accuracy": ["Parent ion mass accuracy (ppm)", 5],
+    "minimum_rt": ["Discard features below this retention time (mins)", 0.00],
"search_dsstox": ["Search DSSTox for possible structures", "no"], + "search_hcd": ["Search Cheminformatics Hazard Module for toxicity data", "no"], + "search_mode": ["Search dashboard by", "mass"], + "do_qnta": ["Perform qNTA?", "no"], + "qnta_input": ["qNTA Surrogate input file", EXAMPLE_SURROGATE_FILENAME], + "atom_ranges": ["Atom filtering ranges", None], +} \ No newline at end of file diff --git a/tests/test_app_ms1_test_helpers.py b/tests/test_app_ms1_test_helpers.py new file mode 100644 index 00000000..456187e0 --- /dev/null +++ b/tests/test_app_ms1_test_helpers.py @@ -0,0 +1,6 @@ +from nta_app.tests.app_ms1_test_helpers import inputParameters +from nta_app.app.constants import EXAMPLE_POS_FILENAME + +def test__ensure_parameters_are_complete_for_tests(): + assert inputParameters["pos_input"][1] == EXAMPLE_POS_FILENAME + assert inputParameters["test_files"][1] == "yes" \ No newline at end of file diff --git a/tests/test_nta_task.py b/tests/test_nta_task.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_task_functions.py b/tests/test_task_functions.py new file mode 100644 index 00000000..7d070982 --- /dev/null +++ b/tests/test_task_functions.py @@ -0,0 +1,67 @@ +import pandas as pd +import os +import pytest +from nta_app.tests.app_ms1_test_helpers import inputParameters +from nta_app.app.constants import EXAMPLE_NEG_FILENAME, EXAMPLE_POS_FILENAME +from nta_app.app.ms1.task_functions import duplicates, assign_feature_id, differences as count_string_differences, parse_headers, get_sample_and_blank_headers, passthrucol, window_size + +data_dir = "input/ms1" +my_pos_df = pd.read_csv(os.path.join(data_dir, EXAMPLE_POS_FILENAME)) +my_neg_df = pd.read_csv(os.path.join(data_dir, EXAMPLE_NEG_FILENAME)) + +def test__added_feature_id__new_column_for_feature_id(): + data = { + "firstCol": [420, 380, 390], + "secondCol": [50, 40, 45] + } + old_df = pd.DataFrame(data) + assert "Feature ID" not in old_df.columns + + new_df = assign_feature_id(df_in=old_df, start=1) + + assert "Feature ID" in new_df.columns + +def test__count_string_differences__for_basic_strings(): + assert count_string_differences(s1="ones", s2="one") == 2 + +def test__count_string_differences__for_strings_with_special_characters(): + assert count_string_differences(s1="o_nes", s2="o^nes") == 2 + +def test__parse_headers__returns_list_of_list_of_string(df=my_pos_df): + result = parse_headers(df) + assert len(result) == 23 + +def test__parse_headers__lists_contain_expected_items(df=my_pos_df): + result = parse_headers(df) + assert result[0][0] == "MB1" + assert result[22][0] == "Ionization Mode" + +def test__get_sample_and_blank_headers__returns_all_headers(pos_df=my_pos_df, neg_df=my_neg_df): + assert len(get_sample_and_blank_headers((pos_df, neg_df))) == 3 + +def test__get_sample_and_blank_headers__fails_when_both_dfs_are_none(pos_df=my_pos_df, neg_df=my_neg_df): + with pytest.raises(AttributeError): + get_sample_and_blank_headers((None, None)) + +def test__get_sample_and_blank_headers__returns_correct_content(pos_df=my_pos_df, neg_df=my_neg_df): + all_headers, blank_headers, sample_headers = get_sample_and_blank_headers((pos_df, neg_df)) + assert len(blank_headers[0]) == 5 + for sample_types in sample_headers: + for sample in sample_types: + assert not sample.startswith("MB") + assert all_headers[-1][0] == "Ionization Mode" + +def test__passthrucol__returns_passthrough_and_trimmed_df(pos_df=my_pos_df, neg_df=my_neg_df): + pos_df = assign_feature_id(pos_df) + all_headers = 
get_sample_and_blank_headers((pos_df, neg_df))[0] + df_pt, df_trim = passthrucol(pos_df, all_headers) + assert "m/z" in df_pt.columns.values + assert "m/z" not in df_trim.columns.values + +def test__window_size__default_mass_diff(df_in=my_pos_df): + val = window_size(df_in) + assert val == 1801 + +def test__window_size__supplied_mass_diff(df_in=my_pos_df): + val = window_size(df_in, 100.00) + assert val == 1597 \ No newline at end of file diff --git a/views/ms1/ms1_input_api.py b/views/ms1/ms1_input_api.py index 02d690a0..c19902a3 100644 --- a/views/ms1/ms1_input_api.py +++ b/views/ms1/ms1_input_api.py @@ -8,6 +8,7 @@ import string, random import datetime import logging +from nta_app.app.constants import EXAMPLE_NEG_FILENAME, EXAMPLE_POS_FILENAME, EXAMPLE_RUN_SEQUENCE_NEG_FILENAME, EXAMPLE_RUN_SEQUENCE_POS_FILENAME, EXAMPLE_SURROGATE_FILENAME, EXAMPLE_TRACER_FILENAME from ...app.ms1.nta_task import run_nta_dask from ...tools.ms1 import file_manager from ..views_dectorators import api_key_required @@ -17,15 +18,6 @@ if os.getenv("DEPLOY_ENV", "kube-dev") == "kube-prod": logger.setLevel(logging.WARNING) -# hard-coded example file names for testing found in nta_app/input/ms1/ -example_pos_filename = "1a_MZmine3_pos.csv" -example_neg_filename = "1b_MZmine3_neg.csv" -example_tracer_filename = "WW2DW_Tracers_Amenable.csv" -example_run_sequence_pos_filename = "WW2DW_sequence_cal.csv" -example_run_sequence_neg_filename = "WW2DW_sequence_cal.csv" -example_surrogate_filename = "qNTA_Surrogate_Input_File_WW2DW.csv" - - @api_key_required @csrf_exempt def ms1_run_api(request): @@ -205,13 +197,13 @@ def ms1_run_api(request): # handle case 1: the user has selected to run the test files # get the path and filename of the test files example_data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "input/ms1") - pos_input = os.path.join(example_data_dir, example_pos_filename) - neg_input = os.path.join(example_data_dir, example_neg_filename) - tracer_file = os.path.join(example_data_dir, example_tracer_filename) - run_sequence_pos_file = os.path.join(example_data_dir, example_run_sequence_pos_filename) - run_sequence_neg_file = os.path.join(example_data_dir, example_run_sequence_neg_filename) + pos_input = os.path.join(example_data_dir, EXAMPLE_POS_FILENAME) + neg_input = os.path.join(example_data_dir, EXAMPLE_NEG_FILENAME) + tracer_file = os.path.join(example_data_dir, EXAMPLE_TRACER_FILENAME) + run_sequence_pos_file = os.path.join(example_data_dir, EXAMPLE_RUN_SEQUENCE_POS_FILENAME) + run_sequence_neg_file = os.path.join(example_data_dir, EXAMPLE_RUN_SEQUENCE_NEG_FILENAME) if parameters["do_qnta"] == "yes": - qnta_file = os.path.join(example_data_dir, example_surrogate_filename) + qnta_file = os.path.join(example_data_dir, EXAMPLE_SURROGATE_FILENAME) inputParameters["qnta_input"][1] = qnta_file qnta_df = file_manager.tracer_handler(qnta_file) else: