diff --git a/.gitignore b/.gitignore index 0628429..f684eec 100755 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ GuPPy/runFiberPhotometryAnalysis.ipynb .clinerules/ testing_data/ + +CLAUDE.md diff --git a/src/guppy/analysis/__init__.py b/src/guppy/analysis/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/guppy/analysis/artifact_removal.py b/src/guppy/analysis/artifact_removal.py new file mode 100644 index 0000000..d3da042 --- /dev/null +++ b/src/guppy/analysis/artifact_removal.py @@ -0,0 +1,222 @@ +import logging + +import numpy as np + +logger = logging.getLogger(__name__) + + +def remove_artifacts( + timeForLightsTurnOn, + storesList, + pair_name_to_tsNew, + pair_name_to_sampling_rate, + pair_name_to_coords, + name_to_data, + compound_name_to_ttl_timestamps, + method, +): + if method == "concatenate": + name_to_corrected_data, pair_name_to_corrected_timestamps, compound_name_to_corrected_ttl_timestamps = ( + processTimestampsForArtifacts( + timeForLightsTurnOn, + storesList, + pair_name_to_tsNew, + pair_name_to_sampling_rate, + pair_name_to_coords, + name_to_data, + compound_name_to_ttl_timestamps, + ) + ) + logger.info("Artifacts removed using concatenate method.") + elif method == "replace with NaN": + name_to_corrected_data, compound_name_to_corrected_ttl_timestamps = addingNaNtoChunksWithArtifacts( + storesList, + pair_name_to_tsNew, + pair_name_to_coords, + name_to_data, + compound_name_to_ttl_timestamps, + ) + pair_name_to_corrected_timestamps = None + logger.info("Artifacts removed using NaN replacement method.") + else: + logger.error("Invalid artifact removal method specified.") + raise ValueError("Invalid artifact removal method specified.") + + return name_to_corrected_data, pair_name_to_corrected_timestamps, compound_name_to_corrected_ttl_timestamps + + +def addingNaNtoChunksWithArtifacts( + storesList, pair_name_to_tsNew, pair_name_to_coords, name_to_data, compound_name_to_ttl_timestamps +): + logger.debug("Replacing chunks with artifacts by NaN values.") + names_for_storenames = storesList[1, :] + pair_names = pair_name_to_tsNew.keys() + + name_to_corrected_data = {} + compound_name_to_corrected_ttl_timestamps = {} + for pair_name in pair_names: + tsNew = pair_name_to_tsNew[pair_name] + coords = pair_name_to_coords[pair_name] + for i in range(len(names_for_storenames)): + if ( + "control_" + pair_name.lower() in names_for_storenames[i].lower() + or "signal_" + pair_name.lower() in names_for_storenames[i].lower() + ): # changes done + data = name_to_data[names_for_storenames[i]].reshape(-1) + data = addingNaNValues(data=data, ts=tsNew, coords=coords) + name_to_corrected_data[names_for_storenames[i]] = data + else: + if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): + continue + ttl_name = names_for_storenames[i] + compound_name = ttl_name + "_" + pair_name + ts = compound_name_to_ttl_timestamps[compound_name].reshape(-1) + ts = removeTTLs(ts=ts, coords=coords) + compound_name_to_corrected_ttl_timestamps[compound_name] = ts + logger.info("Chunks with artifacts are replaced by NaN values.") + + return name_to_corrected_data, compound_name_to_corrected_ttl_timestamps + + +# main function to align timestamps for control, signal and event timestamps for artifacts removal +def processTimestampsForArtifacts( + timeForLightsTurnOn, + storesList, + pair_name_to_tsNew, + pair_name_to_sampling_rate, + pair_name_to_coords, + name_to_data, + compound_name_to_ttl_timestamps, +): + logger.debug("Processing timestamps 
to get rid of artifacts using concatenate method...") + names_for_storenames = storesList[1, :] + pair_names = pair_name_to_tsNew.keys() + + name_to_corrected_data = {} + pair_name_to_corrected_timestamps = {} + compound_name_to_corrected_ttl_timestamps = {} + for pair_name in pair_names: + sampling_rate = pair_name_to_sampling_rate[pair_name] + tsNew = pair_name_to_tsNew[pair_name] + coords = pair_name_to_coords[pair_name] + + for i in range(len(names_for_storenames)): + if ( + "control_" + pair_name.lower() in names_for_storenames[i].lower() + or "signal_" + pair_name.lower() in names_for_storenames[i].lower() + ): # changes done + data = name_to_data[names_for_storenames[i]] + data, timestampNew = eliminateData( + data=data, + ts=tsNew, + coords=coords, + timeForLightsTurnOn=timeForLightsTurnOn, + sampling_rate=sampling_rate, + ) + name_to_corrected_data[names_for_storenames[i]] = data + pair_name_to_corrected_timestamps[pair_name] = timestampNew + else: + if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): + continue + compound_name = names_for_storenames[i] + "_" + pair_name + ts = compound_name_to_ttl_timestamps[compound_name] + ts = eliminateTs( + ts=ts, + tsNew=tsNew, + coords=coords, + timeForLightsTurnOn=timeForLightsTurnOn, + sampling_rate=sampling_rate, + ) + compound_name_to_corrected_ttl_timestamps[compound_name] = ts + + logger.info("Timestamps processed, artifacts are removed and good chunks are concatenated.") + + return ( + name_to_corrected_data, + pair_name_to_corrected_timestamps, + compound_name_to_corrected_ttl_timestamps, + ) + + +# helper function to process control and signal timestamps +def eliminateData(*, data, ts, coords, timeForLightsTurnOn, sampling_rate): + + if (data == 0).all() == True: + data = np.zeros(ts.shape[0]) + + arr = np.array([]) + ts_arr = np.array([]) + for i in range(coords.shape[0]): + + index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] + + if len(arr) == 0: + arr = np.concatenate((arr, data[index])) + sub = ts[index][0] - timeForLightsTurnOn + new_ts = ts[index] - sub + ts_arr = np.concatenate((ts_arr, new_ts)) + else: + temp = data[index] + # new = temp + (arr[-1]-temp[0]) + temp_ts = ts[index] + new_ts = temp_ts - (temp_ts[0] - ts_arr[-1]) + arr = np.concatenate((arr, temp)) + ts_arr = np.concatenate((ts_arr, new_ts + (1 / sampling_rate))) + + # logger.info(arr.shape, ts_arr.shape) + return arr, ts_arr + + +# helper function to align event timestamps with the control and signal timestamps +def eliminateTs(*, ts, tsNew, coords, timeForLightsTurnOn, sampling_rate): + + ts_arr = np.array([]) + tsNew_arr = np.array([]) + for i in range(coords.shape[0]): + tsNew_index = np.where((tsNew > coords[i, 0]) & (tsNew < coords[i, 1]))[0] + ts_index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] + + if len(tsNew_arr) == 0: + sub = tsNew[tsNew_index][0] - timeForLightsTurnOn + tsNew_arr = np.concatenate((tsNew_arr, tsNew[tsNew_index] - sub)) + ts_arr = np.concatenate((ts_arr, ts[ts_index] - sub)) + else: + temp_tsNew = tsNew[tsNew_index] + temp_ts = ts[ts_index] + new_ts = temp_ts - (temp_tsNew[0] - tsNew_arr[-1]) + new_tsNew = temp_tsNew - (temp_tsNew[0] - tsNew_arr[-1]) + tsNew_arr = np.concatenate((tsNew_arr, new_tsNew + (1 / sampling_rate))) + ts_arr = np.concatenate((ts_arr, new_ts + (1 / sampling_rate))) + + return ts_arr + + +# adding nan values to removed chunks +# when using artifacts removal method - replace with NaN +def addingNaNValues(*, data, ts, coords): + + if (data == 
0).all() == True: + data = np.zeros(ts.shape[0]) + + arr = np.array([]) + ts_index = np.arange(ts.shape[0]) + for i in range(coords.shape[0]): + + index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] + arr = np.concatenate((arr, index)) + + nan_indices = list(set(ts_index).symmetric_difference(arr)) + data[nan_indices] = np.nan + + return data + + +# remove event TTLs which falls in the removed chunks +# when using artifacts removal method - replace with NaN +def removeTTLs(*, ts, coords): + ts_arr = np.array([]) + for i in range(coords.shape[0]): + ts_index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] + ts_arr = np.concatenate((ts_arr, ts[ts_index])) + + return ts_arr diff --git a/src/guppy/analysis/combine_data.py b/src/guppy/analysis/combine_data.py new file mode 100644 index 0000000..6ccddc0 --- /dev/null +++ b/src/guppy/analysis/combine_data.py @@ -0,0 +1,128 @@ +import logging +import os + +import numpy as np + +from .io_utils import ( + decide_naming_convention, + read_hdf5, + write_hdf5, +) + +logger = logging.getLogger(__name__) + + +def eliminateData(filepath_to_timestamps, filepath_to_data, timeForLightsTurnOn, sampling_rate): + + arr = np.array([]) + ts_arr = np.array([]) + filepaths = list(filepath_to_timestamps.keys()) + for filepath in filepaths: + ts = filepath_to_timestamps[filepath] + data = filepath_to_data[filepath] + + if len(arr) == 0: + arr = np.concatenate((arr, data)) + sub = ts[0] - timeForLightsTurnOn + new_ts = ts - sub + ts_arr = np.concatenate((ts_arr, new_ts)) + else: + temp = data + temp_ts = ts + new_ts = temp_ts - (temp_ts[0] - ts_arr[-1]) + arr = np.concatenate((arr, temp)) + ts_arr = np.concatenate((ts_arr, new_ts + (1 / sampling_rate))) + + return arr, ts_arr + + +def eliminateTs(filepath_to_timestamps, filepath_to_ttl_timestamps, timeForLightsTurnOn, sampling_rate): + + ts_arr = np.array([]) + tsNew_arr = np.array([]) + filepaths = list(filepath_to_timestamps.keys()) + for filepath in filepaths: + ts = filepath_to_timestamps[filepath] + tsNew = filepath_to_ttl_timestamps[filepath] + if len(tsNew_arr) == 0: + sub = tsNew[0] - timeForLightsTurnOn + tsNew_arr = np.concatenate((tsNew_arr, tsNew - sub)) + ts_arr = np.concatenate((ts_arr, ts - sub)) + else: + temp_tsNew = tsNew + temp_ts = ts + new_ts = temp_ts - (temp_tsNew[0] - tsNew_arr[-1]) + new_tsNew = temp_tsNew - (temp_tsNew[0] - tsNew_arr[-1]) + tsNew_arr = np.concatenate((tsNew_arr, new_tsNew + (1 / sampling_rate))) + ts_arr = np.concatenate((ts_arr, new_ts + (1 / sampling_rate))) + + # logger.info(event) + # logger.info(ts_arr) + return ts_arr + + +def combine_data(filepath: list[list[str]], timeForLightsTurnOn, names_for_storenames, sampling_rate): + # filepath = [[folder1_output_0, folder2_output_0], [folder1_output_1, folder2_output_1], ...] + + logger.debug("Processing timestamps for combining data...") + + names_for_storenames = names_for_storenames[1, :] + + for single_output_filepaths in filepath: + # single_output_filepaths = [folder1_output_i, folder2_output_i, ...] 
+ + path = decide_naming_convention(single_output_filepaths[0]) + + pair_name_to_tsNew = {} + for j in range(path.shape[1]): + name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_")[-1] + name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_")[-1] + if name_1 != name_2: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + pair_name = name_1 + + for i in range(len(names_for_storenames)): + if ( + "control_" + pair_name.lower() in names_for_storenames[i].lower() + or "signal_" + pair_name.lower() in names_for_storenames[i].lower() + ): + filepath_to_timestamps = {} + filepath_to_data = {} + for filepath in single_output_filepaths: + ts = read_hdf5("timeCorrection_" + pair_name, filepath, "timestampNew") + data = read_hdf5(names_for_storenames[i], filepath, "data").reshape(-1) + filepath_to_timestamps[filepath] = ts + filepath_to_data[filepath] = data + + data, timestampNew = eliminateData( + filepath_to_timestamps, + filepath_to_data, + timeForLightsTurnOn, + sampling_rate, + ) + write_hdf5(data, names_for_storenames[i], single_output_filepaths[0], "data") + pair_name_to_tsNew[pair_name] = timestampNew + else: + if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): + continue + filepath_to_timestamps = {} + filepath_to_ttl_timestamps = {} + for filepath in single_output_filepaths: + tsNew = read_hdf5("timeCorrection_" + pair_name, filepath, "timestampNew") + if os.path.exists(os.path.join(filepath, names_for_storenames[i] + "_" + pair_name + ".hdf5")): + ts = read_hdf5(names_for_storenames[i] + "_" + pair_name, filepath, "ts").reshape(-1) + else: + ts = np.array([]) + filepath_to_timestamps[filepath] = tsNew + filepath_to_ttl_timestamps[filepath] = ts + + ts = eliminateTs( + filepath_to_timestamps, + filepath_to_ttl_timestamps, + timeForLightsTurnOn, + sampling_rate, + ) + write_hdf5(ts, names_for_storenames[i] + "_" + pair_name, single_output_filepaths[0], "ts") + for pair_name, tsNew in pair_name_to_tsNew.items(): + write_hdf5(tsNew, "timeCorrection_" + pair_name, single_output_filepaths[0], "timestampNew") diff --git a/src/guppy/analysis/control_channel.py b/src/guppy/analysis/control_channel.py new file mode 100644 index 0000000..605bd17 --- /dev/null +++ b/src/guppy/analysis/control_channel.py @@ -0,0 +1,122 @@ +import logging +import os +import shutil + +import numpy as np +import pandas as pd +from scipy import signal as ss +from scipy.optimize import curve_fit + +from .io_utils import ( + read_hdf5, + write_hdf5, +) + +logger = logging.getLogger(__name__) + + +# This function just creates placeholder Control-HDF5 files that are then immediately overwritten later on in the pipeline. +# TODO: Refactor this function to avoid unnecessary file creation. 
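+# NOTE (illustrative): `arr` below is the two-row storesList array; row 0 holds the raw
+# store names and row 1 the user-assigned names ("control_<region>", "signal_<region>",
+# or a TTL/event name). Example layout (store and region names here are made up):
+#     [["Dv1A",        "Dv2A",       "PrtN"        ],
+#      ["control_dms", "signal_dms", "RewardedPort"]]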
+# function to add a control channel when there is no isosbestic
+# control channel present, and to update the storesList file
+def add_control_channel(filepath, arr):
+
+    storenames = arr[0, :]
+    storesList = np.char.lower(arr[1, :])
+
+    keep_control = np.array([])
+    # check the case where an isosbestic control channel is already present
+    for i in range(storesList.shape[0]):
+        if "control" in storesList[i].lower():
+            name = storesList[i].split("_")[-1]
+            new_str = "signal_" + str(name).lower()
+            find_signal = [True for i in storesList if i == new_str]
+            if len(find_signal) > 1:
+                logger.error("Error in naming convention of files or Error in storesList file")
+                raise Exception("Error in naming convention of files or Error in storesList file")
+            if len(find_signal) == 0:
+                logger.error(
+                    "Isosbestic control channel parameter is set to False and still "
+                    "the storesList file shows there is a control channel present"
+                )
+                raise Exception(
+                    "Isosbestic control channel parameter is set to False and still "
+                    "the storesList file shows there is a control channel present"
+                )
+        else:
+            continue
+
+    for i in range(storesList.shape[0]):
+        if "signal" in storesList[i].lower():
+            name = storesList[i].split("_")[-1]
+            new_str = "control_" + str(name).lower()
+            find_signal = [True for i in storesList if i == new_str]
+            if len(find_signal) == 0:
+                src, dst = os.path.join(filepath, arr[0, i] + ".hdf5"), os.path.join(
+                    filepath, "cntrl" + str(i) + ".hdf5"
+                )
+                shutil.copyfile(src, dst)
+                arr = np.concatenate((arr, [["cntrl" + str(i)], ["control_" + str(arr[1, i].split("_")[-1])]]), axis=1)
+
+    np.savetxt(os.path.join(filepath, "storesList.csv"), arr, delimiter=",", fmt="%s")
+
+    return arr
+
+
+# main function to create a control channel from the
+# signal channel and save it to a file
+def create_control_channel(filepath, arr, window=5001):
+
+    storenames = arr[0, :]
+    storesList = arr[1, :]
+
+    for i in range(storesList.shape[0]):
+        event_name, event = storesList[i], storenames[i]
+        if "control" in event_name.lower() and "cntrl" in event.lower():
+            logger.debug("Creating control channel from signal channel using curve-fitting")
+            name = event_name.split("_")[-1]
+            signal = read_hdf5("signal_" + name, filepath, "data")
+            timestampNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew")
+            sampling_rate = np.full(timestampNew.shape, np.nan)
+            sampling_rate[0] = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0]
+
+            control = helper_create_control_channel(signal, timestampNew, window)
+
+            write_hdf5(control, event_name, filepath, "data")
+            d = {"timestamps": timestampNew, "data": control, "sampling_rate": sampling_rate}
+            df = pd.DataFrame(d)
+            df.to_csv(os.path.join(os.path.dirname(filepath), event.lower() + ".csv"), index=False)
+            logger.info("Control channel from signal channel created using curve-fitting")
+
+
+# TODO: figure out why a control channel is created for both timestamp correction and z-score steps.
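The helper below fits the smoothed signal to an exponential decay and uses the fitted curve as a stand-in control. A minimal, self-contained sketch of that idea (all names and constants here are illustrative, not part of the module):

import numpy as np
from scipy.optimize import curve_fit


def expo(t, a, b, c):
    return a + b * np.exp(-(1 / c) * t)


t = np.linspace(0, 600, 6001)  # ~10 minutes of made-up timestamps
signal = 2 + 5 * np.exp(-t / 120) + 0.1 * np.random.randn(t.size)  # decaying "bleaching" trace
popt, _ = curve_fit(expo, t, signal, p0=[5, 50, 60])  # same initial guess as curveFitFn below
synthetic_control = expo(t, *popt)  # smooth baseline used in place of an isosbestic control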
+# helper function to create a control channel from the signal channel
+# by curve fitting the signal channel to an exponential function,
+# when no isosbestic control channel is present
+def helper_create_control_channel(signal, timestamps, window):
+    # if the window is longer than the signal, shrink it to roughly half the
+    # signal length and make it odd, as required by savgol_filter
+    if window > signal.shape[0]:
+        window = int((signal.shape[0] + 1) / 2) + 1
+        if window % 2 == 0:
+            window = window + 1
+
+    filtered_signal = ss.savgol_filter(signal, window_length=window, polyorder=3)
+
+    p0 = [5, 50, 60]
+
+    try:
+        popt, pcov = curve_fit(curveFitFn, timestamps, filtered_signal, p0)
+    except Exception as e:
+        logger.error(str(e))
+        # re-raise: without a successful fit there is no control channel to return
+        raise
+
+    # logger.info('Curve Fit Parameters : ', popt)
+    control = curveFitFn(timestamps, *popt)
+
+    return control
+
+
+# curve fit exponential function
+def curveFitFn(x, a, b, c):
+    return a + (b * np.exp(-(1 / c) * x))
diff --git a/src/guppy/analysis/io_utils.py b/src/guppy/analysis/io_utils.py
new file mode 100644
index 0000000..b467c37
--- /dev/null
+++ b/src/guppy/analysis/io_utils.py
@@ -0,0 +1,196 @@
+import fnmatch
+import glob
+import logging
+import os
+import re
+
+import h5py
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+
+def takeOnlyDirs(paths):
+    removePaths = []
+    for p in paths:
+        if os.path.isfile(p):
+            removePaths.append(p)
+    return list(set(paths) - set(removePaths))
+
+
+# find files while ignoring case sensitivity
+def find_files(path, glob_path, ignore_case=False):
+    rule = (
+        re.compile(fnmatch.translate(glob_path), re.IGNORECASE)
+        if ignore_case
+        else re.compile(fnmatch.translate(glob_path))
+    )
+
+    no_bytes_path = os.listdir(os.path.expanduser(path))
+    str_path = []
+
+    # convert byte entries to strings; plain strings are kept as-is
+    for x in no_bytes_path:
+        try:
+            str_path.append(x.decode("utf-8"))
+        except (AttributeError, UnicodeDecodeError):
+            str_path.append(x)
+    return [os.path.join(path, n) for n in str_path if rule.match(n)]
+
+
+# check if dealing with TDT files or csv files
+def check_TDT(filepath):
+    path = glob.glob(os.path.join(filepath, "*.tsq"))
+    if len(path) > 0:
+        return True
+    else:
+        return False
+
+
+# function to read hdf5 file
+def read_hdf5(event, filepath, key):
+    if event:
+        event = event.replace("\\", "_")
+        event = event.replace("/", "_")
+        op = os.path.join(filepath, event + ".hdf5")
+    else:
+        op = filepath
+
+    if os.path.exists(op):
+        with h5py.File(op, "r") as f:
+            arr = np.asarray(f[key])
+    else:
+        logger.error(f"{event}.hdf5 file does not exist")
+        raise Exception("{}.hdf5 file does not exist".format(event))
+
+    return arr
+
+
+# function to write hdf5 file
+def write_hdf5(data, event, filepath, key):
+    event = event.replace("\\", "_")
+    event = event.replace("/", "_")
+    op = os.path.join(filepath, event + ".hdf5")
+
+    # if file does not exist create a new file
+    if not os.path.exists(op):
+        with h5py.File(op, "w") as f:
+            if type(data) is np.ndarray:
+                f.create_dataset(key, data=data, maxshape=(None,), chunks=True)
+            else:
+                f.create_dataset(key, data=data)
+
+    # if file already exists, append data to it or add a new key to it
+    else:
+        with h5py.File(op, "r+") as f:
+            if key in list(f.keys()):
+                if type(data) is np.ndarray:
+                    f[key].resize(data.shape)
+                    arr = f[key]
+                    arr[:] = data
+                else:
+                    # overwrite the existing dataset in place
+                    f[key][()] = data
+            else:
+                if type(data) is np.ndarray:
+                    f.create_dataset(key, data=data, maxshape=(None,), chunks=True)
+                else:
+                    f.create_dataset(key, data=data)
+
+
+# function to check if the naming convention for saving storeslist file was followed or not
+def decide_naming_convention(filepath):
+    path_1 = 
find_files(filepath, "control_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) + + path_2 = find_files(filepath, "signal_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'signal*')) + + path = sorted(path_1 + path_2, key=str.casefold) + if len(path) % 2 != 0: + logger.error("There are not equal number of Control and Signal data") + raise Exception("There are not equal number of Control and Signal data") + + path = np.asarray(path).reshape(2, -1) + + return path + + +# function to read coordinates file which was saved by selecting chunks for artifacts removal +def fetchCoords(filepath, naming, data): + + path = os.path.join(filepath, "coordsForPreProcessing_" + naming + ".npy") + + if not os.path.exists(path): + coords = np.array([0, data[-1]]) + else: + coords = np.load(os.path.join(filepath, "coordsForPreProcessing_" + naming + ".npy"))[:, 0] + + if coords.shape[0] % 2 != 0: + logger.error("Number of values in coordsForPreProcessing file is not even.") + raise Exception("Number of values in coordsForPreProcessing file is not even.") + + coords = coords.reshape(-1, 2) + + return coords + + +def get_coords(filepath, name, tsNew, removeArtifacts): # TODO: Make less redundant with fetchCoords + if removeArtifacts == True: + coords = fetchCoords(filepath, name, tsNew) + else: + dt = tsNew[1] - tsNew[0] + coords = np.array([[tsNew[0] - dt, tsNew[-1] + dt]]) + return coords + + +def get_all_stores_for_combining_data(folderNames): + op = [] + for i in range(100): + temp = [] + match = r"[\s\S]*" + "_output_" + str(i) + for j in folderNames: + temp.append(re.findall(match, j)) + temp = sorted(list(np.concatenate(temp).flatten()), key=str.casefold) + if len(temp) > 0: + op.append(temp) + + return op + + +# for combining data, reading storeslist file from both data and create a new storeslist array +def check_storeslistfile(folderNames): + storesList = np.array([[], []]) + for i in range(len(folderNames)): + filepath = folderNames[i] + storesListPath = takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*"))) + for j in range(len(storesListPath)): + filepath = storesListPath[j] + storesList = np.concatenate( + ( + storesList, + np.genfromtxt(os.path.join(filepath, "storesList.csv"), dtype="str", delimiter=",").reshape(2, -1), + ), + axis=1, + ) + + storesList = np.unique(storesList, axis=1) + + return storesList + + +def get_control_and_signal_channel_names(storesList): + storenames = storesList[0, :] + names_for_storenames = storesList[1, :] + + channels_arr = [] + for i in range(names_for_storenames.shape[0]): + if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): + channels_arr.append(names_for_storenames[i]) + + channels_arr = sorted(channels_arr, key=str.casefold) + try: + channels_arr = np.asarray(channels_arr).reshape(2, -1) + except: + logger.error("Error in saving stores list file or spelling mistake for control or signal") + raise Exception("Error in saving stores list file or spelling mistake for control or signal") + + return channels_arr diff --git a/src/guppy/analysis/standard_io.py b/src/guppy/analysis/standard_io.py new file mode 100644 index 0000000..e7fe8e0 --- /dev/null +++ b/src/guppy/analysis/standard_io.py @@ -0,0 +1,210 @@ +import logging +import os + +import numpy as np + +from .io_utils import ( + decide_naming_convention, + fetchCoords, + get_control_and_signal_channel_names, + read_hdf5, + write_hdf5, +) + +logger = logging.getLogger(__name__) + + +def read_control_and_signal(filepath, storesList): 
+ channels_arr = get_control_and_signal_channel_names(storesList) + storenames = storesList[0, :] + names_for_storenames = storesList[1, :] + + name_to_data = {} + name_to_timestamps = {} + name_to_sampling_rate = {} + name_to_npoints = {} + + for i in range(channels_arr.shape[1]): + control_name = channels_arr[0, i] + signal_name = channels_arr[1, i] + idx_c = np.where(names_for_storenames == control_name)[0] + idx_s = np.where(names_for_storenames == signal_name)[0] + control_storename = storenames[idx_c[0]] + signal_storename = storenames[idx_s[0]] + + control_data = read_hdf5(control_storename, filepath, "data") + signal_data = read_hdf5(signal_storename, filepath, "data") + control_timestamps = read_hdf5(control_storename, filepath, "timestamps") + signal_timestamps = read_hdf5(signal_storename, filepath, "timestamps") + control_sampling_rate = read_hdf5(control_storename, filepath, "sampling_rate") + signal_sampling_rate = read_hdf5(signal_storename, filepath, "sampling_rate") + try: # TODO: define npoints for csv datasets + control_npoints = read_hdf5(control_storename, filepath, "npoints") + signal_npoints = read_hdf5(signal_storename, filepath, "npoints") + except KeyError: # npoints is not defined for csv datasets + control_npoints = None + signal_npoints = None + + name_to_data[control_name] = control_data + name_to_data[signal_name] = signal_data + name_to_timestamps[control_name] = control_timestamps + name_to_timestamps[signal_name] = signal_timestamps + name_to_sampling_rate[control_name] = control_sampling_rate + name_to_sampling_rate[signal_name] = signal_sampling_rate + name_to_npoints[control_name] = control_npoints + name_to_npoints[signal_name] = signal_npoints + + return name_to_data, name_to_timestamps, name_to_sampling_rate, name_to_npoints + + +def read_ttl(filepath, storesList): + channels_arr = get_control_and_signal_channel_names(storesList) + storenames = storesList[0, :] + names_for_storenames = storesList[1, :] + + name_to_timestamps = {} + for storename, name in zip(storenames, names_for_storenames): + if name in channels_arr: + continue + timestamps = read_hdf5(storename, filepath, "timestamps") + name_to_timestamps[name] = timestamps + + return name_to_timestamps + + +def write_corrected_timestamps( + filepath, corrected_name_to_timestamps, name_to_timestamps, name_to_sampling_rate, name_to_correctionIndex +): + for name, correctionIndex in name_to_correctionIndex.items(): + timestamps = name_to_timestamps[name] + corrected_timestamps = corrected_name_to_timestamps[name] + sampling_rate = name_to_sampling_rate[name] + if sampling_rate.shape == (): # numpy scalar + sampling_rate = np.asarray([sampling_rate]) + name_1 = name.split("_")[-1] + write_hdf5(np.asarray([timestamps[0]]), "timeCorrection_" + name_1, filepath, "timeRecStart") + write_hdf5(corrected_timestamps, "timeCorrection_" + name_1, filepath, "timestampNew") + write_hdf5(correctionIndex, "timeCorrection_" + name_1, filepath, "correctionIndex") + write_hdf5(sampling_rate, "timeCorrection_" + name_1, filepath, "sampling_rate") + + +def write_corrected_data(filepath, name_to_corrected_data): + for name, data in name_to_corrected_data.items(): + write_hdf5(data, name, filepath, "data") + + +def write_corrected_ttl_timestamps( + filepath, + compound_name_to_corrected_ttl_timestamps, +): + logger.debug("Applying correction of timestamps to the data and event timestamps") + for compound_name, corrected_ttl_timestamps in compound_name_to_corrected_ttl_timestamps.items(): + 
write_hdf5(corrected_ttl_timestamps, compound_name, filepath, "ts") + logger.info("Timestamps corrections applied to the data and event timestamps.") + + +def read_corrected_data(control_path, signal_path, filepath, name): + control = read_hdf5("", control_path, "data").reshape(-1) + signal = read_hdf5("", signal_path, "data").reshape(-1) + tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") + + return control, signal, tsNew + + +def write_zscore(filepath, name, z_score, dff, control_fit, temp_control_arr): + write_hdf5(z_score, "z_score_" + name, filepath, "data") + write_hdf5(dff, "dff_" + name, filepath, "data") + write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") + if temp_control_arr is not None: + write_hdf5(temp_control_arr, "control_" + name, filepath, "data") + + +def read_corrected_timestamps_pairwise(filepath): + pair_name_to_tsNew = {} + pair_name_to_sampling_rate = {} + path = decide_naming_convention(filepath) + for j in range(path.shape[1]): + name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") + name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") + if name_1[-1] != name_2[-1]: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + name = name_1[-1] + + tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") + sampling_rate = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] + pair_name_to_tsNew[name] = tsNew + pair_name_to_sampling_rate[name] = sampling_rate + return pair_name_to_tsNew, pair_name_to_sampling_rate + + +def read_coords_pairwise(filepath, pair_name_to_tsNew): + pair_name_to_coords = {} + path = decide_naming_convention(filepath) + for j in range(path.shape[1]): + name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") + name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") + if name_1[-1] != name_2[-1]: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + pair_name = name_1[-1] + + tsNew = pair_name_to_tsNew[pair_name] + coords = fetchCoords(filepath, pair_name, tsNew) + pair_name_to_coords[pair_name] = coords + return pair_name_to_coords + + +def read_corrected_data_dict(filepath, storesList): # TODO: coordinate with read_corrected_data + name_to_corrected_data = {} + storenames = storesList[0, :] + names_for_storenames = storesList[1, :] + control_and_signal_names = get_control_and_signal_channel_names(storesList) + + for storename, name in zip(storenames, names_for_storenames): + if name not in control_and_signal_names: + continue + data = read_hdf5(name, filepath, "data").reshape(-1) + name_to_corrected_data[name] = data + + return name_to_corrected_data + + +def read_corrected_ttl_timestamps(filepath, storesList): + compound_name_to_ttl_timestamps = {} + storenames = storesList[0, :] + names_for_storenames = storesList[1, :] + arr = get_control_and_signal_channel_names(storesList) + + for storename, name in zip(storenames, names_for_storenames): + if name in arr: + continue + ttl_name = name + for i in range(arr.shape[1]): + name_1 = arr[0, i].split("_")[-1] + name_2 = arr[1, i].split("_")[-1] + if name_1 != name_2: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + compound_name = ttl_name + "_" + 
name_1 + ts = read_hdf5(compound_name, filepath, "ts") + compound_name_to_ttl_timestamps[compound_name] = ts + + return compound_name_to_ttl_timestamps + + +def write_artifact_corrected_timestamps(filepath, pair_name_to_corrected_timestamps): + for pair_name, timestamps in pair_name_to_corrected_timestamps.items(): + write_hdf5(timestamps, "timeCorrection_" + pair_name, filepath, "timestampNew") + + +def write_artifact_removal( + filepath, + name_to_corrected_data, + pair_name_to_corrected_timestamps, + compound_name_to_corrected_ttl_timestamps=None, +): + write_corrected_data(filepath, name_to_corrected_data) + write_corrected_ttl_timestamps(filepath, compound_name_to_corrected_ttl_timestamps) + if pair_name_to_corrected_timestamps is not None: + write_artifact_corrected_timestamps(filepath, pair_name_to_corrected_timestamps) diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py new file mode 100644 index 0000000..0806fb8 --- /dev/null +++ b/src/guppy/analysis/timestamp_correction.py @@ -0,0 +1,200 @@ +import logging + +import numpy as np + +from .io_utils import get_control_and_signal_channel_names + +logger = logging.getLogger(__name__) + + +def correct_timestamps( + timeForLightsTurnOn, + storesList, + name_to_timestamps, + name_to_data, + name_to_sampling_rate, + name_to_npoints, + name_to_timestamps_ttl, + mode, +): + name_to_corrected_timestamps, name_to_correctionIndex, name_to_corrected_data = timestampCorrection( + timeForLightsTurnOn, + storesList, + name_to_timestamps, + name_to_data, + name_to_sampling_rate, + name_to_npoints, + mode=mode, + ) + compound_name_to_corrected_ttl_timestamps = decide_naming_and_applyCorrection_ttl( + timeForLightsTurnOn, + storesList, + name_to_timestamps_ttl, + name_to_timestamps, + name_to_data, + mode=mode, + ) + + return ( + name_to_corrected_timestamps, + name_to_correctionIndex, + name_to_corrected_data, + compound_name_to_corrected_ttl_timestamps, + ) + + +# function to correct timestamps after eliminating first few seconds of the data (for csv or TDT data depending on mode) +def timestampCorrection( + timeForLightsTurnOn, + storesList, + name_to_timestamps, + name_to_data, + name_to_sampling_rate, + name_to_npoints, + mode, +): + logger.debug( + f"Correcting timestamps by getting rid of the first {timeForLightsTurnOn} seconds and convert timestamps to seconds" + ) + if mode not in ["tdt", "csv"]: + logger.error("Mode should be either 'tdt' or 'csv'") + raise ValueError("Mode should be either 'tdt' or 'csv'") + name_to_corrected_timestamps = {} + name_to_correctionIndex = {} + name_to_corrected_data = {} + storenames = storesList[0, :] + names_for_storenames = storesList[1, :] + channels_arr = get_control_and_signal_channel_names(storesList) + + indices = check_cntrl_sig_length(channels_arr, name_to_data) + + for i in range(channels_arr.shape[1]): + control_name = channels_arr[0, i] + signal_name = channels_arr[1, i] + name_1 = channels_arr[0, i].split("_")[-1] + name_2 = channels_arr[1, i].split("_")[-1] + if name_1 != name_2: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + + # dirname = os.path.dirname(path[i]) + idx = np.where(names_for_storenames == indices[i])[0] + + if idx.shape[0] == 0: + logger.error(f"{channels_arr[0,i]} does not exist in the stores list file.") + raise Exception("{} does not exist in the stores list file.".format(channels_arr[0, i])) + + name = 
names_for_storenames[idx][0] + timestamp = name_to_timestamps[name] + sampling_rate = name_to_sampling_rate[name] + npoints = name_to_npoints[name] + + if mode == "tdt": + timeRecStart = timestamp[0] + timestamps = np.subtract(timestamp, timeRecStart) + adder = np.arange(npoints) / sampling_rate + lengthAdder = adder.shape[0] + timestampNew = np.zeros((len(timestamps), lengthAdder)) + for i in range(lengthAdder): + timestampNew[:, i] = np.add(timestamps, adder[i]) + timestampNew = (timestampNew.T).reshape(-1, order="F") + correctionIndex = np.where(timestampNew >= timeForLightsTurnOn)[0] + timestampNew = timestampNew[correctionIndex] + elif mode == "csv": + correctionIndex = np.where(timestamp >= timeForLightsTurnOn)[0] + timestampNew = timestamp[correctionIndex] + + for displayName in [control_name, signal_name]: + name_to_corrected_timestamps[displayName] = timestampNew + name_to_correctionIndex[displayName] = correctionIndex + data = name_to_data[displayName] + if (data == 0).all() == True: + name_to_corrected_data[displayName] = data + else: + name_to_corrected_data[displayName] = data[correctionIndex] + + logger.info("Timestamps corrected and converted to seconds.") + return name_to_corrected_timestamps, name_to_correctionIndex, name_to_corrected_data + + +def decide_naming_and_applyCorrection_ttl( + timeForLightsTurnOn, + storesList, + name_to_timestamps_ttl, + name_to_timestamps, + name_to_data, + mode, +): + logger.debug("Applying correction of timestamps to the data and event timestamps") + storenames = storesList[0, :] + names_for_storenames = storesList[1, :] + arr = get_control_and_signal_channel_names(storesList) + indices = check_cntrl_sig_length(arr, name_to_data) + + compound_name_to_corrected_ttl_timestamps = {} + for ttl_name, ttl_timestamps in name_to_timestamps_ttl.items(): + for i in range(arr.shape[1]): + name_1 = arr[0, i].split("_")[-1] + name_2 = arr[1, i].split("_")[-1] + if name_1 != name_2: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + + idx = np.where(names_for_storenames == indices[i])[0] + if idx.shape[0] == 0: + logger.error(f"{arr[0,i]} does not exist in the stores list file.") + raise Exception("{} does not exist in the stores list file.".format(arr[0, i])) + + name = names_for_storenames[idx][0] + timestamps = name_to_timestamps[name] + timeRecStart = timestamps[0] + corrected_ttl_timestamps = applyCorrection_ttl( + timeForLightsTurnOn, + timeRecStart, + ttl_timestamps, + mode, + ) + compound_name = ttl_name + "_" + name_1 + compound_name_to_corrected_ttl_timestamps[compound_name] = corrected_ttl_timestamps + + logger.info("Timestamps corrections applied to the data and event timestamps.") + return compound_name_to_corrected_ttl_timestamps + + +def applyCorrection_ttl( + timeForLightsTurnOn, + timeRecStart, + ttl_timestamps, + mode, +): + corrected_ttl_timestamps = ttl_timestamps + if mode == "tdt": + res = (corrected_ttl_timestamps >= timeRecStart).all() + if res == True: + corrected_ttl_timestamps = np.subtract(corrected_ttl_timestamps, timeRecStart) + corrected_ttl_timestamps = np.subtract(corrected_ttl_timestamps, timeForLightsTurnOn) + else: + corrected_ttl_timestamps = np.subtract(corrected_ttl_timestamps, timeForLightsTurnOn) + elif mode == "csv": + corrected_ttl_timestamps = np.subtract(corrected_ttl_timestamps, timeForLightsTurnOn) + return corrected_ttl_timestamps + + +# function to check control and signal channel has same 
length +# if not, take a smaller length and do pre-processing +def check_cntrl_sig_length(channels_arr, name_to_data): + + indices = [] + for i in range(channels_arr.shape[1]): + control_name = channels_arr[0, i] + signal_name = channels_arr[1, i] + control = name_to_data[control_name] + signal = name_to_data[signal_name] + if control.shape[0] < signal.shape[0]: + indices.append(control_name) + elif control.shape[0] > signal.shape[0]: + indices.append(signal_name) + else: + indices.append(signal_name) + + return indices diff --git a/src/guppy/analysis/z_score.py b/src/guppy/analysis/z_score.py new file mode 100644 index 0000000..34b29ee --- /dev/null +++ b/src/guppy/analysis/z_score.py @@ -0,0 +1,148 @@ +import logging + +import numpy as np +from scipy import signal as ss + +from .control_channel import helper_create_control_channel + +logger = logging.getLogger(__name__) + + +# high-level function to compute z-score and deltaF/F +def compute_z_score( + control, + signal, + tsNew, + coords, + artifactsRemovalMethod, + filter_window, + isosbestic_control, + zscore_method, + baseline_start, + baseline_end, +): + if (control == 0).all() == True: + control = np.zeros(tsNew.shape[0]) + + z_score_arr = np.array([]) + norm_data_arr = np.full(tsNew.shape[0], np.nan) + control_fit_arr = np.full(tsNew.shape[0], np.nan) + temp_control_arr = np.full(tsNew.shape[0], np.nan) + + # for artifacts removal, each chunk which was selected by user is being processed individually and then + # z-score is calculated + for i in range(coords.shape[0]): + tsNew_index = np.where((tsNew > coords[i, 0]) & (tsNew < coords[i, 1]))[0] + if isosbestic_control == False: + control_arr = helper_create_control_channel(signal[tsNew_index], tsNew[tsNew_index], window=101) + signal_arr = signal[tsNew_index] + norm_data, control_fit = execute_controlFit_dff(control_arr, signal_arr, isosbestic_control, filter_window) + temp_control_arr[tsNew_index] = control_arr + if i < coords.shape[0] - 1: + blank_index = np.where((tsNew > coords[i, 1]) & (tsNew < coords[i + 1, 0]))[0] + temp_control_arr[blank_index] = np.full(blank_index.shape[0], np.nan) + else: + control_arr = control[tsNew_index] + signal_arr = signal[tsNew_index] + norm_data, control_fit = execute_controlFit_dff(control_arr, signal_arr, isosbestic_control, filter_window) + norm_data_arr[tsNew_index] = norm_data + control_fit_arr[tsNew_index] = control_fit + + if artifactsRemovalMethod == "concatenate": + norm_data_arr = norm_data_arr[~np.isnan(norm_data_arr)] + control_fit_arr = control_fit_arr[~np.isnan(control_fit_arr)] + z_score = z_score_computation(norm_data_arr, tsNew, zscore_method, baseline_start, baseline_end) + z_score_arr = np.concatenate((z_score_arr, z_score)) + + # handle the case if there are chunks being cut in the front and the end + if isosbestic_control == False: + coords = coords.flatten() + # front chunk + idx = np.where((tsNew >= tsNew[0]) & (tsNew < coords[0]))[0] + temp_control_arr[idx] = np.full(idx.shape[0], np.nan) + # end chunk + idx = np.where((tsNew > coords[-1]) & (tsNew <= tsNew[-1]))[0] + temp_control_arr[idx] = np.full(idx.shape[0], np.nan) + else: + temp_control_arr = None + + return z_score_arr, norm_data_arr, control_fit_arr, temp_control_arr + + +# function to filter control and signal channel, also execute above two function : controlFit and deltaFF +# function will also take care if there is only signal channel and no control channel +# if there is only signal channel, z-score will be computed using just signal channel +def 
execute_controlFit_dff(control, signal, isosbestic_control, filter_window): + + if isosbestic_control == False: + signal_smooth = filterSignal(filter_window, signal) # ss.filtfilt(b, a, signal) + control_fit = controlFit(control, signal_smooth) + norm_data = deltaFF(signal_smooth, control_fit) + else: + control_smooth = filterSignal(filter_window, control) # ss.filtfilt(b, a, control) + signal_smooth = filterSignal(filter_window, signal) # ss.filtfilt(b, a, signal) + control_fit = controlFit(control_smooth, signal_smooth) + norm_data = deltaFF(signal_smooth, control_fit) + + return norm_data, control_fit + + +# function to compute deltaF/F using fitted control channel and filtered signal channel +def deltaFF(signal, control): + + res = np.subtract(signal, control) + normData = np.divide(res, control) + # deltaFF = normData + normData = normData * 100 + + return normData + + +# function to fit control channel to signal channel +def controlFit(control, signal): + + p = np.polyfit(control, signal, 1) + arr = (p[0] * control) + p[1] + return arr + + +def filterSignal(filter_window, signal): + if filter_window == 0: + return signal + elif filter_window > 1: + b = np.divide(np.ones((filter_window,)), filter_window) + a = 1 + filtered_signal = ss.filtfilt(b, a, signal) + return filtered_signal + else: + raise Exception("Moving average filter window value is not correct.") + + +# function to compute z-score based on z-score computation method +def z_score_computation(dff, timestamps, zscore_method, baseline_start, baseline_end): + if zscore_method == "standard z-score": + numerator = np.subtract(dff, np.nanmean(dff)) + zscore = np.divide(numerator, np.nanstd(dff)) + elif zscore_method == "baseline z-score": + idx = np.where((timestamps > baseline_start) & (timestamps < baseline_end))[0] + if idx.shape[0] == 0: + logger.error( + "Baseline Window Parameters for baseline z-score computation zscore_method \ + are not correct." + ) + raise Exception( + "Baseline Window Parameters for baseline z-score computation zscore_method \ + are not correct." 
+ ) + else: + baseline_mean = np.nanmean(dff[idx]) + baseline_std = np.nanstd(dff[idx]) + numerator = np.subtract(dff, baseline_mean) + zscore = np.divide(numerator, baseline_std) + else: + median = np.median(dff) + mad = np.median(np.abs(dff - median)) + numerator = 0.6745 * (dff - median) + zscore = np.divide(numerator, mad) + + return zscore diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 8b79039..0c41ae4 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -1,22 +1,40 @@ -import fnmatch import glob import json import logging import os -import re -import shutil import sys -import h5py import matplotlib.pyplot as plt import numpy as np -import pandas as pd -from scipy import signal as ss -from scipy.optimize import curve_fit -from .combineDataFn import processTimestampsForCombiningData - -logger = logging.getLogger(__name__) +from .analysis.artifact_removal import remove_artifacts +from .analysis.combine_data import combine_data +from .analysis.control_channel import add_control_channel, create_control_channel +from .analysis.io_utils import ( + check_storeslistfile, + check_TDT, + find_files, + get_all_stores_for_combining_data, # noqa: F401 -- Necessary for other modules that depend on preprocess.py + get_coords, + read_hdf5, + takeOnlyDirs, +) +from .analysis.standard_io import ( + read_control_and_signal, + read_coords_pairwise, + read_corrected_data, + read_corrected_data_dict, + read_corrected_timestamps_pairwise, + read_corrected_ttl_timestamps, + read_ttl, + write_artifact_removal, + write_corrected_data, + write_corrected_timestamps, + write_corrected_ttl_timestamps, + write_zscore, +) +from .analysis.timestamp_correction import correct_timestamps +from .analysis.z_score import compute_z_score logger = logging.getLogger(__name__) @@ -25,404 +43,11 @@ plt.switch_backend("TKAgg") -def takeOnlyDirs(paths): - removePaths = [] - for p in paths: - if os.path.isfile(p): - removePaths.append(p) - return list(set(paths) - set(removePaths)) - - def writeToFile(value: str): with open(os.path.join(os.path.expanduser("~"), "pbSteps.txt"), "a") as file: file.write(value) -# find files by ignoring the case sensitivity -def find_files(path, glob_path, ignore_case=False): - rule = ( - re.compile(fnmatch.translate(glob_path), re.IGNORECASE) - if ignore_case - else re.compile(fnmatch.translate(glob_path)) - ) - - no_bytes_path = os.listdir(os.path.expanduser(path)) - str_path = [] - - # converting byte object to string - for x in no_bytes_path: - try: - str_path.append(x.decode("utf-8")) - except: - str_path.append(x) - return [os.path.join(path, n) for n in str_path if rule.match(n)] - - -# curve fit exponential function -def curveFitFn(x, a, b, c): - return a + (b * np.exp(-(1 / c) * x)) - - -# helper function to create control channel using signal channel -# by curve fitting signal channel to exponential function -# when there is no isosbestic control channel is present -def helper_create_control_channel(signal, timestamps, window): - # check if window is greater than signal shape - if window > signal.shape[0]: - window = ((signal.shape[0] + 1) / 2) + 1 - if window % 2 != 0: - window = window - else: - window = window + 1 - - filtered_signal = ss.savgol_filter(signal, window_length=window, polyorder=3) - - p0 = [5, 50, 60] - - try: - popt, pcov = curve_fit(curveFitFn, timestamps, filtered_signal, p0) - except Exception as e: - logger.error(str(e)) - - # logger.info('Curve Fit Parameters : ', popt) - control = curveFitFn(timestamps, *popt) - - return control 
- - -# main function to create control channel using -# signal channel and save it to a file -def create_control_channel(filepath, arr, window=5001): - - storenames = arr[0, :] - storesList = arr[1, :] - - for i in range(storesList.shape[0]): - event_name, event = storesList[i], storenames[i] - if "control" in event_name.lower() and "cntrl" in event.lower(): - logger.debug("Creating control channel from signal channel using curve-fitting") - name = event_name.split("_")[-1] - signal = read_hdf5("signal_" + name, filepath, "data") - timestampNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") - sampling_rate = np.full(timestampNew.shape, np.nan) - sampling_rate[0] = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] - - control = helper_create_control_channel(signal, timestampNew, window) - - write_hdf5(control, event_name, filepath, "data") - d = {"timestamps": timestampNew, "data": control, "sampling_rate": sampling_rate} - df = pd.DataFrame(d) - df.to_csv(os.path.join(os.path.dirname(filepath), event.lower() + ".csv"), index=False) - logger.info("Control channel from signal channel created using curve-fitting") - - -# function to add control channel when there is no -# isosbestic control channel and update the storeslist file -def add_control_channel(filepath, arr): - - storenames = arr[0, :] - storesList = np.char.lower(arr[1, :]) - - keep_control = np.array([]) - # check a case if there is isosbestic control channel present - for i in range(storesList.shape[0]): - if "control" in storesList[i].lower(): - name = storesList[i].split("_")[-1] - new_str = "signal_" + str(name).lower() - find_signal = [True for i in storesList if i == new_str] - if len(find_signal) > 1: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - if len(find_signal) == 0: - logger.error( - "Isosbectic control channel parameter is set to False and still \ - storeslist file shows there is control channel present" - ) - raise Exception( - "Isosbectic control channel parameter is set to False and still \ - storeslist file shows there is control channel present" - ) - else: - continue - - for i in range(storesList.shape[0]): - if "signal" in storesList[i].lower(): - name = storesList[i].split("_")[-1] - new_str = "control_" + str(name).lower() - find_signal = [True for i in storesList if i == new_str] - if len(find_signal) == 0: - src, dst = os.path.join(filepath, arr[0, i] + ".hdf5"), os.path.join( - filepath, "cntrl" + str(i) + ".hdf5" - ) - shutil.copyfile(src, dst) - arr = np.concatenate((arr, [["cntrl" + str(i)], ["control_" + str(arr[1, i].split("_")[-1])]]), axis=1) - - np.savetxt(os.path.join(filepath, "storesList.csv"), arr, delimiter=",", fmt="%s") - - return arr - - -# check if dealing with TDT files or csv files -def check_TDT(filepath): - path = glob.glob(os.path.join(filepath, "*.tsq")) - if len(path) > 0: - return True - else: - return False - - -# function to read hdf5 file -def read_hdf5(event, filepath, key): - if event: - event = event.replace("\\", "_") - event = event.replace("/", "_") - op = os.path.join(filepath, event + ".hdf5") - else: - op = filepath - - if os.path.exists(op): - with h5py.File(op, "r") as f: - arr = np.asarray(f[key]) - else: - logger.error(f"{event}.hdf5 file does not exist") - raise Exception("{}.hdf5 file does not exist".format(event)) - - return arr - - -# function to write hdf5 file -def write_hdf5(data, event, filepath, key): - 
event = event.replace("\\", "_") - event = event.replace("/", "_") - op = os.path.join(filepath, event + ".hdf5") - - # if file does not exist create a new file - if not os.path.exists(op): - with h5py.File(op, "w") as f: - if type(data) is np.ndarray: - f.create_dataset(key, data=data, maxshape=(None,), chunks=True) - else: - f.create_dataset(key, data=data) - - # if file already exists, append data to it or add a new key to it - else: - with h5py.File(op, "r+") as f: - if key in list(f.keys()): - if type(data) is np.ndarray: - f[key].resize(data.shape) - arr = f[key] - arr[:] = data - else: - arr = f[key] - arr = data - else: - if type(data) is np.ndarray: - f.create_dataset(key, data=data, maxshape=(None,), chunks=True) - else: - f.create_dataset(key, data=data) - - -# function to check control and signal channel has same length -# if not, take a smaller length and do pre-processing -def check_cntrl_sig_length(filepath, channels_arr, storenames, storesList): - - indices = [] - for i in range(channels_arr.shape[1]): - idx_c = np.where(storesList == channels_arr[0, i])[0] - idx_s = np.where(storesList == channels_arr[1, i])[0] - control = read_hdf5(storenames[idx_c[0]], filepath, "data") - signal = read_hdf5(storenames[idx_s[0]], filepath, "data") - if control.shape[0] < signal.shape[0]: - indices.append(storesList[idx_c[0]]) - elif control.shape[0] > signal.shape[0]: - indices.append(storesList[idx_s[0]]) - else: - indices.append(storesList[idx_s[0]]) - - return indices - - -# function to correct timestamps after eliminating first few seconds of the data (for csv data) -def timestampCorrection_csv(filepath, timeForLightsTurnOn, storesList): - - logger.debug( - f"Correcting timestamps by getting rid of the first {timeForLightsTurnOn} seconds and convert timestamps to seconds" - ) - storenames = storesList[0, :] - storesList = storesList[1, :] - - arr = [] - for i in range(storesList.shape[0]): - if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): - arr.append(storesList[i]) - - arr = sorted(arr, key=str.casefold) - try: - arr = np.asarray(arr).reshape(2, -1) - except: - logger.error("Error in saving stores list file or spelling mistake for control or signal") - raise Exception("Error in saving stores list file or spelling mistake for control or signal") - - indices = check_cntrl_sig_length(filepath, arr, storenames, storesList) - - for i in range(arr.shape[1]): - name_1 = arr[0, i].split("_")[-1] - name_2 = arr[1, i].split("_")[-1] - # dirname = os.path.dirname(path[i]) - idx = np.where(storesList == indices[i])[0] - - if idx.shape[0] == 0: - logger.error(f"{arr[0,i]} does not exist in the stores list file.") - raise Exception("{} does not exist in the stores list file.".format(arr[0, i])) - - timestamp = read_hdf5(storenames[idx][0], filepath, "timestamps") - sampling_rate = read_hdf5(storenames[idx][0], filepath, "sampling_rate") - - if name_1 == name_2: - correctionIndex = np.where(timestamp >= timeForLightsTurnOn)[0] - timestampNew = timestamp[correctionIndex] - write_hdf5(timestampNew, "timeCorrection_" + name_1, filepath, "timestampNew") - write_hdf5(correctionIndex, "timeCorrection_" + name_1, filepath, "correctionIndex") - write_hdf5(np.asarray(sampling_rate), "timeCorrection_" + name_1, filepath, "sampling_rate") - - else: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - - logger.info("Timestamps corrected and converted to seconds.") - - 
-# function to correct timestamps after eliminating first few seconds of the data (for TDT data) -def timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList): - - logger.debug( - f"Correcting timestamps by getting rid of the first {timeForLightsTurnOn} seconds and convert timestamps to seconds" - ) - storenames = storesList[0, :] - storesList = storesList[1, :] - - arr = [] - for i in range(storesList.shape[0]): - if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): - arr.append(storesList[i]) - - arr = sorted(arr, key=str.casefold) - - try: - arr = np.asarray(arr).reshape(2, -1) - except: - logger.error("Error in saving stores list file or spelling mistake for control or signal") - raise Exception("Error in saving stores list file or spelling mistake for control or signal") - - indices = check_cntrl_sig_length(filepath, arr, storenames, storesList) - - for i in range(arr.shape[1]): - name_1 = arr[0, i].split("_")[-1] - name_2 = arr[1, i].split("_")[-1] - # dirname = os.path.dirname(path[i]) - idx = np.where(storesList == indices[i])[0] - - if idx.shape[0] == 0: - logger.error(f"{arr[0,i]} does not exist in the stores list file.") - raise Exception("{} does not exist in the stores list file.".format(arr[0, i])) - - timestamp = read_hdf5(storenames[idx][0], filepath, "timestamps") - npoints = read_hdf5(storenames[idx][0], filepath, "npoints") - sampling_rate = read_hdf5(storenames[idx][0], filepath, "sampling_rate") - - if name_1 == name_2: - timeRecStart = timestamp[0] - timestamps = np.subtract(timestamp, timeRecStart) - adder = np.arange(npoints) / sampling_rate - lengthAdder = adder.shape[0] - timestampNew = np.zeros((len(timestamps), lengthAdder)) - for i in range(lengthAdder): - timestampNew[:, i] = np.add(timestamps, adder[i]) - timestampNew = (timestampNew.T).reshape(-1, order="F") - correctionIndex = np.where(timestampNew >= timeForLightsTurnOn)[0] - timestampNew = timestampNew[correctionIndex] - - write_hdf5(np.asarray([timeRecStart]), "timeCorrection_" + name_1, filepath, "timeRecStart") - write_hdf5(timestampNew, "timeCorrection_" + name_1, filepath, "timestampNew") - write_hdf5(correctionIndex, "timeCorrection_" + name_1, filepath, "correctionIndex") - write_hdf5(np.asarray([sampling_rate]), "timeCorrection_" + name_1, filepath, "sampling_rate") - else: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - - logger.info("Timestamps corrected and converted to seconds.") - # return timeRecStart, correctionIndex, timestampNew - - -# function to apply correction to control, signal and event timestamps -def applyCorrection(filepath, timeForLightsTurnOn, event, displayName, naming): - - cond = check_TDT(os.path.dirname(filepath)) - - if cond == True: - timeRecStart = read_hdf5("timeCorrection_" + naming, filepath, "timeRecStart")[0] - - timestampNew = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - correctionIndex = read_hdf5("timeCorrection_" + naming, filepath, "correctionIndex") - - if "control" in displayName.lower() or "signal" in displayName.lower(): - split_name = displayName.split("_")[-1] - if split_name == naming: - pass - else: - correctionIndex = read_hdf5("timeCorrection_" + split_name, filepath, "correctionIndex") - arr = read_hdf5(event, filepath, "data") - if (arr == 0).all() == True: - arr = arr - else: - arr = arr[correctionIndex] - write_hdf5(arr, displayName, filepath, "data") - else: - arr = 
read_hdf5(event, filepath, "timestamps") - if cond == True: - res = (arr >= timeRecStart).all() - if res == True: - arr = np.subtract(arr, timeRecStart) - arr = np.subtract(arr, timeForLightsTurnOn) - else: - arr = np.subtract(arr, timeForLightsTurnOn) - else: - arr = np.subtract(arr, timeForLightsTurnOn) - write_hdf5(arr, displayName + "_" + naming, filepath, "ts") - - # if isosbestic_control==False and 'control' in displayName.lower(): - # control = create_control_channel(filepath, displayName) - # write_hdf5(control, displayName, filepath, 'data') - - -# function to check if naming convention was followed while saving storeslist file -# and apply timestamps correction using the function applyCorrection -def decide_naming_convention_and_applyCorrection(filepath, timeForLightsTurnOn, event, displayName, storesList): - - logger.debug("Applying correction of timestamps to the data and event timestamps") - storesList = storesList[1, :] - - arr = [] - for i in range(storesList.shape[0]): - if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): - arr.append(storesList[i]) - - arr = sorted(arr, key=str.casefold) - arr = np.asarray(arr).reshape(2, -1) - - for i in range(arr.shape[1]): - name_1 = arr[0, i].split("_")[-1] - name_2 = arr[1, i].split("_")[-1] - # dirname = os.path.dirname(path[i]) - if name_1 == name_2: - applyCorrection(filepath, timeForLightsTurnOn, event, displayName, name_1) - else: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - - logger.info("Timestamps corrections applied to the data and event timestamps.") - - # function to plot z_score def visualize_z_score(filepath): @@ -590,421 +215,6 @@ def visualizeControlAndSignal(filepath, removeArtifacts): visualize(filepath, ts, control, signal, cntrl_sig_fit, plot_name, removeArtifacts) -# function to check if the naming convention for saving storeslist file was followed or not -def decide_naming_convention(filepath): - path_1 = find_files(filepath, "control_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) - - path_2 = find_files(filepath, "signal_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'signal*')) - - path = sorted(path_1 + path_2, key=str.casefold) - if len(path) % 2 != 0: - logger.error("There are not equal number of Control and Signal data") - raise Exception("There are not equal number of Control and Signal data") - - path = np.asarray(path).reshape(2, -1) - - return path - - -# function to read coordinates file which was saved by selecting chunks for artifacts removal -def fetchCoords(filepath, naming, data): - - path = os.path.join(filepath, "coordsForPreProcessing_" + naming + ".npy") - - if not os.path.exists(path): - coords = np.array([0, data[-1]]) - else: - coords = np.load(os.path.join(filepath, "coordsForPreProcessing_" + naming + ".npy"))[:, 0] - - if coords.shape[0] % 2 != 0: - logger.error("Number of values in coordsForPreProcessing file is not even.") - raise Exception("Number of values in coordsForPreProcessing file is not even.") - - coords = coords.reshape(-1, 2) - - return coords - - -# helper function to process control and signal timestamps -def eliminateData(filepath, timeForLightsTurnOn, event, sampling_rate, naming): - - ts = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - data = read_hdf5(event, filepath, "data").reshape(-1) - coords = fetchCoords(filepath, naming, ts) - - if (data == 0).all() == True: - data 
= np.zeros(ts.shape[0]) - - arr = np.array([]) - ts_arr = np.array([]) - for i in range(coords.shape[0]): - - index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] - - if len(arr) == 0: - arr = np.concatenate((arr, data[index])) - sub = ts[index][0] - timeForLightsTurnOn - new_ts = ts[index] - sub - ts_arr = np.concatenate((ts_arr, new_ts)) - else: - temp = data[index] - # new = temp + (arr[-1]-temp[0]) - temp_ts = ts[index] - new_ts = temp_ts - (temp_ts[0] - ts_arr[-1]) - arr = np.concatenate((arr, temp)) - ts_arr = np.concatenate((ts_arr, new_ts + (1 / sampling_rate))) - - # logger.info(arr.shape, ts_arr.shape) - return arr, ts_arr - - -# helper function to align event timestamps with the control and signal timestamps -def eliminateTs(filepath, timeForLightsTurnOn, event, sampling_rate, naming): - - tsNew = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - ts = read_hdf5(event + "_" + naming, filepath, "ts").reshape(-1) - coords = fetchCoords(filepath, naming, tsNew) - - ts_arr = np.array([]) - tsNew_arr = np.array([]) - for i in range(coords.shape[0]): - tsNew_index = np.where((tsNew > coords[i, 0]) & (tsNew < coords[i, 1]))[0] - ts_index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] - - if len(tsNew_arr) == 0: - sub = tsNew[tsNew_index][0] - timeForLightsTurnOn - tsNew_arr = np.concatenate((tsNew_arr, tsNew[tsNew_index] - sub)) - ts_arr = np.concatenate((ts_arr, ts[ts_index] - sub)) - else: - temp_tsNew = tsNew[tsNew_index] - temp_ts = ts[ts_index] - new_ts = temp_ts - (temp_tsNew[0] - tsNew_arr[-1]) - new_tsNew = temp_tsNew - (temp_tsNew[0] - tsNew_arr[-1]) - tsNew_arr = np.concatenate((tsNew_arr, new_tsNew + (1 / sampling_rate))) - ts_arr = np.concatenate((ts_arr, new_ts + (1 / sampling_rate))) - - return ts_arr - - -# adding nan values to removed chunks -# when using artifacts removal method - replace with NaN -def addingNaNValues(filepath, event, naming): - - ts = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - data = read_hdf5(event, filepath, "data").reshape(-1) - coords = fetchCoords(filepath, naming, ts) - - if (data == 0).all() == True: - data = np.zeros(ts.shape[0]) - - arr = np.array([]) - ts_index = np.arange(ts.shape[0]) - for i in range(coords.shape[0]): - - index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] - arr = np.concatenate((arr, index)) - - nan_indices = list(set(ts_index).symmetric_difference(arr)) - data[nan_indices] = np.nan - - return data - - -# remove event TTLs which falls in the removed chunks -# when using artifacts removal method - replace with NaN -def removeTTLs(filepath, event, naming): - tsNew = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - ts = read_hdf5(event + "_" + naming, filepath, "ts").reshape(-1) - coords = fetchCoords(filepath, naming, tsNew) - - ts_arr = np.array([]) - for i in range(coords.shape[0]): - ts_index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] - ts_arr = np.concatenate((ts_arr, ts[ts_index])) - - return ts_arr - - -def addingNaNtoChunksWithArtifacts(filepath, events): - - logger.debug("Replacing chunks with artifacts by NaN values.") - storesList = events[1, :] - - path = decide_naming_convention(filepath) - - for j in range(path.shape[1]): - name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") - name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") - # dirname = os.path.dirname(path[i]) - if name_1[-1] == name_2[-1]: - name = name_1[-1] - sampling_rate = read_hdf5("timeCorrection_" + name, filepath, 
"sampling_rate")[0] - for i in range(len(storesList)): - if ( - "control_" + name.lower() in storesList[i].lower() - or "signal_" + name.lower() in storesList[i].lower() - ): # changes done - data = addingNaNValues(filepath, storesList[i], name) - write_hdf5(data, storesList[i], filepath, "data") - else: - if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): - continue - else: - ts = removeTTLs(filepath, storesList[i], name) - write_hdf5(ts, storesList[i] + "_" + name, filepath, "ts") - - else: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - logger.info("Chunks with artifacts are replaced by NaN values.") - - -# main function to align timestamps for control, signal and event timestamps for artifacts removal -def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, events): - - logger.debug("Processing timestamps to get rid of artifacts using concatenate method...") - storesList = events[1, :] - - path = decide_naming_convention(filepath) - - timestamp_dict = dict() - for j in range(path.shape[1]): - name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") - name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") - # dirname = os.path.dirname(path[i]) - if name_1[-1] == name_2[-1]: - name = name_1[-1] - sampling_rate = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] - - for i in range(len(storesList)): - if ( - "control_" + name.lower() in storesList[i].lower() - or "signal_" + name.lower() in storesList[i].lower() - ): # changes done - data, timestampNew = eliminateData( - filepath, timeForLightsTurnOn, storesList[i], sampling_rate, name - ) - write_hdf5(data, storesList[i], filepath, "data") - else: - if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): - continue - else: - ts = eliminateTs(filepath, timeForLightsTurnOn, storesList[i], sampling_rate, name) - write_hdf5(ts, storesList[i] + "_" + name, filepath, "ts") - - # timestamp_dict[name] = timestampNew - write_hdf5(timestampNew, "timeCorrection_" + name, filepath, "timestampNew") - else: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - logger.info("Timestamps processed, artifacts are removed and good chunks are concatenated.") - - -# function to compute deltaF/F using fitted control channel and filtered signal channel -def deltaFF(signal, control): - - res = np.subtract(signal, control) - normData = np.divide(res, control) - # deltaFF = normData - normData = normData * 100 - - return normData - - -# function to fit control channel to signal channel -def controlFit(control, signal): - - p = np.polyfit(control, signal, 1) - arr = (p[0] * control) + p[1] - return arr - - -def filterSignal(filter_window, signal): - if filter_window == 0: - return signal - elif filter_window > 1: - b = np.divide(np.ones((filter_window,)), filter_window) - a = 1 - filtered_signal = ss.filtfilt(b, a, signal) - return filtered_signal - else: - raise Exception("Moving average filter window value is not correct.") - - -# function to filter control and signal channel, also execute above two function : controlFit and deltaFF -# function will also take care if there is only signal channel and no control channel -# if there is only signal channel, z-score will be computed using just signal channel -def execute_controlFit_dff(control, 
signal, isosbestic_control, filter_window): - - if isosbestic_control == False: - signal_smooth = filterSignal(filter_window, signal) # ss.filtfilt(b, a, signal) - control_fit = controlFit(control, signal_smooth) - norm_data = deltaFF(signal_smooth, control_fit) - else: - control_smooth = filterSignal(filter_window, control) # ss.filtfilt(b, a, control) - signal_smooth = filterSignal(filter_window, signal) # ss.filtfilt(b, a, signal) - control_fit = controlFit(control_smooth, signal_smooth) - norm_data = deltaFF(signal_smooth, control_fit) - - return norm_data, control_fit - - -# function to compute z-score based on z-score computation method -def z_score_computation(dff, timestamps, inputParameters): - - zscore_method = inputParameters["zscore_method"] - baseline_start, baseline_end = inputParameters["baselineWindowStart"], inputParameters["baselineWindowEnd"] - - if zscore_method == "standard z-score": - numerator = np.subtract(dff, np.nanmean(dff)) - zscore = np.divide(numerator, np.nanstd(dff)) - elif zscore_method == "baseline z-score": - idx = np.where((timestamps > baseline_start) & (timestamps < baseline_end))[0] - if idx.shape[0] == 0: - logger.error( - "Baseline Window Parameters for baseline z-score computation zscore_method \ - are not correct." - ) - raise Exception( - "Baseline Window Parameters for baseline z-score computation zscore_method \ - are not correct." - ) - else: - baseline_mean = np.nanmean(dff[idx]) - baseline_std = np.nanstd(dff[idx]) - numerator = np.subtract(dff, baseline_mean) - zscore = np.divide(numerator, baseline_std) - else: - median = np.median(dff) - mad = np.median(np.abs(dff - median)) - numerator = 0.6745 * (dff - median) - zscore = np.divide(numerator, mad) - - return zscore - - -# helper function to compute z-score and deltaF/F -def helper_z_score(control, signal, filepath, name, inputParameters): # helper_z_score(control_smooth, signal_smooth): - - removeArtifacts = inputParameters["removeArtifacts"] - artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] - filter_window = inputParameters["filter_window"] - - isosbestic_control = inputParameters["isosbestic_control"] - tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") - coords_path = os.path.join(filepath, "coordsForPreProcessing_" + name + ".npy") - - logger.info("Remove Artifacts : ", removeArtifacts) - - if (control == 0).all() == True: - control = np.zeros(tsNew.shape[0]) - - z_score_arr = np.array([]) - norm_data_arr = np.full(tsNew.shape[0], np.nan) - control_fit_arr = np.full(tsNew.shape[0], np.nan) - temp_control_arr = np.full(tsNew.shape[0], np.nan) - - if removeArtifacts == True: - coords = fetchCoords(filepath, name, tsNew) - - # for artifacts removal, each chunk which was selected by user is being processed individually and then - # z-score is calculated - for i in range(coords.shape[0]): - tsNew_index = np.where((tsNew > coords[i, 0]) & (tsNew < coords[i, 1]))[0] - if isosbestic_control == False: - control_arr = helper_create_control_channel(signal[tsNew_index], tsNew[tsNew_index], window=101) - signal_arr = signal[tsNew_index] - norm_data, control_fit = execute_controlFit_dff( - control_arr, signal_arr, isosbestic_control, filter_window - ) - temp_control_arr[tsNew_index] = control_arr - if i < coords.shape[0] - 1: - blank_index = np.where((tsNew > coords[i, 1]) & (tsNew < coords[i + 1, 0]))[0] - temp_control_arr[blank_index] = np.full(blank_index.shape[0], np.nan) - else: - control_arr = control[tsNew_index] - signal_arr = signal[tsNew_index] - 
norm_data, control_fit = execute_controlFit_dff( - control_arr, signal_arr, isosbestic_control, filter_window - ) - norm_data_arr[tsNew_index] = norm_data - control_fit_arr[tsNew_index] = control_fit - - if artifactsRemovalMethod == "concatenate": - norm_data_arr = norm_data_arr[~np.isnan(norm_data_arr)] - control_fit_arr = control_fit_arr[~np.isnan(control_fit_arr)] - z_score = z_score_computation(norm_data_arr, tsNew, inputParameters) - z_score_arr = np.concatenate((z_score_arr, z_score)) - else: - tsNew_index = np.arange(tsNew.shape[0]) - norm_data, control_fit = execute_controlFit_dff(control, signal, isosbestic_control, filter_window) - z_score = z_score_computation(norm_data, tsNew, inputParameters) - z_score_arr = np.concatenate((z_score_arr, z_score)) - norm_data_arr[tsNew_index] = norm_data # np.concatenate((norm_data_arr, norm_data)) - control_fit_arr[tsNew_index] = control_fit # np.concatenate((control_fit_arr, control_fit)) - - # handle the case if there are chunks being cut in the front and the end - if isosbestic_control == False and removeArtifacts == True: - coords = coords.flatten() - # front chunk - idx = np.where((tsNew >= tsNew[0]) & (tsNew < coords[0]))[0] - temp_control_arr[idx] = np.full(idx.shape[0], np.nan) - # end chunk - idx = np.where((tsNew > coords[-1]) & (tsNew <= tsNew[-1]))[0] - temp_control_arr[idx] = np.full(idx.shape[0], np.nan) - write_hdf5(temp_control_arr, "control_" + name, filepath, "data") - - return z_score_arr, norm_data_arr, control_fit_arr - - -# compute z-score and deltaF/F and save it to hdf5 file -def compute_z_score(filepath, inputParameters): - - logger.debug(f"Computing z-score for each of the data in {filepath}") - remove_artifacts = inputParameters["removeArtifacts"] - - path_1 = find_files(filepath, "control_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) - path_2 = find_files(filepath, "signal_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'signal*')) - - path = sorted(path_1 + path_2, key=str.casefold) - - b = np.divide(np.ones((100,)), 100) - a = 1 - - if len(path) % 2 != 0: - logger.error("There are not equal number of Control and Signal data") - raise Exception("There are not equal number of Control and Signal data") - - path = np.asarray(path).reshape(2, -1) - - for i in range(path.shape[1]): - name_1 = ((os.path.basename(path[0, i])).split(".")[0]).split("_") - name_2 = ((os.path.basename(path[1, i])).split(".")[0]).split("_") - # dirname = os.path.dirname(path[i]) - - if name_1[-1] == name_2[-1]: - name = name_1[-1] - control = read_hdf5("", path[0, i], "data").reshape(-1) - signal = read_hdf5("", path[1, i], "data").reshape(-1) - # control_smooth = ss.filtfilt(b, a, control) - # signal_smooth = ss.filtfilt(b, a, signal) - # _score, dff = helper_z_score(control_smooth, signal_smooth) - z_score, dff, control_fit = helper_z_score(control, signal, filepath, name, inputParameters) - if remove_artifacts == True: - write_hdf5(z_score, "z_score_" + name, filepath, "data") - write_hdf5(dff, "dff_" + name, filepath, "data") - write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") - else: - write_hdf5(z_score, "z_score_" + name, filepath, "data") - write_hdf5(dff, "dff_" + name, filepath, "data") - write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") - else: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - - logger.info(f"z-score for the data in {filepath} 
computed.") - - # function to execute timestamps corrections using functions timestampCorrection and decide_naming_convention_and_applyCorrection def execute_timestamp_correction(folderNames, inputParameters): @@ -1014,7 +224,7 @@ def execute_timestamp_correction(folderNames, inputParameters): for i in range(len(folderNames)): filepath = folderNames[i] storesListPath = takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*"))) - cond = check_TDT(folderNames[i]) + mode = "tdt" if check_TDT(folderNames[i]) else "csv" logger.debug(f"Timestamps corrections started for {filepath}") for j in range(len(storesListPath)): filepath = storesListPath[j] @@ -1025,15 +235,36 @@ def execute_timestamp_correction(folderNames, inputParameters): if isosbestic_control == False: storesList = add_control_channel(filepath, storesList) - if cond == True: - timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList) - else: - timestampCorrection_csv(filepath, timeForLightsTurnOn, storesList) - - for k in range(storesList.shape[1]): - decide_naming_convention_and_applyCorrection( - filepath, timeForLightsTurnOn, storesList[0, k], storesList[1, k], storesList - ) + control_and_signal_dicts = read_control_and_signal(filepath, storesList) + name_to_data, name_to_timestamps, name_to_sampling_rate, name_to_npoints = control_and_signal_dicts + name_to_timestamps_ttl = read_ttl(filepath, storesList) + + timestamps_dicts = correct_timestamps( + timeForLightsTurnOn, + storesList, + name_to_timestamps, + name_to_data, + name_to_sampling_rate, + name_to_npoints, + name_to_timestamps_ttl, + mode=mode, + ) + ( + name_to_corrected_timestamps, + name_to_correctionIndex, + name_to_corrected_data, + compound_name_to_corrected_ttl_timestamps, + ) = timestamps_dicts + + write_corrected_timestamps( + filepath, + name_to_corrected_timestamps, + name_to_timestamps, + name_to_sampling_rate, + name_to_correctionIndex, + ) + write_corrected_data(filepath, name_to_corrected_data) + write_corrected_ttl_timestamps(filepath, compound_name_to_corrected_ttl_timestamps) # check if isosbestic control is false and also if new control channel is added if isosbestic_control == False: @@ -1044,45 +275,133 @@ def execute_timestamp_correction(folderNames, inputParameters): logger.info(f"Timestamps corrections finished for {filepath}") -# for combining data, reading storeslist file from both data and create a new storeslist array -def check_storeslistfile(folderNames): - storesList = np.array([[], []]) +# function to compute z-score and deltaF/F +def execute_zscore(folderNames, inputParameters): + + plot_zScore_dff = inputParameters["plot_zScore_dff"] + combine_data = inputParameters["combine_data"] + remove_artifacts = inputParameters["removeArtifacts"] + artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] + filter_window = inputParameters["filter_window"] + isosbestic_control = inputParameters["isosbestic_control"] + zscore_method = inputParameters["zscore_method"] + baseline_start, baseline_end = inputParameters["baselineWindowStart"], inputParameters["baselineWindowEnd"] + + storesListPath = [] for i in range(len(folderNames)): - filepath = folderNames[i] - storesListPath = takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*"))) - for j in range(len(storesListPath)): - filepath = storesListPath[j] - storesList = np.concatenate( - ( - storesList, - np.genfromtxt(os.path.join(filepath, "storesList.csv"), dtype="str", delimiter=",").reshape(2, -1), - ), - axis=1, + if combine_data == True: + 
storesListPath.append([folderNames[i][0]]) + else: + filepath = folderNames[i] + storesListPath.append(takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*")))) + storesListPath = np.concatenate(storesListPath) + + for j in range(len(storesListPath)): + filepath = storesListPath[j] + logger.debug(f"Computing z-score for each of the data in {filepath}") + path_1 = find_files(filepath, "control_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) + path_2 = find_files(filepath, "signal_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'signal*')) + path = sorted(path_1 + path_2, key=str.casefold) + if len(path) % 2 != 0: + logger.error("There are not equal number of Control and Signal data") + raise Exception("There are not equal number of Control and Signal data") + path = np.asarray(path).reshape(2, -1) + + for i in range(path.shape[1]): + name_1 = ((os.path.basename(path[0, i])).split(".")[0]).split("_") + name_2 = ((os.path.basename(path[1, i])).split(".")[0]).split("_") + if name_1[-1] != name_2[-1]: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + name = name_1[-1] + + control, signal, tsNew = read_corrected_data(path[0, i], path[1, i], filepath, name) + coords = get_coords(filepath, name, tsNew, remove_artifacts) + z_score, dff, control_fit, temp_control_arr = compute_z_score( + control, + signal, + tsNew, + coords, + artifactsRemovalMethod, + filter_window, + isosbestic_control, + zscore_method, + baseline_start, + baseline_end, ) + write_zscore(filepath, name, z_score, dff, control_fit, temp_control_arr) - storesList = np.unique(storesList, axis=1) + logger.info(f"z-score for the data in {filepath} computed.") - return storesList + if not remove_artifacts: + visualizeControlAndSignal(filepath, removeArtifacts=remove_artifacts) + if plot_zScore_dff == "z_score": + visualize_z_score(filepath) + if plot_zScore_dff == "dff": + visualize_dff(filepath) + if plot_zScore_dff == "Both": + visualize_z_score(filepath) + visualize_dff(filepath) -def get_all_stores_for_combining_data(folderNames): - op = [] - for i in range(100): - temp = [] - match = r"[\s\S]*" + "_output_" + str(i) - for j in folderNames: - temp.append(re.findall(match, j)) - temp = sorted(list(np.concatenate(temp).flatten()), key=str.casefold) - if len(temp) > 0: - op.append(temp) + writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") + inputParameters["step"] += 1 + + plt.show() + logger.info("Z-score computation completed.") - return op + +# function to remove artifacts from z-score data +def execute_artifact_removal(folderNames, inputParameters): + + timeForLightsTurnOn = inputParameters["timeForLightsTurnOn"] + artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] + combine_data = inputParameters["combine_data"] + + storesListPath = [] + for i in range(len(folderNames)): + if combine_data == True: + storesListPath.append([folderNames[i][0]]) + else: + filepath = folderNames[i] + storesListPath.append(takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*")))) + + storesListPath = np.concatenate(storesListPath) + + for j in range(len(storesListPath)): + filepath = storesListPath[j] + storesList = np.genfromtxt(os.path.join(filepath, "storesList.csv"), dtype="str", delimiter=",").reshape(2, -1) + + name_to_data = read_corrected_data_dict(filepath, storesList) + pair_name_to_tsNew, pair_name_to_sampling_rate = 
read_corrected_timestamps_pairwise(filepath) + pair_name_to_coords = read_coords_pairwise(filepath, pair_name_to_tsNew) + compound_name_to_ttl_timestamps = read_corrected_ttl_timestamps(filepath, storesList) + + logger.debug("Removing artifacts from the data...") + name_to_data, pair_name_to_timestamps, compound_name_to_ttl_timestamps = remove_artifacts( + timeForLightsTurnOn, + storesList, + pair_name_to_tsNew, + pair_name_to_sampling_rate, + pair_name_to_coords, + name_to_data, + compound_name_to_ttl_timestamps, + method=artifactsRemovalMethod, + ) + + write_artifact_removal(filepath, name_to_data, pair_name_to_timestamps, compound_name_to_ttl_timestamps) + visualizeControlAndSignal(filepath, removeArtifacts=True) + + writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") + inputParameters["step"] += 1 + + plt.show() + logger.info("Artifact removal completed.") # function to combine data when there are two different data files for the same recording session # it will combine the data, do timestamps processing and save the combined data in the first output folder. -def combineData(folderNames, inputParameters, storesList): - +def execute_combine_data(folderNames, inputParameters, storesList): logger.debug("Combining Data from different data files...") timeForLightsTurnOn = inputParameters["timeForLightsTurnOn"] op_folder = [] @@ -1117,64 +436,12 @@ def combineData(folderNames, inputParameters, storesList): op = get_all_stores_for_combining_data(op_folder) # processing timestamps for combining the data - processTimestampsForCombiningData(op, timeForLightsTurnOn, storesList, sampling_rate[0]) + combine_data(op, timeForLightsTurnOn, storesList, sampling_rate[0]) logger.info("Data is combined from different data files.") return op -# function to compute z-score and deltaF/F using functions : compute_z_score and/or processTimestampsForArtifacts -def execute_zscore(folderNames, inputParameters): - - timeForLightsTurnOn = inputParameters["timeForLightsTurnOn"] - remove_artifacts = inputParameters["removeArtifacts"] - artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] - plot_zScore_dff = inputParameters["plot_zScore_dff"] - combine_data = inputParameters["combine_data"] - isosbestic_control = inputParameters["isosbestic_control"] - - storesListPath = [] - for i in range(len(folderNames)): - if combine_data == True: - storesListPath.append([folderNames[i][0]]) - else: - filepath = folderNames[i] - storesListPath.append(takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*")))) - - storesListPath = np.concatenate(storesListPath) - - for j in range(len(storesListPath)): - filepath = storesListPath[j] - storesList = np.genfromtxt(os.path.join(filepath, "storesList.csv"), dtype="str", delimiter=",").reshape(2, -1) - - if remove_artifacts == True: - logger.debug("Removing Artifacts from the data and correcting timestamps...") - compute_z_score(filepath, inputParameters) - if artifactsRemovalMethod == "concatenate": - processTimestampsForArtifacts(filepath, timeForLightsTurnOn, storesList) - else: - addingNaNtoChunksWithArtifacts(filepath, storesList) - visualizeControlAndSignal(filepath, remove_artifacts) - logger.info("Artifacts from the data are removed and timestamps are corrected.") - else: - compute_z_score(filepath, inputParameters) - visualizeControlAndSignal(filepath, remove_artifacts) - - if plot_zScore_dff == "z_score": - visualize_z_score(filepath) - if plot_zScore_dff == "dff": - visualize_dff(filepath) - if plot_zScore_dff == "Both": - 
visualize_z_score(filepath) - visualize_dff(filepath) - - writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") - inputParameters["step"] += 1 - - plt.show() - logger.info("Signal data and event timestamps are extracted.") - - def extractTsAndSignal(inputParameters): logger.debug("Extracting signal data and event timestamps...") @@ -1203,13 +470,17 @@ def extractTsAndSignal(inputParameters): writeToFile(str((pbMaxValue + 1) * 10) + "\n" + str(10) + "\n") execute_timestamp_correction(folderNames, inputParameters) execute_zscore(folderNames, inputParameters) + if remove_artifacts == True: + execute_artifact_removal(folderNames, inputParameters) else: pbMaxValue = 1 + len(folderNames) writeToFile(str((pbMaxValue) * 10) + "\n" + str(10) + "\n") execute_timestamp_correction(folderNames, inputParameters) storesList = check_storeslistfile(folderNames) - op_folder = combineData(folderNames, inputParameters, storesList) + op_folder = execute_combine_data(folderNames, inputParameters, storesList) execute_zscore(op_folder, inputParameters) + if remove_artifacts == True: + execute_artifact_removal(op_folder, inputParameters) def main(input_parameters): diff --git a/step4_data_flow_analysis.md b/step4_data_flow_analysis.md new file mode 100644 index 0000000..d86e938 --- /dev/null +++ b/step4_data_flow_analysis.md @@ -0,0 +1,348 @@ +# Step 4 (preprocess.py) Data Flow Analysis + +## Overview + +Step 4 processes timestamp-corrected photometry data and computes normalized signals (ΔF/F and z-scores). It handles artifact removal, data combination from multiple sessions, and generates quality control visualizations. + +## High-Level Data Flow + +```mermaid +flowchart TD + A[Entry: extractTsAndSignal] --> B{combine_data?} + + B -->|False| C[execute_timestamp_correction] + B -->|True| D[execute_timestamp_correction] + + C --> E[execute_zscore] + + D --> F[check_storeslistfile] + F --> G[combineData] + G --> H[execute_zscore] + + E --> I[Output: z_score, dff, cntrl_sig_fit HDF5 files] + H --> I + + style A fill:#e1f5ff + style I fill:#d4edda +``` + +## Main Processing Paths + +### Entry Point +**`extractTsAndSignal(inputParameters)`** (line 1178) is the main entry point called by the GUI or API. + +### Path 1: Normal Processing (combine_data = False) +1. `execute_timestamp_correction()` → Correct timestamps and align data +2. `execute_zscore()` → Compute z-scores and ΔF/F + +### Path 2: Combined Data Processing (combine_data = True) +1. `execute_timestamp_correction()` → Correct timestamps for each file +2. `check_storeslistfile()` → Merge store lists from multiple files +3. `combineData()` → Combine data from multiple recording sessions +4. `execute_zscore()` → Compute z-scores and ΔF/F on combined data + +## Detailed Processing Stages + +### Stage 1: Timestamp Correction + +```mermaid +flowchart LR + A[Raw HDF5 files] --> B[Read storesList.csv] + B --> C{isosbestic_control?} + C -->|No| D[add_control_channel] + C -->|Yes| E[timestampCorrection_tdt/csv] + D --> E + E --> F[Eliminate first N seconds] + F --> G[decide_naming_convention_and_applyCorrection] + G --> H[applyCorrection for each store] + H --> I{isosbestic_control?} + I -->|No| J[create_control_channel via curve fitting] + I -->|Yes| K[timeCorrection_*.hdf5 files] + J --> K + + style A fill:#e1f5ff + style K fill:#d4edda +``` + +#### Function: `execute_timestamp_correction(folderNames, inputParameters)` + +**Input:** +- Raw HDF5 files from extractors: `control_*.hdf5`, `signal_*.hdf5`, `event_*.hdf5` + +**Process:** +1. 
For each session folder: + - Read `storesList.csv` (mapping of raw names to semantic names) + - If no isosbestic control: `add_control_channel()` creates placeholder control files + - **`timestampCorrection_tdt()`** or **`timestampCorrection_csv()`**: + - Eliminates first N seconds (`timeForLightsTurnOn`) + - For TDT: expands timestamps from block timestamps + sampling rate + - For CSV: uses timestamps as-is + - Writes `timeCorrection_*.hdf5` with keys: `timestampNew`, `correctionIndex`, `sampling_rate` + - **`decide_naming_convention_and_applyCorrection()`**: + - For each store, calls `applyCorrection()` to crop data using `correctionIndex` + - For control/signal channels: crops data arrays + - For event channels: subtracts time offset from timestamps + - If no isosbestic control: **`create_control_channel()`** generates synthetic control via curve fitting + +**Output:** +- Timestamp-corrected HDF5 files with trimmed data +- `timeCorrection_*.hdf5` files containing corrected timestamps + +### Stage 2: Z-Score Computation + +```mermaid +flowchart TD + A[Timestamp-corrected HDF5] --> B[compute_z_score] + B --> C{removeArtifacts?} + + C -->|No| D[helper_z_score: full data] + C -->|Yes| E[helper_z_score: chunk-by-chunk] + + D --> F[filterSignal] + E --> F + + F --> G[controlFit: linear regression] + G --> H[deltaFF: compute ΔF/F] + H --> I[z_score_computation] + + I --> J{removeArtifacts?} + + J -->|No| K[Write z_score, dff, cntrl_sig_fit] + J -->|Yes| L{artifactsRemovalMethod?} + + L -->|concatenate| M[processTimestampsForArtifacts] + L -->|NaN| N[addingNaNtoChunksWithArtifacts] + + M --> K + N --> K + + K --> O[visualizeControlAndSignal] + + style A fill:#e1f5ff + style K fill:#d4edda + style O fill:#fff3cd +``` + +#### Function: `execute_zscore(folderNames, inputParameters)` + +**Input:** +- Timestamp-corrected HDF5 files + +**Process:** +1. 
For each output folder: + + **`compute_z_score(filepath, inputParameters)`**: + - For each control/signal pair: + - **`helper_z_score(control, signal, filepath, name, inputParameters)`**: + + **Without artifacts removal:** + - `execute_controlFit_dff()`: Filter signals → fit control to signal → compute ΔF/F + - `z_score_computation()`: Compute z-score from ΔF/F + + **With artifacts removal:** + - For each user-selected chunk (from `coordsForPreProcessing_*.npy`): + - If no isosbestic: `helper_create_control_channel()` creates synthetic control + - `execute_controlFit_dff()` on chunk + - Concatenate or NaN-fill between chunks + - `z_score_computation()` on processed data + + - Writes: `z_score_*.hdf5`, `dff_*.hdf5`, `cntrl_sig_fit_*.hdf5` + + **If artifacts removal with concatenate method:** + - **`processTimestampsForArtifacts()`**: + - `eliminateData()`: Concatenates good chunks, adjusts timestamps to be continuous + - `eliminateTs()`: Aligns event timestamps with new timeline + - Overwrites data files with concatenated versions + + **If artifacts removal with NaN method:** + - **`addingNaNtoChunksWithArtifacts()`**: + - `addingNaNValues()`: Replaces bad chunks with NaN + - `removeTTLs()`: Filters event timestamps to keep only valid times + + - **`visualizeControlAndSignal()`**: Plots control, signal, cntrl_sig_fit for QC + +**Output:** +- `z_score_*.hdf5` (z-scored signal) +- `dff_*.hdf5` (ΔF/F) +- `cntrl_sig_fit_*.hdf5` (fitted control channel) + +## Key Data Transformations + +### Signal Processing Pipeline + +```mermaid +flowchart LR + A[Raw Signal] --> B[filterSignal: Moving Average] + C[Raw Control] --> D[filterSignal: Moving Average] + + B --> E[controlFit: Linear Regression] + D --> E + + E --> F[control_fit = p0*control + p1] + F --> G[deltaFF] + + B --> G + + G --> H[ΔF/F = signal - control_fit / control_fit * 100] + H --> I[z_score_computation] + + I --> J{zscore_method?} + J -->|standard| K[z = ΔF/F - mean / std] + J -->|baseline| L[z = ΔF/F - baseline_mean / baseline_std] + J -->|robust| M[z = 0.6745 * ΔF/F - median / MAD] + + K --> N[Z-Score Output] + L --> N + M --> N + + style A fill:#e1f5ff + style C fill:#e1f5ff + style N fill:#d4edda +``` + +### Transformation Functions + +1. **`filterSignal(filter_window, signal)`** (line 822) + - Applies moving average filter with configurable window + - Uses `scipy.signal.filtfilt` for zero-phase filtering + +2. **`controlFit(control, signal)`** (line 815) + - Linear regression: fits control to signal + - Returns: `fitted_control = p[0] * control + p[1]` + +3. **`deltaFF(signal, control)`** (line 804) + - Formula: `((signal - control) / control) * 100` + - Computes normalized fluorescence change + +4. 
**`z_score_computation(dff, timestamps, inputParameters)`** (line 853) + - **Standard z-score:** `(ΔF/F - mean(ΔF/F)) / std(ΔF/F)` + - **Baseline z-score:** `(ΔF/F - mean(baseline)) / std(baseline)` + - **Robust z-score:** `0.6745 * (ΔF/F - median) / MAD` + +## Artifact Removal Workflow + +### Interactive Artifact Selection + +The `visualize()` function (line 469) provides an interactive matplotlib plot: +- **Space key:** Mark artifact boundary (vertical line drawn) +- **'d' key:** Delete last marked boundary +- **Close plot:** Save coordinates to `coordsForPreProcessing_*.npy` + +### Two Removal Methods + +**Concatenate Method:** +- Removes artifact chunks completely +- Concatenates good chunks end-to-end +- Adjusts timestamps to be continuous +- Event timestamps realigned to new timeline + +**NaN Method:** +- Replaces artifact chunks with NaN values +- Preserves original timeline +- Filters out event timestamps in artifact regions + +## Supporting Functions + +### Control Channel Creation + +**`helper_create_control_channel(signal, timestamps, window)`** (line 69) +- Used when no isosbestic control is available +- Applies Savitzky-Golay filter to signal +- Fits to exponential function: `f(x) = a + b * exp(-(1/c) * x)` +- Returns synthetic control channel + +### Data Combination + +**`combineData(folderNames, inputParameters, storesList)`** (line 1084) +- Merges data from multiple recording sessions +- Validates that sampling rates match across sessions +- Calls `processTimestampsForCombiningData()` to align timelines +- Saves combined data to first output folder + +### Coordinate Fetching + +**`fetchCoords(filepath, naming, data)`** (line 610) +- Reads `coordsForPreProcessing_*.npy` (artifact boundary coordinates) +- If file doesn't exist: uses `[0, data[-1]]` (entire recording) +- Validates even number of coordinates (pairs of boundaries) +- Returns reshaped array of coordinate pairs + +## File I/O Summary + +### Files Read + +| File Pattern | Content | Source | +|-------------|---------|--------| +| `control_*.hdf5` | Control channel data | Extractors (Step 3) | +| `signal_*.hdf5` | Signal channel data | Extractors (Step 3) | +| `event_*.hdf5` | Event timestamps | Extractors (Step 3) | +| `storesList.csv` | Channel name mapping | Step 2 | +| `coordsForPreProcessing_*.npy` | Artifact boundaries | User selection (optional) | + +### Files Written + +| File Pattern | Content | Keys | +|-------------|---------|------| +| `timeCorrection_*.hdf5` | Corrected timestamps | `timestampNew`, `correctionIndex`, `sampling_rate`, `timeRecStart` (TDT only) | +| `z_score_*.hdf5` | Z-scored signal | `data` | +| `dff_*.hdf5` | ΔF/F signal | `data` | +| `cntrl_sig_fit_*.hdf5` | Fitted control | `data` | +| `event_*_*.hdf5` | Corrected event timestamps | `ts` | + +## Key Parameters from inputParameters + +| Parameter | Purpose | Default/Options | +|-----------|---------|-----------------| +| `timeForLightsTurnOn` | Seconds to eliminate from start | 1 | +| `filter_window` | Moving average window size | 100 | +| `isosbestic_control` | Use isosbestic control channel? | True/False | +| `removeArtifacts` | Enable artifact removal? | True/False | +| `artifactsRemovalMethod` | How to handle artifacts | "concatenate" / "NaN" | +| `zscore_method` | Z-score computation method | "standard z-score" / "baseline z-score" / "robust z-score" | +| `baselineWindowStart` | Baseline window start (seconds) | 0 | +| `baselineWindowEnd` | Baseline window end (seconds) | 0 | +| `combine_data` | Combine multiple recordings? 
| True/False | + +## Architecture Notes for Refactoring + +### Current Coupling Issues + +1. **GUI Progress Tracking:** `writeToFile()` writes to `~/pbSteps.txt` for progress bar updates (lines 36-38, 1042, 1171, 1203, 1208, 1220) +2. **Interactive Plotting:** `visualize()` requires user interaction (matplotlib event handlers) +3. **File Path Assumptions:** Hard-coded path patterns (`*_output_*`, naming conventions) +4. **Mixed Responsibilities:** Single functions handle both computation and I/O + +### Recommended Separation Points + +**Backend Analysis Layer Should Include:** +- `filterSignal()` - pure signal processing +- `controlFit()` - pure regression +- `deltaFF()` - pure computation +- `z_score_computation()` - pure statistical computation +- `helper_create_control_channel()` - algorithmic control generation +- Core timestamp correction logic (separated from I/O) +- Core artifact removal logic (separated from I/O) + +**Data I/O Layer Should Include:** +- `read_hdf5()`, `write_hdf5()` - file operations +- Store list reading/writing +- Coordinate file handling +- HDF5 file discovery and path management + +**Frontend Visualization Layer Should Include:** +- `visualize()` - interactive artifact selection +- `visualizeControlAndSignal()` - QC plots +- `visualize_z_score()`, `visualize_dff()` - result visualization +- Progress tracking callbacks (replace `writeToFile()`) + +### Potential Refactoring Strategy + +1. **Extract pure computation functions** into a `signal_processing` module +2. **Create data models** (dataclasses) for: + - TimeCorrectionResult + - ProcessedSignal (with z_score, dff, control_fit) + - ArtifactRegions +3. **Separate I/O operations** into `io_utils` module with consistent interfaces +4. **Create processing pipelines** that accept data objects, return data objects +5. **Move visualization to separate module** with callbacks for progress/interaction +6. **Use dependency injection** for progress callbacks instead of hard-coded file writes diff --git a/timestamp_correction_analysis.md b/timestamp_correction_analysis.md new file mode 100644 index 0000000..121aa3f --- /dev/null +++ b/timestamp_correction_analysis.md @@ -0,0 +1,723 @@ +# Timestamp Correction Module Analysis + +## Overview + +The `timestamp_correction.py` module handles the correction of timestamps for photometry data, including: +- Eliminating the first N seconds of recording (light stabilization period) +- Expanding TDT block timestamps into continuous timestamps +- Creating synthetic control channels when no isosbestic control is present +- Applying corrections to both data channels and event markers + +## Module Structure + +### Entry Point from preprocess.py + +```python +execute_timestamp_correction(folderNames, inputParameters) # preprocess.py:212 +``` + +This orchestrator loops through all session folders and calls functions in this module. + +## Two-Phase Control Channel Creation Pattern + +### Understanding add_control_channel vs create_control_channel + +These two functions work together in a **two-phase process** to handle synthetic control channel generation. They are **not redundant** but serve distinct purposes: + +#### Phase 1: `add_control_channel` (Called BEFORE timestamp correction) + +**Execution:** Line 229 in `execute_timestamp_correction` + +**Purpose:** Create **PLACEHOLDER** control files to satisfy workflow requirements + +**What it does:** +1. Validates that if `isosbestic_control=False`, no real control channels exist +2. 
For each signal channel without a matching control: + - Copies the raw signal HDF5 file to `cntrl{i}.hdf5` (placeholder) + - Adds entry to storesList: `[["cntrl{i}"], ["control_{region}"]]` +3. Saves updated `storesList.csv` + +**Files created:** +- `cntrl0.hdf5`, `cntrl1.hdf5`, etc. (copies of **RAW** signal data) +- Updated `storesList.csv` with placeholder entries + +**Why it's needed:** +- Timestamp correction workflow expects **paired** control/signal channels in storesList +- Without placeholders, the pairing logic in `timestampCorrection_xxx` and `check_cntrl_sig_length` would fail +- The placeholder **data is never actually used** - it just satisfies structural requirements + +#### Phase 2: `create_control_channel` (Called AFTER timestamp correction) + +**Execution:** Line 243 in `execute_timestamp_correction` + +**Purpose:** Generate **ACTUAL** synthetic control via curve fitting and overwrite placeholders + +**What it does:** +1. Looks for placeholder files (checks: `"control" in event_name.lower() and "cntrl" in event.lower()`) +2. Reads the **CORRECTED** signal data: `signal_{region}.hdf5` (after timestamp correction) +3. Calls `helper_create_control_channel()` to: + - Apply Savitzky-Golay filter to cleaned signal + - Fit to exponential function: `f(x) = a + b * exp(-(1/c) * x)` +4. **OVERWRITES** the placeholder `control_{region}.hdf5` with real synthetic control +5. Also exports to CSV format (legacy) + +**Files written:** +- `control_{region}.hdf5` → `data` (replaces placeholder with curve-fitted control) +- `{raw_name}.csv` (timestamps, data, sampling_rate columns) + +**Why it's separate:** +- Requires **timestamp-corrected** signal data (doesn't exist until after lines 232-239) +- Curve fitting algorithm needs clean timestamps (first N seconds eliminated) +- Cannot be done before timestamp correction without re-correcting the synthetic control + +#### Execution Timeline + +```python +# When isosbestic_control == False: + +# ========== PHASE 1: BEFORE TIMESTAMP CORRECTION ========== +# Line 229: Create placeholders (just file copies) +storesList = add_control_channel(filepath, storesList) +# Result: storesList now has paired structure +# [["Dv1A", "cntrl0"], ["signal_dms", "control_dms"]] +# Files: cntrl0.hdf5 (copy of raw signal, never used) + +# ========== TIMESTAMP CORRECTION PHASE ========== +# Lines 232-234: Process both signal AND placeholder control +timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList) +# Result: Creates timeCorrection_dms.hdf5 with correctionIndex + +# Lines 236-239: Apply corrections to all channels +decide_naming_convention_and_applyCorrection(...) +# Result: signal_dms.hdf5 now contains corrected signal data +# control_dms.hdf5 still contains uncorrected placeholder copy + +# ========== PHASE 2: AFTER TIMESTAMP CORRECTION ========== +# Line 243: Generate REAL synthetic controls +create_control_channel(filepath, storesList, window=101) +# Result: control_dms.hdf5 OVERWRITTEN with curve-fitted synthetic control +# Now contains valid control data derived from corrected signal +``` + +#### Why This Design Exists + +This is a **chicken-and-egg problem solved with placeholders:** + +1. **Requirement:** Timestamp correction expects paired control/signal channels +2. **Constraint:** Synthetic control generation requires timestamp-corrected signal data +3. 
**Solution:** Create dummy placeholders → correct everything → replace placeholders with real data + +#### Visual Flow + +```mermaid +flowchart TD + A[isosbestic_control = False] --> B[add_control_channel] + B --> C[Copy signal.hdf5 to cntrl0.hdf5] + C --> D[Update storesList.csv] + + D --> E[timestampCorrection_xxx] + E --> F[Creates timeCorrection_dms.hdf5] + + F --> G[decide_naming_convention_and_applyCorrection] + G --> H[Corrects signal_dms.hdf5] + G --> I[Corrects control_dms.hdf5
still contains placeholder]
+
+    I --> J[create_control_channel]
+    J --> K[Read corrected signal_dms.hdf5]
+    K --> L[helper_create_control_channel curve fit]
+    L --> M[OVERWRITE control_dms.hdf5
with synthetic control] + + style C fill:#fff3cd + style I fill:#fff3cd + style M fill:#d4edda +``` + +#### Refactoring Opportunity + +This placeholder pattern is a **code smell** indicating potential design improvements: + +**Issues:** +1. **Unnecessary I/O:** Placeholder files are written and then overwritten +2. **Confusing flow:** Hard to understand that placeholders are temporary +3. **Tight coupling:** Timestamp correction assumes paired files exist +4. **Wasted computation:** Placeholder controls get timestamp-corrected unnecessarily + +**Potential Improvements:** + +**Option 1: Lazy Control Creation** +- Modify timestamp correction to handle missing controls gracefully +- Only create synthetic controls after all corrections complete +- Remove placeholder file creation entirely + +**Option 2: Data Structure Refactoring** +- Use a data structure that doesn't require physical paired files upfront +- Track "needs synthetic control" as metadata rather than file presence +- Generate and write controls only once at the end + +**Option 3: Two-Pass Workflow** +- First pass: Correct only signal channels +- Second pass: Generate synthetic controls from corrected signals +- Would require refactoring `check_cntrl_sig_length` and pairing logic + +## Function Catalog + +### 1. add_control_channel +**Location:** `timestamp_correction.py:20` +**Purpose:** Create placeholder control channel files when no isosbestic control exists + +```python +def add_control_channel(filepath, arr) -> arr +``` + +**Input:** +- `filepath`: Path to session output folder +- `arr`: 2D array `[[storenames], [storesList]]` from storesList.csv + +**Process:** +1. Validates that control/signal pairs match (raises error if mismatched) +2. For each signal channel without a matching control: + - Copies signal HDF5 file to `cntrl{i}.hdf5` (placeholder) + - Adds entry to storesList array: `[["cntrl{i}"], ["control_{region}"]]` +3. Writes updated storesList.csv + +**Output:** +- Updated `arr` with new control channel entries +- **Files Written:** Updated `storesList.csv`, copied `cntrl*.hdf5` files + +**I/O Summary:** +- **Reads:** Signal HDF5 files (via shutil.copyfile) +- **Writes:** `storesList.csv`, placeholder `cntrl*.hdf5` files + +--- + +### 2. timestampCorrection_csv +**Location:** `timestamp_correction.py:65` +**Purpose:** Correct timestamps for CSV-format data (Doric, NPM, custom CSV) + +```python +def timestampCorrection_csv(filepath, timeForLightsTurnOn, storesList) +``` + +**Input:** +- `filepath`: Path to session output folder +- `timeForLightsTurnOn`: Seconds to eliminate from start (default: 1) +- `storesList`: 2D array `[[storenames], [storesList]]` + +**Process:** +1. Filters storesList to control/signal channels only +2. Pairs control/signal channels, validates naming matches +3. Calls `check_cntrl_sig_length()` to determine which channel to use (shorter one) +4. For each control/signal pair: + - **Reads:** `timestamps` and `sampling_rate` from raw HDF5 + - **Computes:** `correctionIndex = np.where(timestamp >= timeForLightsTurnOn)` + - **Writes:** `timeCorrection_{region}.hdf5` with keys: + - `timestampNew`: Corrected timestamps + - `correctionIndex`: Indices to keep + - `sampling_rate`: Sampling rate + +**Output:** +- **Files Written:** `timeCorrection_{region}.hdf5` for each control/signal pair + +**I/O Summary:** +- **Reads:** `{storename}.hdf5` → `timestamps`, `sampling_rate` +- **Writes:** `timeCorrection_{region}.hdf5` → `timestampNew`, `correctionIndex`, `sampling_rate` + +--- + +### 3. 
timestampCorrection_tdt +**Location:** `timestamp_correction.py:115` +**Purpose:** Correct timestamps for TDT-format data (expands block timestamps) + +```python +def timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList) +``` + +**Input:** Same as `timestampCorrection_csv` + +**Process:** +1. Filters storesList to control/signal channels only +2. Pairs control/signal channels, validates naming matches +3. Calls `check_cntrl_sig_length()` to determine which channel to use +4. For each control/signal pair: + - **Reads:** `timestamps`, `npoints`, `sampling_rate` from raw HDF5 + - **TDT-specific expansion algorithm:** + ```python + timeRecStart = timestamp[0] + timestamps = np.subtract(timestamp, timeRecStart) # Zero-base + adder = np.arange(npoints) / sampling_rate # Within-block offsets + # Expand: for each block timestamp, add within-block offsets + timestampNew = np.zeros((len(timestamps), lengthAdder)) + for i in range(lengthAdder): + timestampNew[:, i] = np.add(timestamps, adder[i]) + timestampNew = (timestampNew.T).reshape(-1, order="F") # Flatten + correctionIndex = np.where(timestampNew >= timeForLightsTurnOn) + timestampNew = timestampNew[correctionIndex] + ``` + - **Writes:** `timeCorrection_{region}.hdf5` with keys: + - `timeRecStart`: Recording start time (TDT-specific) + - `timestampNew`: Expanded, corrected timestamps + - `correctionIndex`: Indices to keep + - `sampling_rate`: Sampling rate + +**Output:** +- **Files Written:** `timeCorrection_{region}.hdf5` with TDT-specific `timeRecStart` key + +**I/O Summary:** +- **Reads:** `{storename}.hdf5` → `timestamps`, `npoints`, `sampling_rate` +- **Writes:** `timeCorrection_{region}.hdf5` → `timeRecStart`, `timestampNew`, `correctionIndex`, `sampling_rate` + +--- + +### 4. check_cntrl_sig_length +**Location:** `timestamp_correction.py:273` +**Purpose:** Determine which channel (control or signal) to use as reference based on length + +```python +def check_cntrl_sig_length(filepath, channels_arr, storenames, storesList) -> indices +``` + +**Input:** +- `filepath`: Path to session output folder +- `channels_arr`: Paired control/signal array `[["control_A", "control_B"], ["signal_A", "signal_B"]]` +- `storenames`: Raw HDF5 filenames +- `storesList`: Semantic channel names + +**Process:** +1. For each control/signal pair: + - **Reads:** `data` from both control and signal HDF5 + - Compares lengths: `control.shape[0]` vs `signal.shape[0]` + - Returns the shorter one's storename (or signal if equal) + +**Output:** +- List of storenames to use for timestamp correction (one per pair) + +**I/O Summary:** +- **Reads:** `{control_storename}.hdf5` → `data`, `{signal_storename}.hdf5` → `data` + +**Note:** This is a pure analysis function but performs I/O to determine which data to use. + +--- + +### 5. decide_naming_convention_and_applyCorrection +**Location:** `timestamp_correction.py:178` +**Purpose:** Loop through all channels and apply timestamp corrections + +```python +def decide_naming_convention_and_applyCorrection(filepath, timeForLightsTurnOn, event, displayName, storesList) +``` + +**Input:** +- `filepath`: Path to session output folder +- `timeForLightsTurnOn`: Seconds eliminated from start +- `event`: Raw storename (e.g., "Dv1A") +- `displayName`: Semantic name (e.g., "control_DMS") +- `storesList`: Full storesList array + +**Process:** +1. Filters storesList to control/signal channels +2. Pairs channels and validates naming conventions +3. 
For each pair, calls `applyCorrection(filepath, timeForLightsTurnOn, event, displayName, region)` + +**Output:** +- Delegates to `applyCorrection()` (no direct I/O) + +--- + +### 6. applyCorrection +**Location:** `timestamp_correction.py:205` +**Purpose:** Apply timestamp corrections to data channels or event markers + +```python +def applyCorrection(filepath, timeForLightsTurnOn, event, displayName, naming) +``` + +**Input:** +- `filepath`: Path to session output folder +- `timeForLightsTurnOn`: Seconds eliminated from start +- `event`: Raw storename +- `displayName`: Semantic display name +- `naming`: Region identifier (e.g., "dms") + +**Process:** + +**For Control/Signal Channels:** +1. **Reads:** `timeCorrection_{naming}.hdf5` → `correctionIndex` +2. **Reads:** `{event}.hdf5` → `data` +3. **Applies:** `arr = arr[correctionIndex]` (crops data) +4. **Writes:** `{displayName}.hdf5` → `data` (overwrites with corrected data) + +**For Event Channels:** +1. Detects TDT format: `check_TDT(os.path.dirname(filepath))` +2. **Reads:** `timeCorrection_{naming}.hdf5` → `timeRecStart` (if TDT) +3. **Reads:** `{event}.hdf5` → `timestamps` +4. **Applies corrections:** + - If TDT and timestamps >= timeRecStart: subtract both `timeRecStart` and `timeForLightsTurnOn` + - Otherwise: subtract only `timeForLightsTurnOn` +5. **Writes:** `{event}_{naming}.hdf5` → `ts` (corrected event timestamps) + +**Output:** +- **Files Written:** + - `{displayName}.hdf5` → `data` (for control/signal) + - `{event}_{naming}.hdf5` → `ts` (for events) + +**I/O Summary:** +- **Reads:** `timeCorrection_{naming}.hdf5`, `{event}.hdf5` +- **Writes:** `{displayName}.hdf5` or `{event}_{naming}.hdf5` + +--- + +### 7. create_control_channel +**Location:** `timestamp_correction.py:247` +**Purpose:** Generate synthetic control channel using curve fitting (when no isosbestic control exists) + +```python +def create_control_channel(filepath, arr, window=5001) +``` + +**Input:** +- `filepath`: Path to session output folder +- `arr`: storesList array `[[storenames], [storesList]]` +- `window`: Savitzky-Golay filter window (default: 5001) + +**Process:** +1. Loops through storesList to find placeholder control channels (`cntrl` in storename) +2. 
For each placeholder: + - **Reads:** `signal_{region}.hdf5` → `data` (corrected signal) + - **Reads:** `timeCorrection_{region}.hdf5` → `timestampNew`, `sampling_rate` + - **Calls:** `helper_create_control_channel(signal, timestampNew, window)` from `control_channel.py` + - Applies Savitzky-Golay filter + - Fits to exponential: `f(x) = a + b * exp(-(1/c) * x)` + - **Writes:** `{control_name}.hdf5` → `data` (synthetic control) + - **Writes:** `{event_name}.csv` with columns: `timestamps`, `data`, `sampling_rate` + +**Output:** +- **Files Written:** + - `control_{region}.hdf5` → `data` (replaces placeholder) + - `{raw_name}.csv` (legacy format export) + +**I/O Summary:** +- **Reads:** `signal_{region}.hdf5` → `data`, `timeCorrection_{region}.hdf5` → `timestampNew`, `sampling_rate` +- **Writes:** `control_{region}.hdf5` → `data`, `{raw_name}.csv` + +--- + +## Data Flow Diagram + +### High-Level Flow (called from execute_timestamp_correction) + +```mermaid +flowchart TD + A[execute_timestamp_correction] --> B[Read storesList.csv] + B --> C{isosbestic_control?} + + C -->|False| D[add_control_channel] + C -->|True| E{Check format} + D --> E + + E -->|TDT| F[timestampCorrection_tdt] + E -->|CSV/Doric/NPM| G[timestampCorrection_csv] + + F --> H[Loop: decide_naming_convention_and_applyCorrection] + G --> H + + H --> I[For each store: applyCorrection] + + I --> J{isosbestic_control?} + J -->|False| K[create_control_channel] + J -->|True| L[Done] + K --> L + + style A fill:#e1f5ff + style L fill:#d4edda +``` + +### Detailed Flow: timestampCorrection Functions + +```mermaid +flowchart LR + A[Raw HDF5 files] --> B[check_cntrl_sig_length] + B --> C[Read control & signal data] + C --> D[Return shorter channel name] + + D --> E{Format?} + E -->|CSV| F[timestampCorrection_csv] + E -->|TDT| G[timestampCorrection_tdt] + + F --> H[Read timestamps from selected channel] + G --> I[Read timestamps, npoints, sampling_rate] + + H --> J[correctionIndex = where >= timeForLightsTurnOn] + I --> K[Expand block timestamps] + K --> J + + J --> L[Write timeCorrection_{region}.hdf5] + + style A fill:#e1f5ff + style L fill:#d4edda +``` + +### Detailed Flow: applyCorrection + +```mermaid +flowchart TD + A[applyCorrection called] --> B{Channel type?} + + B -->|control/signal| C[Read correctionIndex] + B -->|event| D[Read event timestamps] + + C --> E[Read raw data] + E --> F[data = data correctionIndex] + F --> G[Write displayName.hdf5] + + D --> H{TDT format?} + H -->|Yes| I[Read timeRecStart] + H -->|No| J[ts -= timeForLightsTurnOn] + + I --> K[ts -= timeRecStart] + K --> J + J --> L[Write event_region.hdf5] + + style A fill:#e1f5ff + style G fill:#d4edda + style L fill:#d4edda +``` + +### Detailed Flow: Control Channel Creation + +```mermaid +flowchart LR + A[add_control_channel] --> B[For each signal without control] + B --> C[Copy signal.hdf5 to cntrl_i.hdf5] + C --> D[Update storesList.csv] + + D --> E[... timestamp correction ...] 
+
+    E --> F[create_control_channel]
+    F --> G[For each cntrl_i placeholder]
+    G --> H[Read signal_{region}.hdf5]
+    H --> I[helper_create_control_channel]
+    I --> J[Savitzky-Golay filter]
+    J --> K[Curve fit to exponential]
+    K --> L[Write control_{region}.hdf5]
+    L --> M[Export to CSV]
+
+    style A fill:#fff3cd
+    style M fill:#d4edda
+```
+
+## Execution Order in execute_timestamp_correction
+
+```python
+# preprocess.py:212-247
+for each session in folderNames:
+    for each output_folder in session:
+        # Step 1: Read metadata
+        storesList = np.genfromtxt("storesList.csv")
+
+        # Step 2: Add placeholder controls if needed
+        if isosbestic_control == False:
+            storesList = add_control_channel(filepath, storesList)
+
+        # Step 3: Compute correctionIndex and timestampNew
+        if check_TDT(folderName):
+            timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList)
+        else:
+            timestampCorrection_csv(filepath, timeForLightsTurnOn, storesList)
+
+        # Step 4: Apply corrections to all channels/events
+        for each store in storesList:
+            decide_naming_convention_and_applyCorrection(
+                filepath, timeForLightsTurnOn, storename, displayName, storesList
+            )
+            # ^ This calls applyCorrection for each channel
+
+        # Step 5: Generate synthetic controls via curve fitting
+        if isosbestic_control == False:
+            create_control_channel(filepath, storesList, window=101)
+```
+
+## File I/O Summary
+
+### Files Read
+
+| Function | Files Read | Keys |
+|----------|-----------|------|
+| `add_control_channel` | `signal_*.hdf5` (for copying) | - |
+| `timestampCorrection_csv` | `{storename}.hdf5` | `timestamps`, `sampling_rate` |
+| `timestampCorrection_tdt` | `{storename}.hdf5` | `timestamps`, `npoints`, `sampling_rate` |
+| `check_cntrl_sig_length` | `control_*.hdf5`, `signal_*.hdf5` | `data` |
+| `applyCorrection` | `timeCorrection_{region}.hdf5`<br>`{event}.hdf5` | `correctionIndex`, `timeRecStart` (TDT)<br>`data` or `timestamps` |
+| `create_control_channel` | `signal_{region}.hdf5`<br>`timeCorrection_{region}.hdf5` | `data`<br>`timestampNew`, `sampling_rate` |
+
+### Files Written
+
+| Function | Files Written | Keys | Notes |
+|----------|--------------|------|-------|
+| `add_control_channel` | `storesList.csv`<br>`cntrl{i}.hdf5` | -<br>(copy of signal) | Placeholder files |
+| `timestampCorrection_csv` | `timeCorrection_{region}.hdf5` | `timestampNew`, `correctionIndex`, `sampling_rate` | One per region |
+| `timestampCorrection_tdt` | `timeCorrection_{region}.hdf5` | `timeRecStart`, `timestampNew`, `correctionIndex`, `sampling_rate` | TDT-specific |
+| `applyCorrection` | `{displayName}.hdf5`<br>`{event}_{region}.hdf5` | `data`<br>`ts` | Overwrites with corrected data |
+| `create_control_channel` | `control_{region}.hdf5`<br>`{raw_name}.csv` | `data`<br>`timestamps`, `data`, `sampling_rate` | Replaces placeholder |
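+
+To make these layouts concrete, here is a minimal read-back sketch using `h5py` directly. GuPPy has its own read/write helpers, so treat this only as an illustration of the file and key naming; the folder path and the `dms` region suffix are placeholders.
+
+```python
+import os
+import h5py
+
+filepath = "/path/to/session_output"  # hypothetical output folder
+
+# Per-region correction file written by timestampCorrection_csv/_tdt
+with h5py.File(os.path.join(filepath, "timeCorrection_dms.hdf5"), "r") as f:
+    timestampNew = f["timestampNew"][()]        # corrected timestamps
+    correctionIndex = f["correctionIndex"][()]  # indices retained after cropping
+    sampling_rate = f["sampling_rate"][()]
+
+# Corrected signal channel written by applyCorrection
+with h5py.File(os.path.join(filepath, "signal_dms.hdf5"), "r") as f:
+    signal = f["data"][()]
+```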
+
+## Key Transformations
+
+### 1. Timestamp Expansion (TDT only)
+
+**Input:** Block timestamps (one per acquisition block)
+**Algorithm:**
+```python
+timeRecStart = timestamp[0]
+timestamps = timestamp - timeRecStart  # Zero-base
+adder = np.arange(npoints) / sampling_rate  # Within-block offsets [0, 1/fs, 2/fs, ...]
+# Expand: one row per block, one column per within-block sample
+n_blocks = timestamps.size
+timestampNew = np.zeros((n_blocks, npoints))
+for i in range(npoints):
+    timestampNew[:, i] = timestamps + adder[i]
+timestampNew = timestampNew.T.reshape(-1, order='F')  # Column-major flatten
+```
+**Output:** Continuous timestamps at full sampling rate
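+
+The same expansion can be written as a single NumPy broadcast. This is an equivalent sketch (same output ordering: all samples of block 0, then block 1, and so on), not the code currently in `timestamp_correction.py`; the function name is illustrative.
+
+```python
+import numpy as np
+
+def expand_block_timestamps(timestamp, npoints, sampling_rate):
+    """Broadcasting form of the TDT timestamp expansion above."""
+    timeRecStart = timestamp[0]
+    timestamps = timestamp - timeRecStart        # zero-base block start times
+    adder = np.arange(npoints) / sampling_rate   # within-block sample offsets
+    # (n_blocks, 1) + (npoints,) broadcasts to (n_blocks, npoints);
+    # ravel() flattens block by block, matching the loop-and-transpose above.
+    return (timestamps[:, None] + adder).ravel(), timeRecStart
+```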
+
+### 2. Correction Index Computation
+
+**Input:** Timestamps array, `timeForLightsTurnOn`
+**Algorithm:**
+```python
+correctionIndex = np.where(timestamp >= timeForLightsTurnOn)[0]
+```
+**Output:** Indices of timestamps to keep (after eliminating first N seconds)
+
+### 3. Data Cropping
+
+**Applied to:** Control/signal data channels
+**Algorithm:**
+```python
+data_corrected = data[correctionIndex]
+```
+
+### 4. Event Timestamp Adjustment
+
+**Applied to:** Event markers (TTL pulses)
+**Algorithm:**
+```python
+# CSV format:
+ts_corrected = ts - timeForLightsTurnOn
+
+# TDT format (if ts >= timeRecStart):
+ts_corrected = ts - timeRecStart - timeForLightsTurnOn
+```
+
+### 5. Synthetic Control Generation
+
+**Input:** Signal channel (already corrected)
+**Algorithm:**
+1. Apply Savitzky-Golay filter: `filtered_signal = savgol_filter(signal, window, polyorder=3)`
+2. Curve fit to exponential: `control = a + b * exp(-(1/c) * t)`
+3. Return fitted curve as synthetic control
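+
+A minimal sketch of steps 1-3 using SciPy is shown below. It assumes the exponential model and `polyorder=3` described above; the real implementation is `helper_create_control_channel` in `control_channel.py` and may differ in details such as initial guesses, bounds, and window handling.
+
+```python
+import numpy as np
+from scipy.optimize import curve_fit
+from scipy.signal import savgol_filter
+
+def fit_synthetic_control(signal, timestamps, window=5001):
+    """Illustrative synthetic-control fit (not GuPPy's helper)."""
+    # 1. Smooth the signal (window must be odd and no longer than the signal).
+    filtered = savgol_filter(signal, window_length=window, polyorder=3)
+
+    # 2. Fit the smoothed signal to f(x) = a + b * exp(-(1/c) * x).
+    def exponential(x, a, b, c):
+        return a + b * np.exp(-(1.0 / c) * x)
+
+    p0 = [filtered[-1], filtered[0] - filtered[-1], max(timestamps[-1], 1.0)]  # rough guess
+    popt, _ = curve_fit(exponential, timestamps, filtered, p0=p0, maxfev=10000)
+
+    # 3. The fitted curve becomes the synthetic control channel.
+    return exponential(timestamps, *popt)
+```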
+
+## Analysis for I/O Separation
+
+### Pure Analysis Functions (Minimal I/O)
+These could be extracted with I/O injected:
+- ❌ None - all functions perform substantial I/O
+
+### Orchestration Functions (Heavy I/O, Light Analysis)
+These coordinate reading/writing and delegate computation:
+- `add_control_channel` - File copying and CSV writing
+- `decide_naming_convention_and_applyCorrection` - Loops and delegates
+- `create_control_channel` - Orchestrates read → process → write
+
+### Mixed Functions (I/O + Analysis)
+These perform both I/O and computation inline:
+- `timestampCorrection_csv` - Reads data, computes correctionIndex, writes results
+- `timestampCorrection_tdt` - Reads data, expands timestamps, computes correctionIndex, writes
+- `applyCorrection` - Reads multiple files, applies transformations, writes
+- `check_cntrl_sig_length` - Reads data just to compare lengths
+
+## Refactoring Recommendations for I/O Separation
+
+### Option 1: Extract Pure Computation Functions
+
+Create new pure functions:
+```python
+# Pure analysis (no I/O)
+def compute_correction_index(timestamps, timeForLightsTurnOn):
+    return np.where(timestamps >= timeForLightsTurnOn)[0]
+
+def expand_tdt_timestamps(block_timestamps, npoints, sampling_rate):
+    # TDT expansion algorithm
+    ...
+    return expanded_timestamps
+
+def crop_data_by_index(data, correctionIndex):
+    return data[correctionIndex]
+
+def adjust_event_timestamps(ts, timeRecStart, timeForLightsTurnOn, is_tdt):
+    # Event adjustment logic
+    ...
+    return adjusted_ts
+```
+
+Then modify existing functions to use these pure functions, keeping I/O separate.
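+
+As a quick illustration of why this helps, the pure helpers above can be exercised (and unit-tested) on synthetic arrays with no files involved; the toy data here is purely for demonstration:
+
+```python
+import numpy as np
+
+fs = 100.0
+timestamps = np.arange(0, 10, 1 / fs)                       # 10 s at 100 Hz
+data = np.random.default_rng(0).normal(size=timestamps.size)
+
+idx = compute_correction_index(timestamps, timeForLightsTurnOn=1.0)
+cropped = crop_data_by_index(data, idx)                     # drops the first second
+assert crop_data_by_index(timestamps, idx)[0] >= 1.0
+```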
+
+### Option 2: Reader/Writer Pattern
+
+Create dedicated I/O classes:
+```python
+class TimestampCorrectionReader:
+    def read_raw_timestamps(self, filepath, storename):
+        ...
+
+    def read_correction_data(self, filepath, region):
+        ...
+ +class TimestampCorrectionWriter: + def write_correction_file(self, filepath, region, data): + ... + + def write_corrected_data(self, filepath, displayName, data): + ... +``` + +### Option 3: Data Class Pattern + +Return data objects instead of writing directly: +```python +@dataclass +class TimestampCorrection: + timestampNew: np.ndarray + correctionIndex: np.ndarray + sampling_rate: float + timeRecStart: Optional[float] = None # TDT only + +def timestampCorrection_tdt(...) -> TimestampCorrection: + # Compute all values + return TimestampCorrection( + timestampNew=..., + correctionIndex=..., + sampling_rate=..., + timeRecStart=... + ) + +# Separate writer function +def write_timestamp_correction(filepath, region, correction: TimestampCorrection): + write_hdf5(correction.timestampNew, f"timeCorrection_{region}", filepath, "timestampNew") + # ... etc +``` + +## Current I/O Patterns to Refactor + +1. **Inline writes in computation functions:** + - `timestampCorrection_csv` and `timestampCorrection_tdt` compute AND write + - Should separate: compute → return data → write in caller + +2. **Reading for validation only:** + - `check_cntrl_sig_length` reads full data arrays just to compare shapes + - Could be optimized to read only array metadata/shapes + +3. **Side-effect file creation:** + - `add_control_channel` creates files as side effect + - `create_control_channel` both generates data AND writes multiple formats (HDF5 + CSV) + +4. **Mixed responsibilities in applyCorrection:** + - Handles both control/signal cropping AND event timestamp adjustment + - Could be split into two separate functions
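+
+For item 4, one possible split is sketched below. The names and signatures are illustrative only (they are not existing GuPPy functions), and the TDT branch follows the condition described for `applyCorrection` above, applied per timestamp here:
+
+```python
+import numpy as np
+
+def apply_channel_correction(data, correctionIndex):
+    """Pure: crop a control/signal channel to the retained indices."""
+    return data[correctionIndex]
+
+def apply_event_correction(ts, timeForLightsTurnOn, timeRecStart=None):
+    """Pure: shift event timestamps; timeRecStart applies only to TDT sessions."""
+    ts = np.asarray(ts, dtype=float)
+    if timeRecStart is not None:
+        # Subtract the recording start only for timestamps at or after it.
+        ts = np.where(ts >= timeRecStart, ts - timeRecStart, ts)
+    return ts - timeForLightsTurnOn
+```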