From ba71cf520ed172233690f7275f0f2d7edae8574c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 19 Nov 2025 11:43:08 +0000 Subject: [PATCH 1/4] Initial plan From 95766807008946751bdc0ed62b20fe119919a560 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 19 Nov 2025 11:52:24 +0000 Subject: [PATCH 2/4] Add ion mobility and mobilogram support - Added float_data_arrays property to Py_MSSpectrum for accessing/setting float data arrays - Added ion_mobility property for convenient access to ion mobility data - Added drift_time property for spectrum-level drift time - Updated to_dataframe() to include float data arrays as columns - Updated from_dataframe() to create float data arrays from DataFrame columns - Created Py_Mobilogram wrapper class for MSChromatogram (mobilograms) - Added comprehensive tests for all new functionality - All tests passing (93 tests, 86% coverage) Co-authored-by: timosachsenberg <5803621+timosachsenberg@users.noreply.github.com> --- openms_python/__init__.py | 2 + openms_python/py_mobilogram.py | 297 ++++++++++++++++++++++++++++++++ openms_python/py_msspectrum.py | 301 +++++++++++++++++++++++---------- tests/test_py_mobilogram.py | 188 ++++++++++++++++++++ tests/test_py_msspectrum.py | 136 +++++++++++++++ 5 files changed, 839 insertions(+), 85 deletions(-) create mode 100644 openms_python/py_mobilogram.py create mode 100644 tests/test_py_mobilogram.py diff --git a/openms_python/__init__.py b/openms_python/__init__.py index 750d0d7..fe9bfaa 100644 --- a/openms_python/__init__.py +++ b/openms_python/__init__.py @@ -23,6 +23,7 @@ from .py_msexperiment import Py_MSExperiment from .py_msspectrum import Py_MSSpectrum +from .py_mobilogram import Py_Mobilogram from .py_feature import Py_Feature from .py_featuremap import Py_FeatureMap from .py_consensusmap import Py_ConsensusMap @@ -99,6 +100,7 @@ def get_example(name: str, *, load: bool = False, target_dir: Union[str, Path, N __all__ = [ "Py_MSExperiment", "Py_MSSpectrum", + "Py_Mobilogram", "Py_Feature", "Py_FeatureMap", "Py_ConsensusMap", diff --git a/openms_python/py_mobilogram.py b/openms_python/py_mobilogram.py new file mode 100644 index 0000000..e00a924 --- /dev/null +++ b/openms_python/py_mobilogram.py @@ -0,0 +1,297 @@ +""" +Pythonic wrapper for pyOpenMS MSChromatogram for mobilogram representation. + +A mobilogram is a chromatogram in the ion mobility dimension, representing +intensity vs. drift time for a specific m/z value. +""" + +from typing import Tuple, Optional +import numpy as np +import pandas as pd +import pyopenms as oms + +from ._meta_mapping import MetaInfoMappingMixin + + +class Py_Mobilogram(MetaInfoMappingMixin): + """ + A Pythonic wrapper around pyOpenMS MSChromatogram for mobilograms. + + A mobilogram represents the ion mobility dimension for a specific m/z, + showing intensity vs. drift time (or other ion mobility values). + + Example: + >>> mob = Py_Mobilogram(native_chromatogram) + >>> print(f"m/z: {mob.mz:.4f}") + >>> print(f"Number of points: {len(mob)}") + >>> drift_times, intensities = mob.peaks + >>> df = mob.to_dataframe() + """ + + def __init__(self, native_chromatogram: oms.MSChromatogram): + """ + Initialize Mobilogram wrapper. + + Args: + native_chromatogram: pyOpenMS MSChromatogram object + """ + self._chromatogram = native_chromatogram + + # ==================== Meta-info support ==================== + + def _meta_object(self) -> oms.MetaInfoInterface: + return self._chromatogram + + # ==================== Pythonic Properties ==================== + + @property + def name(self) -> str: + """Get the name of this mobilogram.""" + return self._chromatogram.getName() + + @name.setter + def name(self, value: str): + """Set the name of this mobilogram.""" + self._chromatogram.setName(value) + + @property + def mz(self) -> Optional[float]: + """ + Get the m/z value this mobilogram represents. + + Returns from metadata if available, None otherwise. + """ + if self._chromatogram.metaValueExists("mz"): + return float(self._chromatogram.getMetaValue("mz")) + return None + + @mz.setter + def mz(self, value: float): + """Set the m/z value this mobilogram represents.""" + self._chromatogram.setMetaValue("mz", value) + + @property + def drift_time(self) -> np.ndarray: + """ + Get drift time values as a NumPy array. + + Returns: + NumPy array of drift time values + """ + rt, _ = self.peaks + return rt + + @property + def intensity(self) -> np.ndarray: + """ + Get intensity values as a NumPy array. + + Returns: + NumPy array of intensity values + """ + _, intensity = self.peaks + return intensity + + @property + def peaks(self) -> Tuple[np.ndarray, np.ndarray]: + """ + Get mobilogram data as NumPy arrays. + + Returns: + Tuple of (drift_time_array, intensity_array) + """ + rt, intensity = self._chromatogram.get_peaks() + return np.array(rt), np.array(intensity) + + @peaks.setter + def peaks(self, values: Tuple[np.ndarray, np.ndarray]): + """ + Set mobilogram data from NumPy arrays. + + Args: + values: Tuple of (drift_time_array, intensity_array) + """ + drift_time, intensity = values + # Clear existing peaks + self._chromatogram.clear(False) + # Add new peaks + for dt, i in zip(drift_time, intensity): + peak = oms.ChromatogramPeak() + peak.setRT(float(dt)) + peak.setIntensity(float(i)) + self._chromatogram.push_back(peak) + + @property + def total_ion_current(self) -> float: + """Get total ion current (sum of all intensities).""" + return float(np.sum(self.intensity)) + + @property + def base_peak_drift_time(self) -> Optional[float]: + """Get drift time of the base peak (most intense point).""" + if len(self) == 0: + return None + drift_time, intensities = self.peaks + return float(drift_time[np.argmax(intensities)]) + + @property + def base_peak_intensity(self) -> Optional[float]: + """Get intensity of the base peak.""" + if len(self) == 0: + return None + return float(np.max(self.intensity)) + + # ==================== Magic Methods ==================== + + def __len__(self) -> int: + """Return number of points in the mobilogram.""" + return self._chromatogram.size() + + def __repr__(self) -> str: + """Return string representation.""" + mz_str = f"m/z={self.mz:.4f}" if self.mz is not None else "m/z=unset" + return f"Mobilogram({mz_str}, points={len(self)}, " f"TIC={self.total_ion_current:.2e})" + + def __str__(self) -> str: + """Return human-readable string.""" + return self.__repr__() + + # ==================== Conversion Methods ==================== + + def to_numpy(self) -> np.ndarray: + """ + Convert mobilogram to NumPy arrays. + + Returns: + Tuple of (drift_time_array, intensity_array) + """ + return np.array(self.peaks) + + def to_dataframe(self) -> pd.DataFrame: + """ + Convert mobilogram to pandas DataFrame. + + Returns: + DataFrame with columns: drift_time, intensity + + Example: + >>> df = mob.to_dataframe() + >>> df.head() + drift_time intensity + 0 1.5 100.0 + 1 2.0 150.0 + ... + """ + drift_time, intensity = self.peaks + data = {"drift_time": drift_time, "intensity": intensity} + + # Add m/z as a column if available + if self.mz is not None: + data["mz"] = self.mz + + return pd.DataFrame(data) + + @classmethod + def from_dataframe(cls, df: pd.DataFrame, **metadata) -> "Py_Mobilogram": + """ + Create mobilogram from pandas DataFrame. + + Args: + df: DataFrame with 'drift_time' and 'intensity' columns + **metadata: Optional metadata (name, mz, etc.) + + Returns: + Mobilogram object + + Example: + >>> df = pd.DataFrame({ + ... 'drift_time': [1.5, 2.0, 2.5], + ... 'intensity': [100, 150, 120] + ... }) + >>> mob = Py_Mobilogram.from_dataframe(df, mz=500.0, name="mobilogram_500") + """ + chrom = oms.MSChromatogram() + + # Add peaks + for dt, intensity in zip(df["drift_time"].values, df["intensity"].values): + peak = oms.ChromatogramPeak() + peak.setRT(float(dt)) + peak.setIntensity(float(intensity)) + chrom.push_back(peak) + + # Set metadata + if "name" in metadata: + chrom.setName(metadata["name"]) + + # Get m/z from metadata or DataFrame + mz_value = metadata.get("mz") + if mz_value is None and "mz" in df.columns: + # Extract m/z from DataFrame (take first value if it's a column) + mz_value = float(df["mz"].iloc[0]) + + if mz_value is not None: + chrom.setMetaValue("mz", float(mz_value)) + + # Mark as mobilogram type + chrom.setMetaValue("chromatogram_type", "mobilogram") + + return cls(chrom) + + @classmethod + def from_arrays( + cls, + drift_time: np.ndarray, + intensity: np.ndarray, + mz: Optional[float] = None, + name: Optional[str] = None, + ) -> "Py_Mobilogram": + """ + Create mobilogram from NumPy arrays. + + Args: + drift_time: Array of drift time values + intensity: Array of intensity values + mz: Optional m/z value this mobilogram represents + name: Optional name for the mobilogram + + Returns: + Mobilogram object + + Example: + >>> mob = Py_Mobilogram.from_arrays( + ... np.array([1.5, 2.0, 2.5]), + ... np.array([100, 150, 120]), + ... mz=500.0 + ... ) + """ + chrom = oms.MSChromatogram() + + # Add peaks + for dt, i in zip(drift_time, intensity): + peak = oms.ChromatogramPeak() + peak.setRT(float(dt)) + peak.setIntensity(float(i)) + chrom.push_back(peak) + + # Set metadata + if name: + chrom.setName(name) + if mz is not None: + chrom.setMetaValue("mz", float(mz)) + + # Mark as mobilogram type + chrom.setMetaValue("chromatogram_type", "mobilogram") + + return cls(chrom) + + # ==================== Access to Native Object ==================== + + @property + def native(self) -> oms.MSChromatogram: + """ + Get the underlying pyOpenMS MSChromatogram object. + + Use this when you need to access pyOpenMS-specific methods + not wrapped by this class. + """ + return self._chromatogram diff --git a/openms_python/py_msspectrum.py b/openms_python/py_msspectrum.py index 77713a2..bcfa9b3 100644 --- a/openms_python/py_msspectrum.py +++ b/openms_python/py_msspectrum.py @@ -14,10 +14,10 @@ class Py_MSSpectrum(MetaInfoMappingMixin): """ A Pythonic wrapper around pyOpenMS MSSpectrum. - + This class provides intuitive properties and methods for working with mass spectra, hiding the verbose C++ API underneath. - + Example: >>> spec = Spectrum(native_spectrum) >>> print(f"RT: {spec.retention_time:.2f} seconds") @@ -27,11 +27,11 @@ class Py_MSSpectrum(MetaInfoMappingMixin): ... print("This is an MS1 spectrum") >>> peaks_df = spec.to_dataframe() """ - + def __init__(self, native_spectrum: oms.MSSpectrum): """ Initialize Spectrum wrapper. - + Args: native_spectrum: pyOpenMS MSSpectrum object """ @@ -41,39 +41,39 @@ def __init__(self, native_spectrum: oms.MSSpectrum): def _meta_object(self) -> oms.MetaInfoInterface: return self._spectrum - + # ==================== Pythonic Properties ==================== - + @property def retention_time(self) -> float: """Get retention time in seconds.""" return self._spectrum.getRT() - + @retention_time.setter def retention_time(self, value: float): """Set retention time in seconds.""" self._spectrum.setRT(value) - + @property def ms_level(self) -> int: """Get MS level (1 for MS1, 2 for MS2, etc.).""" return self._spectrum.getMSLevel() - + @ms_level.setter def ms_level(self, value: int): """Set MS level.""" self._spectrum.setMSLevel(value) - + @property def is_ms1(self) -> bool: """Check if this is an MS1 spectrum.""" return self.ms_level == 1 - + @property def is_ms2(self) -> bool: """Check if this is an MS2 spectrum.""" return self.ms_level == 2 - + @property def precursor_mz(self) -> Optional[float]: """Get precursor m/z for MS2+ spectra, None for MS1.""" @@ -83,7 +83,7 @@ def precursor_mz(self) -> Optional[float]: if precursors: return precursors[0].getMZ() return None - + @property def precursor_charge(self) -> Optional[int]: """Get precursor charge for MS2+ spectra, None for MS1.""" @@ -118,7 +118,7 @@ def precursor_mass(self) -> Optional[float]: def native_id(self) -> str: """Get native ID of the spectrum.""" return self._spectrum.getNativeID() - + @property def scan_number(self) -> int: """Get scan number (extracted from native ID or -1 if not available).""" @@ -130,13 +130,13 @@ def scan_number(self) -> int: except (ValueError, IndexError): pass return -1 - + @property def total_ion_current(self) -> float: """Get total ion current (sum of all peak intensities).""" _, intensities = self.peaks return float(np.sum(intensities)) - + @property def base_peak_mz(self) -> Optional[float]: """Get m/z of the base peak (most intense peak).""" @@ -144,7 +144,7 @@ def base_peak_mz(self) -> Optional[float]: return None mz, intensities = self.peaks return float(mz[np.argmax(intensities)]) - + @property def base_peak_intensity(self) -> Optional[float]: """Get intensity of the base peak.""" @@ -152,23 +152,23 @@ def base_peak_intensity(self) -> Optional[float]: return None _, intensities = self.peaks return float(np.max(intensities)) - + @property def peaks(self) -> Tuple[np.ndarray, np.ndarray]: """ Get peaks as NumPy arrays. - + Returns: Tuple of (mz_array, intensity_array) """ mz, intensity = self._spectrum.get_peaks() return np.array(mz), np.array(intensity) - + @peaks.setter def peaks(self, values: Tuple[np.ndarray, np.ndarray]): """ Set peaks from NumPy arrays. - + Args: values: Tuple of (mz_array, intensity_array) """ @@ -186,14 +186,122 @@ def intensity(self) -> np.ndarray: """Get intensity values as a NumPy array.""" _, intensity = self.peaks return intensity - + + @property + def float_data_arrays(self) -> list: + """ + Get float data arrays attached to this spectrum. + + Returns: + List of pyOpenMS FloatDataArray objects + + Example: + >>> arrays = spec.float_data_arrays + >>> for arr in arrays: + ... print(f"Array: {arr.getName()}, size: {arr.size()}") + """ + return self._spectrum.getFloatDataArrays() + + @float_data_arrays.setter + def float_data_arrays(self, arrays: list): + """ + Set float data arrays on this spectrum. + + Args: + arrays: List of pyOpenMS FloatDataArray objects + """ + self._spectrum.setFloatDataArrays(arrays) + + @property + def ion_mobility(self) -> Optional[np.ndarray]: + """ + Get ion mobility values as a NumPy array if available. + + Returns: + NumPy array of ion mobility values, or None if not available + + Example: + >>> if spec.ion_mobility is not None: + ... print(f"Ion mobility: {spec.ion_mobility}") + """ + float_arrays = self.float_data_arrays + for arr in float_arrays: + name = arr.getName().lower() + if "ion" in name and "mobility" in name or name == "ion_mobility": + return np.array([arr[i] for i in range(arr.size())]) + return None + + @ion_mobility.setter + def ion_mobility(self, values: np.ndarray): + """ + Set ion mobility values for this spectrum. + + Creates or updates a FloatDataArray named 'ion_mobility' with the provided values. + The array should have the same length as the number of peaks. + + Args: + values: NumPy array of ion mobility values + + Example: + >>> spec.ion_mobility = np.array([1.5, 2.3, 3.1]) + """ + if len(values) != len(self): + raise ValueError( + f"Ion mobility array length ({len(values)}) must match " + f"number of peaks ({len(self)})" + ) + + # Get existing arrays + float_arrays = self.float_data_arrays + + # Find or create ion mobility array + ion_mobility_array = None + ion_mobility_index = -1 + for i, arr in enumerate(float_arrays): + name = arr.getName().lower() + if "ion" in name and "mobility" in name or name == "ion_mobility": + ion_mobility_array = arr + ion_mobility_index = i + break + + if ion_mobility_array is None: + # Create new array + ion_mobility_array = oms.FloatDataArray() + ion_mobility_array.setName("ion_mobility") + float_arrays.append(ion_mobility_array) + + # Clear and set new values + ion_mobility_array.clear() + for val in values: + ion_mobility_array.push_back(float(val)) + + # Update the arrays on the spectrum + if ion_mobility_index >= 0: + float_arrays[ion_mobility_index] = ion_mobility_array + + self.float_data_arrays = float_arrays + + @property + def drift_time(self) -> float: + """ + Get the drift time of this spectrum. + + Returns: + Drift time value, or -1.0 if not set + """ + return self._spectrum.getDriftTime() + + @drift_time.setter + def drift_time(self, value: float): + """Set the drift time of this spectrum.""" + self._spectrum.setDriftTime(value) # ==================== Magic Methods ==================== - + def __len__(self) -> int: """Return number of peaks in the spectrum.""" return self._spectrum.size() - + def __repr__(self) -> str: """Return string representation.""" ms_info = f"MS{self.ms_level}" @@ -203,59 +311,66 @@ def __repr__(self) -> str: f"Spectrum(rt={self.retention_time:.2f}s, {ms_info}, " f"peaks={len(self)}, TIC={self.total_ion_current:.2e})" ) - + def __str__(self) -> str: """Return human-readable string.""" return self.__repr__() - def __iter__(self): """Allow dict(self) and list(self) conversions.""" yield "mz", self.mz.tolist() yield "intens", self.intensity.tolist() - - # ==================== Conversion Methods ==================== def to_numpy(self) -> np.ndarray: """ Convert spectrum peaks to NumPy arrays. - + Returns: Tuple of (mz_array, intensity_array) - + Example: >>> mz, intensity = spec.to_numpy() """ return np.array(self.peaks) - def to_dataframe(self) -> pd.DataFrame: + def to_dataframe(self, include_float_arrays: bool = True) -> pd.DataFrame: """ Convert spectrum peaks to pandas DataFrame. - + + Args: + include_float_arrays: If True, include float data arrays as columns + Returns: - DataFrame with columns: mz, intensity - + DataFrame with columns: mz, intensity, and any float data arrays + Example: >>> df = spec.to_dataframe() >>> df.head() - mz intensity - 0 100.0500 1250.5 - 1 200.1234 5678.2 + mz intensity ion_mobility + 0 100.0500 1250.5 1.5 + 1 200.1234 5678.2 2.3 ... """ mz, intensity = self.peaks - return pd.DataFrame({ - 'mz': mz, - 'intensity': intensity - }) - + data = {"mz": mz, "intensity": intensity} + + # Add float data arrays if requested + if include_float_arrays: + float_arrays = self.float_data_arrays + for arr in float_arrays: + name = arr.getName() + if name and arr.size() == len(mz): + data[name] = np.array([arr[i] for i in range(arr.size())]) + + return pd.DataFrame(data) + @classmethod - def from_numpy(mz: np.ndarray, intensity: np.ndarray) -> 'Py_MSSpectrum': + def from_numpy(mz: np.ndarray, intensity: np.ndarray) -> "Py_MSSpectrum": """ Create spectrum from NumPy arrays. - + Args: mz: Array of m/z values intensity: Array of intensity values @@ -264,99 +379,115 @@ def from_numpy(mz: np.ndarray, intensity: np.ndarray) -> 'Py_MSSpectrum': spec.set_peaks((mz.tolist(), intensity.tolist())) return Py_MSSpectrum(spec) - @classmethod - def from_dataframe(cls, df: pd.DataFrame, **metadata) -> 'Py_MSSpectrum': + def from_dataframe(cls, df: pd.DataFrame, **metadata) -> "Py_MSSpectrum": """ Create spectrum from pandas DataFrame. - + Args: - df: DataFrame with 'mz' and 'intensity' columns + df: DataFrame with 'mz' and 'intensity' columns, and optionally other float data arrays **metadata: Optional metadata (retention_time, ms_level, etc.) - + Returns: Spectrum object - + Example: - >>> df = pd.DataFrame({'mz': [100, 200], 'intensity': [50, 100]}) + >>> df = pd.DataFrame({ + ... 'mz': [100, 200], + ... 'intensity': [50, 100], + ... 'ion_mobility': [1.5, 2.3] + ... }) >>> spec = Spectrum.from_dataframe(df, retention_time=60.5, ms_level=1) """ spec = oms.MSSpectrum() - spec.set_peaks((df['mz'].values.tolist(), df['intensity'].values.tolist())) - + spec.set_peaks((df["mz"].values.tolist(), df["intensity"].values.tolist())) + # Set metadata - if 'retention_time' in metadata: - spec.setRT(metadata['retention_time']) - if 'ms_level' in metadata: - spec.setMSLevel(metadata['ms_level']) - if 'native_id' in metadata: - spec.setNativeID(metadata['native_id']) - + if "retention_time" in metadata: + spec.setRT(metadata["retention_time"]) + if "ms_level" in metadata: + spec.setMSLevel(metadata["ms_level"]) + if "native_id" in metadata: + spec.setNativeID(metadata["native_id"]) + + # Add float data arrays from DataFrame columns (excluding mz and intensity) + float_arrays = [] + for col in df.columns: + if col not in ["mz", "intensity"]: + fda = oms.FloatDataArray() + fda.setName(col) + for val in df[col].values: + fda.push_back(float(val)) + float_arrays.append(fda) + + if float_arrays: + spec.setFloatDataArrays(float_arrays) + return cls(spec) - + # ==================== Data Manipulation ==================== - def filter_by_mz(self, min_mz: float, max_mz: float) -> 'Py_MSSpectrum': + def filter_by_mz(self, min_mz: float, max_mz: float) -> "Py_MSSpectrum": """ Filter peaks by m/z range. - + Args: min_mz: Minimum m/z value max_mz: Maximum m/z value - + Returns: New Spectrum with filtered peaks """ mz, intensity = self.peaks mask = (mz >= min_mz) & (mz <= max_mz) - + new_spec = oms.MSSpectrum() new_spec.set_peaks((mz[mask].tolist(), intensity[mask].tolist())) new_spec.setRT(self.retention_time) new_spec.setMSLevel(self.ms_level) new_spec.setNativeID(self.native_id) - + return Py_MSSpectrum(new_spec) - - def filter_by_intensity(self, min_intensity: float) -> 'Py_MSSpectrum': + + def filter_by_intensity(self, min_intensity: float) -> "Py_MSSpectrum": """ Filter peaks by minimum intensity. - + Args: min_intensity: Minimum intensity threshold - + Returns: New Spectrum with filtered peaks """ mz, intensity = self.peaks mask = intensity >= min_intensity - + new_spec = oms.MSSpectrum() new_spec.set_peaks((mz[mask].tolist(), intensity[mask].tolist())) new_spec.setRT(self.retention_time) new_spec.setMSLevel(self.ms_level) new_spec.setNativeID(self.native_id) - + return Py_MSSpectrum(new_spec) - def top_n_peaks(self, n: int) -> 'Py_MSSpectrum': + def top_n_peaks(self, n: int) -> "Py_MSSpectrum": """ Keep only the top N most intense peaks. - + Args: n: Number of peaks to keep - + Returns: New Spectrum with top N peaks """ mz, intensity = self.peaks if len(mz) <= n: return self - + # Get indices of top N peaks top_indices = np.argsort(intensity)[-n:] top_indices = np.sort(top_indices) # Keep m/z order - + new_spec = oms.MSSpectrum() new_spec.set_peaks((mz[top_indices].tolist(), intensity[top_indices].tolist())) new_spec.setRT(self.retention_time) @@ -365,7 +496,7 @@ def top_n_peaks(self, n: int) -> 'Py_MSSpectrum': return Py_MSSpectrum(new_spec) - def normalize_to_tic(self) -> 'Py_MSSpectrum': + def normalize_to_tic(self) -> "Py_MSSpectrum": """Scale intensities so their sum equals one.""" mz, intensity = self.peaks @@ -377,22 +508,22 @@ def normalize_to_tic(self) -> 'Py_MSSpectrum': normalized_spec.set_peaks((mz.tolist(), (intensity / total).tolist())) return Py_MSSpectrum(normalized_spec) - def normalize_intensity(self, max_value: float = 100.0) -> 'Py_MSSpectrum': + def normalize_intensity(self, max_value: float = 100.0) -> "Py_MSSpectrum": """ Normalize peak intensities to a maximum value. - + Args: max_value: Target maximum intensity (default: 100.0) - + Returns: New Spectrum with normalized intensities """ mz, intensity = self.peaks if len(intensity) == 0 or np.max(intensity) == 0: return self - + normalized = intensity * (max_value / np.max(intensity)) - + new_spec = oms.MSSpectrum() new_spec.set_peaks((mz.tolist(), normalized.tolist())) new_spec.setRT(self.retention_time) @@ -402,12 +533,12 @@ def normalize_intensity(self, max_value: float = 100.0) -> 'Py_MSSpectrum': return Py_MSSpectrum(new_spec) # ==================== Access to Native Object ==================== - + @property def native(self) -> oms.MSSpectrum: """ Get the underlying pyOpenMS MSSpectrum object. - + Use this when you need to access pyOpenMS-specific methods not wrapped by this class. """ diff --git a/tests/test_py_mobilogram.py b/tests/test_py_mobilogram.py new file mode 100644 index 0000000..2c71e05 --- /dev/null +++ b/tests/test_py_mobilogram.py @@ -0,0 +1,188 @@ +import numpy as np +import pandas as pd +import pytest + +oms = pytest.importorskip("pyopenms") + +from openms_python.py_mobilogram import Py_Mobilogram + + +def create_native_chromatogram(): + """Create a native pyOpenMS MSChromatogram for testing.""" + chrom = oms.MSChromatogram() + chrom.setName("test_mobilogram") + chrom.setMetaValue("mz", 500.0) + chrom.setMetaValue("chromatogram_type", "mobilogram") + + # Add some peaks + for dt, intensity in [(1.5, 100.0), (2.0, 150.0), (2.5, 120.0), (3.0, 80.0)]: + peak = oms.ChromatogramPeak() + peak.setRT(dt) + peak.setIntensity(intensity) + chrom.push_back(peak) + + return chrom + + +def test_py_mobilogram_properties(): + """Test basic mobilogram properties.""" + wrapper = Py_Mobilogram(create_native_chromatogram()) + + assert wrapper.name == "test_mobilogram" + assert wrapper.mz == pytest.approx(500.0) + assert len(wrapper) == 4 + + # Test drift time and intensity + drift_time = wrapper.drift_time + intensity = wrapper.intensity + + assert len(drift_time) == 4 + assert len(intensity) == 4 + assert np.allclose(drift_time, [1.5, 2.0, 2.5, 3.0]) + assert np.allclose(intensity, [100.0, 150.0, 120.0, 80.0]) + + # Test peaks property + dt, i = wrapper.peaks + assert np.allclose(dt, drift_time) + assert np.allclose(i, intensity) + + +def test_py_mobilogram_setters(): + """Test mobilogram setters.""" + chrom = oms.MSChromatogram() + wrapper = Py_Mobilogram(chrom) + + # Set name + wrapper.name = "my_mobilogram" + assert wrapper.name == "my_mobilogram" + + # Set m/z + wrapper.mz = 600.0 + assert wrapper.mz == pytest.approx(600.0) + + # Set peaks + new_dt = np.array([1.0, 2.0, 3.0]) + new_intensity = np.array([50.0, 100.0, 75.0]) + wrapper.peaks = (new_dt, new_intensity) + + assert len(wrapper) == 3 + assert np.allclose(wrapper.drift_time, new_dt) + assert np.allclose(wrapper.intensity, new_intensity) + + +def test_py_mobilogram_statistics(): + """Test mobilogram statistical properties.""" + wrapper = Py_Mobilogram(create_native_chromatogram()) + + assert wrapper.total_ion_current == pytest.approx(450.0) + assert wrapper.base_peak_drift_time == pytest.approx(2.0) + assert wrapper.base_peak_intensity == pytest.approx(150.0) + + +def test_py_mobilogram_to_dataframe(): + """Test conversion to pandas DataFrame.""" + wrapper = Py_Mobilogram(create_native_chromatogram()) + + df = wrapper.to_dataframe() + + assert "drift_time" in df.columns + assert "intensity" in df.columns + assert "mz" in df.columns + + assert len(df) == 4 + assert np.allclose(df["drift_time"], [1.5, 2.0, 2.5, 3.0]) + assert np.allclose(df["intensity"], [100.0, 150.0, 120.0, 80.0]) + assert np.all(df["mz"] == 500.0) + + +def test_py_mobilogram_from_dataframe(): + """Test creation from pandas DataFrame.""" + df = pd.DataFrame({ + "drift_time": [1.5, 2.0, 2.5], + "intensity": [100.0, 150.0, 120.0] + }) + + wrapper = Py_Mobilogram.from_dataframe(df, mz=500.0, name="test_mob") + + assert wrapper.name == "test_mob" + assert wrapper.mz == pytest.approx(500.0) + assert len(wrapper) == 3 + assert np.allclose(wrapper.drift_time, [1.5, 2.0, 2.5]) + assert np.allclose(wrapper.intensity, [100.0, 150.0, 120.0]) + + +def test_py_mobilogram_from_arrays(): + """Test creation from NumPy arrays.""" + drift_time = np.array([1.0, 2.0, 3.0, 4.0]) + intensity = np.array([50.0, 100.0, 75.0, 25.0]) + + wrapper = Py_Mobilogram.from_arrays(drift_time, intensity, mz=600.0, name="array_mob") + + assert wrapper.name == "array_mob" + assert wrapper.mz == pytest.approx(600.0) + assert len(wrapper) == 4 + assert np.allclose(wrapper.drift_time, drift_time) + assert np.allclose(wrapper.intensity, intensity) + + +def test_py_mobilogram_roundtrip(): + """Test DataFrame round-trip conversion.""" + # Create from arrays + drift_time = np.array([1.5, 2.0, 2.5, 3.0]) + intensity = np.array([100.0, 150.0, 120.0, 80.0]) + + mob1 = Py_Mobilogram.from_arrays(drift_time, intensity, mz=500.0) + + # Convert to DataFrame + df = mob1.to_dataframe() + + # Create from DataFrame + mob2 = Py_Mobilogram.from_dataframe(df, name="roundtrip_mob") + + # Verify they match + assert len(mob1) == len(mob2) + assert np.allclose(mob1.drift_time, mob2.drift_time) + assert np.allclose(mob1.intensity, mob2.intensity) + assert mob1.mz == mob2.mz + + +def test_py_mobilogram_repr(): + """Test string representation.""" + wrapper = Py_Mobilogram(create_native_chromatogram()) + + repr_str = repr(wrapper) + assert "Mobilogram" in repr_str + assert "m/z=500" in repr_str + assert "points=4" in repr_str + + # Test mobilogram without m/z + chrom = oms.MSChromatogram() + peak = oms.ChromatogramPeak() + peak.setRT(1.0) + peak.setIntensity(100.0) + chrom.push_back(peak) + + wrapper_no_mz = Py_Mobilogram(chrom) + repr_str_no_mz = repr(wrapper_no_mz) + assert "m/z=unset" in repr_str_no_mz + + +def test_py_mobilogram_empty(): + """Test empty mobilogram.""" + chrom = oms.MSChromatogram() + wrapper = Py_Mobilogram(chrom) + + assert len(wrapper) == 0 + assert wrapper.base_peak_drift_time is None + assert wrapper.base_peak_intensity is None + assert wrapper.total_ion_current == 0.0 + + +def test_py_mobilogram_native_access(): + """Test access to native pyOpenMS object.""" + chrom = create_native_chromatogram() + wrapper = Py_Mobilogram(chrom) + + native = wrapper.native + assert isinstance(native, oms.MSChromatogram) + assert native.size() == 4 diff --git a/tests/test_py_msspectrum.py b/tests/test_py_msspectrum.py index 14a93d7..919578e 100644 --- a/tests/test_py_msspectrum.py +++ b/tests/test_py_msspectrum.py @@ -84,3 +84,139 @@ def test_py_msspectrum_dataframe_helpers_round_trip(): assert wrapper.ms_level == 1 assert wrapper.native_id == "scan=1" + +def test_py_msspectrum_float_data_arrays(): + """Test float data array support.""" + spec = oms.MSSpectrum() + spec.set_peaks(([100.0, 200.0, 300.0], [50.0, 100.0, 75.0])) + + wrapper = Py_MSSpectrum(spec) + + # Initially no float arrays + assert len(wrapper.float_data_arrays) == 0 + assert wrapper.ion_mobility is None + + # Add a float data array + fda = oms.FloatDataArray() + fda.setName("test_array") + fda.push_back(1.0) + fda.push_back(2.0) + fda.push_back(3.0) + + float_arrays = wrapper.float_data_arrays + float_arrays.append(fda) + wrapper.float_data_arrays = float_arrays + + # Verify it was added + assert len(wrapper.float_data_arrays) == 1 + assert wrapper.float_data_arrays[0].getName() == "test_array" + assert wrapper.float_data_arrays[0].size() == 3 + + +def test_py_msspectrum_ion_mobility(): + """Test ion mobility convenience properties.""" + spec = oms.MSSpectrum() + spec.set_peaks(([100.0, 200.0, 300.0], [50.0, 100.0, 75.0])) + + wrapper = Py_MSSpectrum(spec) + + # Initially no ion mobility + assert wrapper.ion_mobility is None + + # Set ion mobility + im_values = np.array([1.5, 2.3, 3.1]) + wrapper.ion_mobility = im_values + + # Verify it was set + assert wrapper.ion_mobility is not None + assert len(wrapper.ion_mobility) == 3 + assert np.allclose(wrapper.ion_mobility, im_values, rtol=1e-5) + + # Verify float array was created + assert len(wrapper.float_data_arrays) == 1 + assert wrapper.float_data_arrays[0].getName() == "ion_mobility" + + +def test_py_msspectrum_ion_mobility_wrong_length(): + """Test that setting ion mobility with wrong length raises error.""" + spec = oms.MSSpectrum() + spec.set_peaks(([100.0, 200.0, 300.0], [50.0, 100.0, 75.0])) + + wrapper = Py_MSSpectrum(spec) + + # Try to set ion mobility with wrong length + with pytest.raises(ValueError, match="Ion mobility array length"): + wrapper.ion_mobility = np.array([1.5, 2.3]) # Only 2 values, should be 3 + + +def test_py_msspectrum_drift_time(): + """Test drift time property.""" + spec = oms.MSSpectrum() + spec.set_peaks(([100.0, 200.0], [50.0, 100.0])) + + wrapper = Py_MSSpectrum(spec) + + # Initially -1.0 (not set) + assert wrapper.drift_time == -1.0 + + # Set drift time + wrapper.drift_time = 5.5 + assert wrapper.drift_time == pytest.approx(5.5) + + +def test_py_msspectrum_dataframe_with_float_arrays(): + """Test DataFrame conversion with float data arrays.""" + df = pd.DataFrame({ + "mz": [100.0, 200.0, 300.0], + "intensity": [50.0, 100.0, 75.0], + "ion_mobility": [1.5, 2.3, 3.1] + }) + + # Create spectrum from DataFrame + wrapper = Py_MSSpectrum.from_dataframe(df, retention_time=12.3, ms_level=1) + + # Verify peaks were set + assert len(wrapper) == 3 + assert np.allclose(wrapper.mz, [100.0, 200.0, 300.0]) + assert np.allclose(wrapper.intensity, [50.0, 100.0, 75.0]) + + # Verify ion mobility was set + assert wrapper.ion_mobility is not None + assert np.allclose(wrapper.ion_mobility, [1.5, 2.3, 3.1], rtol=1e-5) + + # Convert back to DataFrame + df_back = wrapper.to_dataframe(include_float_arrays=True) + assert "mz" in df_back.columns + assert "intensity" in df_back.columns + assert "ion_mobility" in df_back.columns + assert np.allclose(df_back["ion_mobility"], [1.5, 2.3, 3.1], rtol=1e-5) + + # Test without float arrays + df_no_arrays = wrapper.to_dataframe(include_float_arrays=False) + assert "mz" in df_no_arrays.columns + assert "intensity" in df_no_arrays.columns + assert "ion_mobility" not in df_no_arrays.columns + + +def test_py_msspectrum_dataframe_with_multiple_float_arrays(): + """Test DataFrame conversion with multiple float data arrays.""" + df = pd.DataFrame({ + "mz": [100.0, 200.0], + "intensity": [50.0, 100.0], + "ion_mobility": [1.5, 2.3], + "custom_array": [10.0, 20.0] + }) + + # Create spectrum from DataFrame + wrapper = Py_MSSpectrum.from_dataframe(df) + + # Verify both float arrays were created + assert len(wrapper.float_data_arrays) == 2 + + # Convert back to DataFrame + df_back = wrapper.to_dataframe(include_float_arrays=True) + assert "ion_mobility" in df_back.columns + assert "custom_array" in df_back.columns + assert np.allclose(df_back["ion_mobility"], [1.5, 2.3], rtol=1e-5) + assert np.allclose(df_back["custom_array"], [10.0, 20.0], rtol=1e-5) + From 5e9f88cae0175d789c258f39078ea76af730c0f7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 19 Nov 2025 11:53:59 +0000 Subject: [PATCH 3/4] Update README with ion mobility and mobilogram documentation Co-authored-by: timosachsenberg <5803621+timosachsenberg@users.noreply.github.com> --- README.md | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1a2ecb8..112f987 100644 --- a/README.md +++ b/README.md @@ -299,6 +299,77 @@ peaks_df = spec.to_dataframe() print(peaks_df.head()) ``` +### Ion Mobility Support + +`openms-python` provides comprehensive support for ion mobility data through float data arrays and mobilograms. + +#### Float Data Arrays + +Spectra can have additional data arrays (e.g., ion mobility values) associated with each peak: + +```python +from openms_python import Py_MSSpectrum +import pandas as pd +import numpy as np + +# Create a spectrum with ion mobility data +df = pd.DataFrame({ + 'mz': [100.0, 200.0, 300.0], + 'intensity': [50.0, 100.0, 75.0], + 'ion_mobility': [1.5, 2.3, 3.1] +}) + +spec = Py_MSSpectrum.from_dataframe(df, retention_time=60.5, ms_level=1) + +# Access ion mobility values +print(spec.ion_mobility) # array([1.5, 2.3, 3.1]) + +# Set ion mobility values +spec.ion_mobility = np.array([1.6, 2.4, 3.2]) + +# Convert to DataFrame with float arrays +df = spec.to_dataframe(include_float_arrays=True) +print(df) +# mz intensity ion_mobility +# 0 100.0 50.0 1.6 +# 1 200.0 100.0 2.4 +# 2 300.0 75.0 3.2 +``` + +#### Mobilograms + +Mobilograms represent the ion mobility dimension, showing intensity vs. drift time for a specific m/z: + +```python +from openms_python import Py_Mobilogram +import numpy as np + +# Create a mobilogram from arrays +drift_times = np.array([1.0, 1.5, 2.0, 2.5, 3.0]) +intensities = np.array([100.0, 150.0, 200.0, 180.0, 120.0]) + +mob = Py_Mobilogram.from_arrays(drift_times, intensities, mz=500.0) + +print(f"m/z: {mob.mz}") +print(f"Points: {len(mob)}") +print(f"Base peak drift time: {mob.base_peak_drift_time}") + +# Convert to DataFrame +df = mob.to_dataframe() +print(df.head()) +# drift_time intensity mz +# 0 1.0 100.0 500.0 +# 1 1.5 150.0 500.0 +# 2 2.0 200.0 500.0 + +# Create from DataFrame +df = pd.DataFrame({ + 'drift_time': [1.0, 2.0, 3.0], + 'intensity': [50.0, 100.0, 75.0] +}) +mob = Py_Mobilogram.from_dataframe(df, mz=600.0) +``` + ## Workflow helpers `openms_python` now exposes opinionated utilities that combine the primitive @@ -696,16 +767,34 @@ plt.show() - `base_peak_mz`: m/z of most intense peak - `base_peak_intensity`: Intensity of base peak - `peaks`: Tuple of (mz_array, intensity_array) +- `float_data_arrays`: List of FloatDataArray objects +- `ion_mobility`: Ion mobility values as NumPy array +- `drift_time`: Spectrum-level drift time value **Methods:** - `from_dataframe(df, **metadata)`: Create from DataFrame (class method) -- `to_dataframe()`: Convert to DataFrame +- `to_dataframe(include_float_arrays=True)`: Convert to DataFrame - `filter_by_mz(min_mz, max_mz)`: Filter peaks by m/z - `filter_by_intensity(min_intensity)`: Filter peaks by intensity - `top_n_peaks(n)`: Keep top N peaks - `normalize_intensity(max_value)`: Normalize intensities -- `normalize_intensity(max_value)`: Normalize intensities +### Py_Mobilogram + +**Properties:** +- `name`: Name of the mobilogram +- `mz`: m/z value this mobilogram represents +- `drift_time`: Drift time values as NumPy array +- `intensity`: Intensity values as NumPy array +- `peaks`: Tuple of (drift_time_array, intensity_array) +- `total_ion_current`: Sum of intensities +- `base_peak_drift_time`: Drift time of most intense point +- `base_peak_intensity`: Intensity of base peak + +**Methods:** +- `from_arrays(drift_time, intensity, mz=None, name=None)`: Create from arrays (class method) +- `from_dataframe(df, **metadata)`: Create from DataFrame (class method) +- `to_dataframe()`: Convert to DataFrame ### Identifications, ProteinIdentifications & PeptideIdentifications From da884719d28ec0df40cf99340ec5fbf7a358366b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 19 Nov 2025 12:09:02 +0000 Subject: [PATCH 4/4] Add note about native OpenMS Mobilogram class Added documentation note that OpenMS C++ has a native Mobilogram class that may not yet be wrapped in pyopenms, and this wrapper uses MSChromatogram as the underlying representation. Co-authored-by: timosachsenberg <5803621+timosachsenberg@users.noreply.github.com> --- README.md | 4 +++- openms_python/py_mobilogram.py | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 112f987..5716afc 100644 --- a/README.md +++ b/README.md @@ -338,7 +338,9 @@ print(df) #### Mobilograms -Mobilograms represent the ion mobility dimension, showing intensity vs. drift time for a specific m/z: +Mobilograms represent the ion mobility dimension, showing intensity vs. drift time for a specific m/z. + +**Note:** OpenMS C++ has a native `Mobilogram` class that may not yet be wrapped in pyopenms. This wrapper uses `MSChromatogram` as the underlying representation for mobilogram data. ```python from openms_python import Py_Mobilogram diff --git a/openms_python/py_mobilogram.py b/openms_python/py_mobilogram.py index e00a924..0197cef 100644 --- a/openms_python/py_mobilogram.py +++ b/openms_python/py_mobilogram.py @@ -3,6 +3,10 @@ A mobilogram is a chromatogram in the ion mobility dimension, representing intensity vs. drift time for a specific m/z value. + +Note: OpenMS C++ has a native Mobilogram class that may not yet be wrapped +in pyopenms. This wrapper uses MSChromatogram as the underlying representation +for mobilogram data. """ from typing import Tuple, Optional @@ -20,6 +24,10 @@ class Py_Mobilogram(MetaInfoMappingMixin): A mobilogram represents the ion mobility dimension for a specific m/z, showing intensity vs. drift time (or other ion mobility values). + Note: OpenMS C++ has a native Mobilogram class that may not yet be wrapped + in pyopenms. This wrapper uses MSChromatogram as the underlying representation + for mobilogram data. + Example: >>> mob = Py_Mobilogram(native_chromatogram) >>> print(f"m/z: {mob.mz:.4f}")