Skip to content

Commit a616088

Browse files
committed
Implement flatten_timepoint_specific_output_overrides for PEtab v2
Port `flatten_timepoint_specific_output_overrides`, `has_timepoint_specific_overrides`, `unflatten_simulation_df` and their respective tests to PEtab v2.
1 parent 880741a commit a616088

File tree

3 files changed

+573
-6
lines changed

3 files changed

+573
-6
lines changed

petab/v1/core.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,15 +133,18 @@ def get_notnull_columns(df: pd.DataFrame, candidates: Iterable):
133133
]
134134

135135

136-
def get_observable_replacement_id(groupvars, groupvar) -> str:
136+
def get_observable_replacement_id(
137+
groupvars: list[str], groupvar: Sequence
138+
) -> str:
137139
"""Get the replacement ID for an observable.
138140
139141
Arguments:
140142
groupvars:
141143
The columns of a PEtab measurement table that should be unique
142144
between observables in a flattened PEtab problem.
143145
groupvar:
144-
A specific grouping of `groupvars`.
146+
A specific grouping of `groupvars`. Same length and order as
147+
`groupvars`.
145148
146149
Returns:
147150
The observable replacement ID.

petab/v2/core.py

Lines changed: 256 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,20 @@
6969
"Parameter",
7070
"ParameterScale",
7171
"ParameterTable",
72+
"flatten_timepoint_specific_output_overrides",
73+
"unflatten_simulation_df",
7274
]
7375

76+
# Measurement-table columns that are candidates for grouping measurements
# when checking for / flattening timepoint-specific output overrides.
# Users of this list narrow it down to the columns that actually contain
# non-null values before grouping.
_POSSIBLE_GROUPVARS_FLATTENED_PROBLEM = [
    C.MODEL_ID,
    C.EXPERIMENT_ID,
    C.OBSERVABLE_ID,
    C.OBSERVABLE_PARAMETERS,
    C.NOISE_PARAMETERS,
]

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
85+
7486

7587
def _is_finite_or_neg_inf(v: float, info: ValidationInfo) -> float:
7688
if not np.isfinite(v) and v != -np.inf:
@@ -1143,7 +1155,11 @@ def __str__(self):
11431155
f"{observables}, {measurements}, {parameters}"
11441156
)
11451157

1146-
def __getitem__(self, key):
1158+
def __getitem__(
1159+
self, key
1160+
) -> (
1161+
Condition | Experiment | Observable | Measurement | Parameter | Mapping
1162+
):
11471163
"""Get PEtab entity by ID.
11481164
11491165
This allows accessing PEtab entities such as conditions, experiments,
@@ -2320,7 +2336,9 @@ def get_output_parameters(
23202336
# filter out symbols that are defined in the model or mapped to
23212337
# such symbols
23222338
for candidate in sorted(candidates):
2323-
if self.model.symbol_allowed_in_observable_formula(candidate):
2339+
if self.model and self.model.symbol_allowed_in_observable_formula(
2340+
candidate
2341+
):
23242342
continue
23252343

23262344
# does it map to a model entity?
@@ -2329,8 +2347,11 @@ def get_output_parameters(
23292347
mapping.petab_id == candidate
23302348
and mapping.model_id is not None
23312349
):
2332-
if self.model.symbol_allowed_in_observable_formula(
2333-
mapping.model_id
2350+
if (
2351+
self.model
2352+
and self.model.symbol_allowed_in_observable_formula(
2353+
mapping.model_id
2354+
)
23342355
):
23352356
break
23362357
else:
@@ -2339,6 +2360,71 @@ def get_output_parameters(
23392360

23402361
return output_parameters
23412362

2363+
def has_timepoint_specific_overrides(
    self,
    ignore_scalar_numeric_noise_parameters: bool = False,
    ignore_scalar_numeric_observable_parameters: bool = False,
) -> bool:
    """Check if the measurements have timepoint-specific observable or
    noise parameter overrides.

    The measurements are grouped twice: once by all non-null columns of
    ``_POSSIBLE_GROUPVARS_FLATTENED_PROBLEM`` (which includes the
    observable and noise parameter override columns) and once by the
    non-null model / observable / experiment ID columns only. If the two
    groupings yield a different number of groups, some observable is used
    with more than one set of overrides.

    :param ignore_scalar_numeric_noise_parameters:
        ignore scalar numeric assignments to noiseParameter placeholders

    :param ignore_scalar_numeric_observable_parameters:
        ignore scalar numeric assignments to observableParameter
        placeholders

    :return: True if the problem has timepoint-specific overrides, False
        otherwise.
    """
    if not self.measurements:
        return False

    from ..v1.core import get_notnull_columns
    from ..v1.lint import is_scalar_float

    # Work on a copy: the override columns are rewritten below, and that
    # must never leak into whatever frame the property hands out.
    measurement_df = self.measurement_df.copy()

    # mask numeric values
    for col, ignore_scalar_numeric in (
        (
            C.OBSERVABLE_PARAMETERS,
            ignore_scalar_numeric_observable_parameters,
        ),
        (C.NOISE_PARAMETERS, ignore_scalar_numeric_noise_parameters),
    ):
        if col not in measurement_df:
            continue

        # Compare overrides by their string representation.
        measurement_df[col] = measurement_df[col].apply(str)

        if ignore_scalar_numeric:
            # Blank out scalar numeric overrides so that they do not
            # contribute additional groups.
            is_numeric = measurement_df[col].apply(is_scalar_float)
            measurement_df.loc[is_numeric, col] = ""

    override_grouping_cols = get_notnull_columns(
        measurement_df,
        _POSSIBLE_GROUPVARS_FLATTENED_PROBLEM,
    )
    groups_with_overrides = measurement_df.groupby(
        override_grouping_cols, dropna=False
    )

    id_grouping_cols = get_notnull_columns(
        measurement_df,
        [
            C.MODEL_ID,
            C.OBSERVABLE_ID,
            C.EXPERIMENT_ID,
        ],
    )
    # Use the same NaN handling as above. Otherwise rows with a missing
    # ID (e.g. no experiment ID) would be counted in one grouping but
    # silently dropped from the other, and the group counts would differ
    # even without any timepoint-specific override.
    groups_without_overrides = measurement_df.groupby(
        id_grouping_cols, dropna=False
    )

    # data frame has timepoint specific overrides if grouping by noise
    # parameters and observable parameters in addition to observable and
    # experiment id yields more groups
    return len(groups_with_overrides) != len(groups_without_overrides)
2427+
23422428

23432429
class ModelFile(BaseModel):
23442430
"""A file in the PEtab problem configuration."""
@@ -2457,3 +2543,169 @@ def format_version_tuple(self) -> tuple[int, int, int, str]:
24572543
"""The format version as a tuple of major/minor/patch `int`s and a
24582544
suffix."""
24592545
return parse_version(self.format_version)
2546+
2547+
2548+
def _get_flattened_id_mappings(
    petab_problem: Problem,
) -> dict[str, str]:
    """Get mapping from flattened to unflattened observable IDs.

    :param petab_problem:
        The unflattened PEtab problem.

    :returns:
        A mapping from flattened (replacement) observable ID to the
        original observable ID. Groups whose replacement ID equals the
        original ID are included as identity entries.

    :raises RuntimeError:
        If a replacement ID that differs from its original observable ID
        is already used as an observable ID in the problem.
    """
    from ..v1.core import (
        get_notnull_columns,
        get_observable_replacement_id,
    )

    measurement_df = petab_problem.measurement_df
    groupvars = get_notnull_columns(
        measurement_df, _POSSIBLE_GROUPVARS_FLATTENED_PROBLEM
    )
    old_observable_ids = {obs.id for obs in petab_problem.observables}

    mappings: dict[str, str] = {}
    for groupvar, _ in measurement_df.groupby(groupvars, dropna=False):
        # Group keys are expected to be tuples aligned with `groupvars`.
        # Normalize a scalar key (as produced by some pandas versions for
        # a single grouping column) so that positional indexing below does
        # not index into the ID string itself.
        if not isinstance(groupvar, tuple):
            groupvar = (groupvar,)

        observable_id = groupvar[groupvars.index(C.OBSERVABLE_ID)]
        observable_replacement_id = get_observable_replacement_id(
            groupvars, groupvar
        )

        logger.debug(
            "Mapping flattened observable %s to original observable %s",
            observable_replacement_id,
            observable_id,
        )
        if (
            observable_id != observable_replacement_id
            and observable_replacement_id in old_observable_ids
        ):
            # The synthetic ID would clash with an existing observable.
            raise RuntimeError(
                "could not create synthetic observables "
                f"since {observable_replacement_id} was "
                "already present in observable table"
            )

        mappings[observable_replacement_id] = observable_id

    return mappings
2592+
2593+
2594+
def flatten_timepoint_specific_output_overrides(
    petab_problem: Problem,
) -> None:
    """Flatten timepoint-specific output parameter overrides.

    If the PEtab problem definition has timepoint-specific
    `observableParameters` or `noiseParameters` for the same observable,
    replace those by replicating the respective observable.

    This is a helper function for some tools which may not support such
    timepoint-specific mappings. The observable table and measurement table
    are modified in place.

    :param petab_problem:
        PEtab problem to work on. Modified in place: its observable tables
        are replaced by a single table holding the replicated observables,
        and the observable ID of every measurement is rewritten.

    :raises ValueError:
        If a measurement refers to an observable that is not among the
        problem's observables.
    """
    from ..v1.core import (
        get_notnull_columns,
        get_observable_replacement_id,
    )

    # Collect the known observable IDs once instead of rebuilding the
    # observable data frame for every new observable.
    known_observable_ids = {obs.id for obs in petab_problem.observables}

    # Update observables
    def create_new_observable(old_id, new_id) -> Observable:
        """Return a copy of observable `old_id`, renamed to `new_id`, with
        its placeholders suffixed accordingly."""
        if old_id not in known_observable_ids:
            raise ValueError(
                f"Observable {old_id} not found in observable table."
            )

        # copy original observable and update ID
        observable: Observable = copy.deepcopy(petab_problem[old_id])
        observable.id = new_id

        # update placeholders
        # Snapshot the old placeholders (tolerating unset ones) *before*
        # overwriting them; the new names are derived from these snapshots.
        old_obs_placeholders = list(observable.observable_placeholders or [])
        old_noise_placeholders = list(observable.noise_placeholders or [])
        # The replacement ID extends the original ID; that extension is
        # reused as the suffix of the placeholder names.
        suffix = new_id.removeprefix(old_id)
        observable.observable_placeholders = [
            f"{sym.name}{suffix}" for sym in old_obs_placeholders
        ]
        observable.noise_placeholders = [
            f"{sym.name}{suffix}" for sym in old_noise_placeholders
        ]

        # placeholders in formulas
        subs = dict(
            zip(
                old_obs_placeholders,
                observable.observable_placeholders,
                strict=False,
            )
        )
        if observable.formula is not None:
            observable.formula = observable.formula.subs(subs)
        # The noise formula gets both the observable and the noise
        # placeholder substitutions.
        subs |= dict(
            zip(
                old_noise_placeholders,
                observable.noise_placeholders,
                strict=False,
            )
        )
        if observable.noise_formula is not None:
            observable.noise_formula = observable.noise_formula.subs(subs)

        return observable

    # flattened ID -> original ID
    mappings = _get_flattened_id_mappings(petab_problem)

    petab_problem.observable_tables = [
        ObservableTable(
            [
                create_new_observable(old_id, new_id)
                for new_id, old_id in mappings.items()
            ]
        )
    ]

    # Update measurements
    groupvars = get_notnull_columns(
        petab_problem.measurement_df, _POSSIBLE_GROUPVARS_FLATTENED_PROBLEM
    )
    for measurement_table in petab_problem.measurement_tables:
        for measurement in measurement_table.measurements:
            # TODO: inefficient, but ok for a start
            # Round-trip the single measurement through a data frame to
            # obtain its grouping values in the order of `groupvars`.
            group_vals = (
                MeasurementTable([measurement])
                .to_df()
                .iloc[0][groupvars]
                .tolist()
            )
            new_obs_id = get_observable_replacement_id(groupvars, group_vals)
            measurement.observable_id = new_obs_id
2684+
2685+
def unflatten_simulation_df(
    simulation_df: pd.DataFrame,
    petab_problem: Problem,
) -> pd.DataFrame:
    """Unflatten simulations from a flattened PEtab problem.

    A flattened PEtab problem is the output of applying
    :func:`flatten_timepoint_specific_output_overrides` to a PEtab problem.

    :param simulation_df:
        The simulation dataframe. A dataframe in the same format as a PEtab
        measurements table, but with the ``measurement`` column switched
        with a ``simulation`` column. Not modified.
    :param petab_problem:
        The unflattened PEtab problem.

    :returns:
        The simulation dataframe for the unflattened PEtab problem, i.e. a
        new dataframe in which flattened observable IDs are replaced by the
        original observable IDs.
    """
    # flattened ID -> original ID
    flattened_to_original = _get_flattened_id_mappings(petab_problem)
    restored_ids = simulation_df[C.OBSERVABLE_ID].replace(
        flattened_to_original
    )
    # `assign` returns a new frame; the input is left untouched.
    return simulation_df.assign(**{C.OBSERVABLE_ID: restored_ids})

0 commit comments

Comments
 (0)