Skip to content

Commit 8806a8e

Browse files
committed
petab1->2: create experiment df
1 parent 1d3fda1 commit 8806a8e

File tree

7 files changed

+178
-76
lines changed

7 files changed

+178
-76
lines changed

petab/v1/calculate.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ def calculate_residuals_for_table(
9797
Calculate residuals for a single measurement table.
9898
For the arguments, see `calculate_residuals`.
9999
"""
100+
# below, we rely on a unique index
101+
measurement_df = measurement_df.reset_index(drop=True)
102+
100103
# create residual df as copy of measurement df, change column
101104
residual_df = measurement_df.copy(deep=True).rename(
102105
columns={MEASUREMENT: RESIDUAL}
@@ -120,6 +123,10 @@ def calculate_residuals_for_table(
120123
for col in compared_cols
121124
]
122125
mask = reduce(lambda x, y: x & y, masks)
126+
if mask.sum() == 0:
127+
raise ValueError(
128+
f"Could not find simulation for measurement {row}."
129+
)
123130
simulation = simulation_df.loc[mask][SIMULATION].iloc[0]
124131
if scale:
125132
# apply scaling

petab/v2/C.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,6 @@
1313
#: Experiment ID column in the measurement table
1414
EXPERIMENT_ID = "experimentId"
1515

16-
# TODO: remove
17-
#: Preequilibration condition ID column in the measurement table
18-
PREEQUILIBRATION_CONDITION_ID = "preequilibrationConditionId"
19-
20-
# TODO: remove
21-
#: Simulation condition ID column in the measurement table
22-
SIMULATION_CONDITION_ID = "simulationConditionId"
23-
2416
#: Measurement value column in the measurement table
2517
MEASUREMENT = "measurement"
2618

@@ -45,17 +37,13 @@
4537
#: Mandatory columns of measurement table
4638
MEASUREMENT_DF_REQUIRED_COLS = [
4739
OBSERVABLE_ID,
48-
# TODO: add
49-
# EXPERIMENT_ID,
50-
SIMULATION_CONDITION_ID,
40+
EXPERIMENT_ID,
5141
MEASUREMENT,
5242
TIME,
5343
]
5444

5545
#: Optional columns of measurement table
5646
MEASUREMENT_DF_OPTIONAL_COLS = [
57-
# TODO: remove
58-
PREEQUILIBRATION_CONDITION_ID,
5947
OBSERVABLE_PARAMETERS,
6048
NOISE_PARAMETERS,
6149
DATASET_ID,

petab/v2/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,10 @@
2727

2828
# import after v1
2929
from ..version import __version__ # noqa: F401, E402
30-
from . import models # noqa: F401, E402
30+
from . import ( # noqa: F401, E402
31+
C, # noqa: F401, E402
32+
models, # noqa: F401, E402
33+
)
3134
from .conditions import * # noqa: F403, F401, E402
3235
from .experiments import ( # noqa: F401, E402
3336
get_experiment_df,

petab/v2/lint.py

Lines changed: 67 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
from .. import v2
1616
from ..v1.lint import (
1717
_check_df,
18+
assert_measured_observables_defined,
19+
assert_measurements_not_null,
20+
assert_measurements_numeric,
1821
assert_model_parameters_in_condition_or_parameter_table,
1922
assert_no_leading_trailing_whitespace,
2023
assert_parameter_bounds_are_numeric,
@@ -23,13 +26,16 @@
2326
assert_parameter_prior_parameters_are_valid,
2427
assert_parameter_prior_type_is_valid,
2528
assert_parameter_scale_is_valid,
29+
assert_unique_observable_ids,
2630
assert_unique_parameter_ids,
2731
check_ids,
28-
check_measurement_df,
2932
check_observable_df,
3033
check_parameter_bounds,
3134
)
32-
from ..v1.measurements import split_parameter_replacement_list
35+
from ..v1.measurements import (
36+
assert_overrides_match_parameter_count,
37+
split_parameter_replacement_list,
38+
)
3339
from ..v1.observables import get_output_parameters, get_placeholders
3440
from ..v1.visualize.lint import validate_visualization_df
3541
from ..v2.C import *
@@ -237,8 +243,51 @@ def run(self, problem: Problem) -> ValidationIssue | None:
237243
if problem.measurement_df is None:
238244
return
239245

246+
df = problem.measurement_df
240247
try:
241-
check_measurement_df(problem.measurement_df, problem.observable_df)
248+
_check_df(df, MEASUREMENT_DF_REQUIRED_COLS, "measurement")
249+
250+
for column_name in MEASUREMENT_DF_REQUIRED_COLS:
251+
if not np.issubdtype(df[column_name].dtype, np.number):
252+
assert_no_leading_trailing_whitespace(
253+
df[column_name].values, column_name
254+
)
255+
256+
for column_name in MEASUREMENT_DF_OPTIONAL_COLS:
257+
if column_name in df and not np.issubdtype(
258+
df[column_name].dtype, np.number
259+
):
260+
assert_no_leading_trailing_whitespace(
261+
df[column_name].values, column_name
262+
)
263+
264+
if problem.observable_df is not None:
265+
assert_measured_observables_defined(df, problem.observable_df)
266+
assert_overrides_match_parameter_count(
267+
df, problem.observable_df
268+
)
269+
270+
if OBSERVABLE_TRANSFORMATION in problem.observable_df:
271+
# Check for positivity of measurements in case of
272+
# log-transformation
273+
assert_unique_observable_ids(problem.observable_df)
274+
# If the above is not checked, in the following loop
275+
# trafo may become a pandas Series
276+
for measurement, obs_id in zip(
277+
df[MEASUREMENT], df[OBSERVABLE_ID], strict=True
278+
):
279+
trafo = problem.observable_df.loc[
280+
obs_id, OBSERVABLE_TRANSFORMATION
281+
]
282+
if measurement <= 0.0 and trafo in [LOG, LOG10]:
283+
raise ValueError(
284+
"Measurements with observable "
285+
f"transformation {trafo} must be "
286+
f"positive, but {measurement} <= 0."
287+
)
288+
289+
assert_measurements_not_null(df)
290+
assert_measurements_numeric(df)
242291
except AssertionError as e:
243292
return ValidationError(str(e))
244293

@@ -247,46 +296,20 @@ def run(self, problem: Problem) -> ValidationIssue | None:
247296
# condition table should be an error if the measurement table refers
248297
# to conditions
249298

250-
# check that measured experiments/conditions exist
251-
# TODO: fully switch to experiment table and remove this:
252-
if SIMULATION_CONDITION_ID in problem.measurement_df:
253-
if problem.condition_df is None:
254-
return
255-
used_conditions = set(
256-
problem.measurement_df[SIMULATION_CONDITION_ID].dropna().values
257-
)
258-
if PREEQUILIBRATION_CONDITION_ID in problem.measurement_df:
259-
used_conditions |= set(
260-
problem.measurement_df[PREEQUILIBRATION_CONDITION_ID]
261-
.dropna()
262-
.values
263-
)
264-
available_conditions = set(
265-
problem.condition_df[CONDITION_ID].unique()
266-
)
267-
if missing_conditions := (used_conditions - available_conditions):
268-
return ValidationError(
269-
"Measurement table references conditions that "
270-
"are not specified in the condition table: "
271-
+ str(missing_conditions)
272-
)
273-
elif EXPERIMENT_ID in problem.measurement_df:
274-
if problem.experiment_df is None:
275-
return
276-
used_experiments = set(
277-
problem.measurement_df[EXPERIMENT_ID].values
278-
)
279-
available_experiments = set(
280-
problem.condition_df[CONDITION_ID].unique()
299+
# check that measured experiments
300+
if problem.experiment_df is None:
301+
return
302+
303+
used_experiments = set(problem.measurement_df[EXPERIMENT_ID].values)
304+
available_experiments = set(
305+
problem.condition_df[CONDITION_ID].unique()
306+
)
307+
if missing_experiments := (used_experiments - available_experiments):
308+
raise AssertionError(
309+
"Measurement table references experiments that "
310+
"are not specified in the experiments table: "
311+
+ str(missing_experiments)
281312
)
282-
if missing_experiments := (
283-
used_experiments - available_experiments
284-
):
285-
raise AssertionError(
286-
"Measurement table references experiments that "
287-
"are not specified in the experiments table: "
288-
+ str(missing_experiments)
289-
)
290313

291314

292315
class CheckConditionTable(ValidationTask):
@@ -771,7 +794,8 @@ def append_overrides(overrides):
771794
)
772795

773796
# parameters that are overridden via the condition table are not allowed
774-
parameter_ids -= set(problem.condition_df[TARGET_ID].unique())
797+
if problem.condition_df is not None:
798+
parameter_ids -= set(problem.condition_df[TARGET_ID].unique())
775799

776800
return parameter_ids
777801

petab/v2/petab1to2.py

Lines changed: 93 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
from itertools import chain
55
from pathlib import Path
66
from urllib.parse import urlparse
7+
from uuid import uuid4
78

8-
import numpy as np
99
import pandas as pd
1010
from pandas.io.common import get_handle, is_url
1111

@@ -98,10 +98,81 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
9898
condition_df = v1v2_condition_df(condition_df, petab_problem.model)
9999
v2.write_condition_df(condition_df, get_dest_path(condition_file))
100100

101+
# records for the experiment table to be created
102+
experiments = []
103+
104+
def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
# Build the PEtab v2 experiment ID for one combination of a v1 simulation
# condition ID and an (optional) pre-equilibration condition ID.
#
# Returns "" if both IDs are empty. Otherwise returns
# "experiment_<preeq_cond_id>_<sim_cond_id>", or "experiment_<sim_cond_id>"
# when `preeq_cond_id` is falsy.
105+
if not sim_cond_id and not preeq_cond_id:
106+
return ""
# Only emptiness is tested here via truthiness.
# NOTE(review): a float NaN and the string "nan" are both truthy, so a
# missing pre-equilibration ID that reaches this point in either form would
# be formatted into the experiment ID -- confirm callers pass "" instead.
107+
if preeq_cond_id:
108+
preeq_cond_id = f"{preeq_cond_id}_"
109+
exp_id = f"experiment_{preeq_cond_id}{sim_cond_id}"
# Intended to make the ID unique by appending a numeric suffix.
# NOTE(review): `experiments` is the enclosing list of record dicts, while
# `exp_id` is a str, so this membership test does not look like it can
# match and the suffix branch appears to be dead as written. Do not simply
# switch it to compare against the stored experiment IDs: this function is
# called again for every measurement row after the records were appended,
# and would then return a suffixed ID that is not in the experiment table.
110+
if exp_id in experiments: # noqa: B023
111+
i = 1
112+
while f"{exp_id}_{i}" in experiments: # noqa: B023
113+
i += 1
114+
exp_id = f"{exp_id}_{i}"
115+
return exp_id
116+
117+
measured_experiments = (
118+
petab_problem.get_simulation_conditions_from_measurement_df()
119+
)
120+
for (
121+
_,
122+
row,
123+
) in measured_experiments.iterrows():
124+
# generate a new experiment for each simulation / pre-eq condition
125+
# combination
126+
sim_cond_id = row[v1.C.SIMULATION_CONDITION_ID]
127+
preeq_cond_id = row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, "")
128+
exp_id = create_experiment_id(sim_cond_id, preeq_cond_id)
129+
if preeq_cond_id:
130+
experiments.append(
131+
{
132+
v2.C.EXPERIMENT_ID: exp_id,
133+
v2.C.CONDITION_ID: preeq_cond_id,
134+
v2.C.TIME: float("-inf"),
135+
}
136+
)
137+
experiments.append(
138+
{
139+
v2.C.EXPERIMENT_ID: exp_id,
140+
v2.C.CONDITION_ID: sim_cond_id,
141+
v2.C.TIME: 0,
142+
}
143+
)
144+
if experiments:
145+
exp_table_path = output_dir / "experiments.tsv"
146+
if exp_table_path.exists():
147+
raise ValueError(
148+
f"Experiment table file {exp_table_path} already exists."
149+
)
150+
problem_config[v2.C.EXPERIMENT_FILES] = [exp_table_path.name]
151+
v2.write_experiment_df(
152+
v2.get_experiment_df(pd.DataFrame(experiments)), exp_table_path
153+
)
154+
101155
for measurement_file in problem_config.get(v2.C.MEASUREMENT_FILES, []):
102156
measurement_df = v1.get_measurement_df(
103157
get_src_path(measurement_file)
104158
)
159+
# if there is already an experiment ID column, we rename it
160+
if v2.C.EXPERIMENT_ID in measurement_df.columns:
161+
measurement_df.rename(
162+
columns={v2.C.EXPERIMENT_ID: f"experiment_id_{uuid4()}"},
163+
inplace=True,
164+
)
165+
# add pre-eq condition id if not present or convert to string
166+
# for simplicity
167+
if v1.C.PREEQUILIBRATION_CONDITION_ID in measurement_df.columns:
168+
measurement_df[
169+
v1.C.PREEQUILIBRATION_CONDITION_ID
170+
] = measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID].astype(
171+
str
172+
)
173+
else:
174+
measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = ""
175+
105176
if (
106177
petab_problem.condition_df is not None
107178
and len(
@@ -110,20 +181,33 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
110181
)
111182
== 0
112183
):
113-
# can't have "empty" conditions with no overrides in v2
114-
# TODO: this needs to be done condition wise
115-
measurement_df[v2.C.SIMULATION_CONDITION_ID] = np.nan
184+
# we can't have "empty" conditions with no overrides in v2,
185+
# therefore, we drop the respective condition ID completely
186+
# TODO: or can we?
187+
# TODO: this needs to be checked condition-wise, not globally
188+
measurement_df[v1.C.SIMULATION_CONDITION_ID] = ""
116189
if (
117190
v1.C.PREEQUILIBRATION_CONDITION_ID
118191
in measurement_df.columns
119192
):
120-
measurement_df[v2.C.PREEQUILIBRATION_CONDITION_ID] = np.nan
193+
measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = ""
194+
# condition IDs to experiment IDs
195+
measurement_df.insert(
196+
0,
197+
v2.C.EXPERIMENT_ID,
198+
measurement_df.apply(
199+
lambda row: create_experiment_id(
200+
row[v1.C.SIMULATION_CONDITION_ID],
201+
row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, ""),
202+
),
203+
axis=1,
204+
),
205+
)
206+
del measurement_df[v1.C.SIMULATION_CONDITION_ID]
207+
del measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID]
121208
v2.write_measurement_df(
122209
measurement_df, get_dest_path(measurement_file)
123210
)
124-
# TODO: Measurements: preequilibration to experiments/timecourses once
125-
# finalized
126-
...
127211

128212
# validate updated Problem
129213
validation_issues = v2.lint_problem(new_yaml_file)
@@ -189,7 +273,7 @@ def v1v2_condition_df(
189273
"""Convert condition table from petab v1 to v2."""
190274
condition_df = condition_df.copy().reset_index()
191275
with suppress(KeyError):
192-
# TODO: are condition names still supported in v2?
276+
# conditionName was dropped in PEtab v2
193277
condition_df.drop(columns=[v2.C.CONDITION_NAME], inplace=True)
194278

195279
condition_df = condition_df.melt(

0 commit comments

Comments
 (0)