Skip to content

Commit 5df89d1

Browse files
committed
petab1->2: create experiment df
1 parent 1d3fda1 commit 5df89d1

File tree

6 files changed

+167
-73
lines changed

6 files changed

+167
-73
lines changed

petab/v2/C.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,6 @@
1313
#: Experiment ID column in the measurement table
1414
EXPERIMENT_ID = "experimentId"
1515

16-
# TODO: remove
17-
#: Preequilibration condition ID column in the measurement table
18-
PREEQUILIBRATION_CONDITION_ID = "preequilibrationConditionId"
19-
20-
# TODO: remove
21-
#: Simulation condition ID column in the measurement table
22-
SIMULATION_CONDITION_ID = "simulationConditionId"
23-
2416
#: Measurement value column in the measurement table
2517
MEASUREMENT = "measurement"
2618

@@ -45,17 +37,13 @@
4537
#: Mandatory columns of measurement table
4638
MEASUREMENT_DF_REQUIRED_COLS = [
4739
OBSERVABLE_ID,
48-
# TODO: add
49-
# EXPERIMENT_ID,
50-
SIMULATION_CONDITION_ID,
40+
EXPERIMENT_ID,
5141
MEASUREMENT,
5242
TIME,
5343
]
5444

5545
#: Optional columns of measurement table
5646
MEASUREMENT_DF_OPTIONAL_COLS = [
57-
# TODO: remove
58-
PREEQUILIBRATION_CONDITION_ID,
5947
OBSERVABLE_PARAMETERS,
6048
NOISE_PARAMETERS,
6149
DATASET_ID,

petab/v2/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,10 @@
2727

2828
# import after v1
2929
from ..version import __version__ # noqa: F401, E402
30-
from . import models # noqa: F401, E402
30+
from . import ( # noqa: F401, E402
31+
C, # noqa: F401, E402
32+
models, # noqa: F401, E402
33+
)
3134
from .conditions import * # noqa: F403, F401, E402
3235
from .experiments import ( # noqa: F401, E402
3336
get_experiment_df,

petab/v2/lint.py

Lines changed: 65 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
from .. import v2
1616
from ..v1.lint import (
1717
_check_df,
18+
assert_measured_observables_defined,
19+
assert_measurements_not_null,
20+
assert_measurements_numeric,
1821
assert_model_parameters_in_condition_or_parameter_table,
1922
assert_no_leading_trailing_whitespace,
2023
assert_parameter_bounds_are_numeric,
@@ -23,13 +26,16 @@
2326
assert_parameter_prior_parameters_are_valid,
2427
assert_parameter_prior_type_is_valid,
2528
assert_parameter_scale_is_valid,
29+
assert_unique_observable_ids,
2630
assert_unique_parameter_ids,
2731
check_ids,
28-
check_measurement_df,
2932
check_observable_df,
3033
check_parameter_bounds,
3134
)
32-
from ..v1.measurements import split_parameter_replacement_list
35+
from ..v1.measurements import (
36+
assert_overrides_match_parameter_count,
37+
split_parameter_replacement_list,
38+
)
3339
from ..v1.observables import get_output_parameters, get_placeholders
3440
from ..v1.visualize.lint import validate_visualization_df
3541
from ..v2.C import *
@@ -237,8 +243,51 @@ def run(self, problem: Problem) -> ValidationIssue | None:
237243
if problem.measurement_df is None:
238244
return
239245

246+
df = problem.measurement_df
240247
try:
241-
check_measurement_df(problem.measurement_df, problem.observable_df)
248+
_check_df(df, MEASUREMENT_DF_REQUIRED_COLS, "measurement")
249+
250+
for column_name in MEASUREMENT_DF_REQUIRED_COLS:
251+
if not np.issubdtype(df[column_name].dtype, np.number):
252+
assert_no_leading_trailing_whitespace(
253+
df[column_name].values, column_name
254+
)
255+
256+
for column_name in MEASUREMENT_DF_OPTIONAL_COLS:
257+
if column_name in df and not np.issubdtype(
258+
df[column_name].dtype, np.number
259+
):
260+
assert_no_leading_trailing_whitespace(
261+
df[column_name].values, column_name
262+
)
263+
264+
if problem.observable_df is not None:
265+
assert_measured_observables_defined(df, problem.observable_df)
266+
assert_overrides_match_parameter_count(
267+
df, problem.observable_df
268+
)
269+
270+
if OBSERVABLE_TRANSFORMATION in problem.observable_df:
271+
# Check for positivity of measurements in case of
272+
# log-transformation
273+
assert_unique_observable_ids(problem.observable_df)
274+
# If the above is not checked, in the following loop
275+
# trafo may become a pandas Series
276+
for measurement, obs_id in zip(
277+
df[MEASUREMENT], df[OBSERVABLE_ID], strict=True
278+
):
279+
trafo = problem.observable_df.loc[
280+
obs_id, OBSERVABLE_TRANSFORMATION
281+
]
282+
if measurement <= 0.0 and trafo in [LOG, LOG10]:
283+
raise ValueError(
284+
"Measurements with observable "
285+
f"transformation {trafo} must be "
286+
f"positive, but {measurement} <= 0."
287+
)
288+
289+
assert_measurements_not_null(df)
290+
assert_measurements_numeric(df)
242291
except AssertionError as e:
243292
return ValidationError(str(e))
244293

@@ -247,46 +296,20 @@ def run(self, problem: Problem) -> ValidationIssue | None:
247296
# condition table should be an error if the measurement table refers
248297
# to conditions
249298

250-
# check that measured experiments/conditions exist
251-
# TODO: fully switch to experiment table and remove this:
252-
if SIMULATION_CONDITION_ID in problem.measurement_df:
253-
if problem.condition_df is None:
254-
return
255-
used_conditions = set(
256-
problem.measurement_df[SIMULATION_CONDITION_ID].dropna().values
257-
)
258-
if PREEQUILIBRATION_CONDITION_ID in problem.measurement_df:
259-
used_conditions |= set(
260-
problem.measurement_df[PREEQUILIBRATION_CONDITION_ID]
261-
.dropna()
262-
.values
263-
)
264-
available_conditions = set(
265-
problem.condition_df[CONDITION_ID].unique()
266-
)
267-
if missing_conditions := (used_conditions - available_conditions):
268-
return ValidationError(
269-
"Measurement table references conditions that "
270-
"are not specified in the condition table: "
271-
+ str(missing_conditions)
272-
)
273-
elif EXPERIMENT_ID in problem.measurement_df:
274-
if problem.experiment_df is None:
275-
return
276-
used_experiments = set(
277-
problem.measurement_df[EXPERIMENT_ID].values
278-
)
279-
available_experiments = set(
280-
problem.condition_df[CONDITION_ID].unique()
299+
# check that all measured experiments are defined in the experiment table
300+
if problem.experiment_df is None:
301+
return
302+
303+
used_experiments = set(problem.measurement_df[EXPERIMENT_ID].values)
304+
available_experiments = set(
305+
problem.condition_df[CONDITION_ID].unique()
306+
)
307+
if missing_experiments := (used_experiments - available_experiments):
308+
raise AssertionError(
309+
"Measurement table references experiments that "
310+
"are not specified in the experiments table: "
311+
+ str(missing_experiments)
281312
)
282-
if missing_experiments := (
283-
used_experiments - available_experiments
284-
):
285-
raise AssertionError(
286-
"Measurement table references experiments that "
287-
"are not specified in the experiments table: "
288-
+ str(missing_experiments)
289-
)
290313

291314

292315
class CheckConditionTable(ValidationTask):

petab/v2/petab1to2.py

Lines changed: 92 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
from itertools import chain
55
from pathlib import Path
66
from urllib.parse import urlparse
7+
from uuid import uuid4
78

8-
import numpy as np
99
import pandas as pd
1010
from pandas.io.common import get_handle, is_url
1111

@@ -98,10 +98,81 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
9898
condition_df = v1v2_condition_df(condition_df, petab_problem.model)
9999
v2.write_condition_df(condition_df, get_dest_path(condition_file))
100100

101+
# records for the experiment table to be created
102+
experiments = []
103+
104+
def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
105+
# TODO: can a condition ID be used as an experiment ID if there
106+
# would be only a single condition in the experiment?
107+
if not sim_cond_id and not preeq_cond_id:
108+
return ""
109+
if preeq_cond_id:
110+
preeq_cond_id = f"{preeq_cond_id}_"
111+
exp_id = f"experiment_{preeq_cond_id}{sim_cond_id}"
112+
if exp_id in experiments: # noqa: B023
113+
i = 1
114+
while f"{exp_id}_{i}" in experiments: # noqa: B023
115+
i += 1
116+
exp_id = f"{exp_id}_{i}"
117+
return exp_id
118+
119+
measured_experiments = (
120+
petab_problem.get_simulation_conditions_from_measurement_df()
121+
)
122+
for (
123+
_,
124+
row,
125+
) in measured_experiments.iterrows():
126+
sim_cond_id = row[v1.C.SIMULATION_CONDITION_ID]
127+
preeq_cond_id = row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, "")
128+
exp_id = create_experiment_id(sim_cond_id, preeq_cond_id)
129+
if preeq_cond_id:
130+
experiments.append(
131+
{
132+
v2.C.EXPERIMENT_ID: exp_id,
133+
v2.C.CONDITION_ID: preeq_cond_id,
134+
v2.C.TIME: float("-inf"),
135+
}
136+
)
137+
experiments.append(
138+
{
139+
v2.C.EXPERIMENT_ID: exp_id,
140+
v2.C.CONDITION_ID: sim_cond_id,
141+
v2.C.TIME: 0,
142+
}
143+
)
144+
if experiments:
145+
exp_table_path = output_dir / "experiments.tsv"
146+
if exp_table_path.exists():
147+
raise ValueError(
148+
f"Experiment table file {exp_table_path} already exists."
149+
)
150+
problem_config[v2.C.EXPERIMENT_FILES] = [exp_table_path.name]
151+
v2.write_experiment_df(
152+
v2.get_experiment_df(pd.DataFrame(experiments)), exp_table_path
153+
)
154+
101155
for measurement_file in problem_config.get(v2.C.MEASUREMENT_FILES, []):
102156
measurement_df = v1.get_measurement_df(
103157
get_src_path(measurement_file)
104158
)
159+
# if there is already an experiment ID column, we rename it
160+
if v2.C.EXPERIMENT_ID in measurement_df.columns:
161+
measurement_df.rename(
162+
columns={v2.C.EXPERIMENT_ID: f"experiment_id_{uuid4()}"},
163+
inplace=True,
164+
)
165+
# add pre-eq condition id if not present or convert to string
166+
# for simplicity
167+
if v1.C.PREEQUILIBRATION_CONDITION_ID in measurement_df.columns:
168+
measurement_df[
169+
v1.C.PREEQUILIBRATION_CONDITION_ID
170+
] = measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID].astype(
171+
str
172+
)
173+
else:
174+
measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = ""
175+
105176
if (
106177
petab_problem.condition_df is not None
107178
and len(
@@ -110,20 +181,33 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
110181
)
111182
== 0
112183
):
113-
# can't have "empty" conditions with no overrides in v2
114-
# TODO: this needs to be done condition wise
115-
measurement_df[v2.C.SIMULATION_CONDITION_ID] = np.nan
184+
# we can't have "empty" conditions with no overrides in v2,
185+
# therefore, we drop the respective condition ID completely
186+
# TODO: or can we?
187+
# TODO: this needs to be checked condition-wise, not globally
188+
measurement_df[v1.C.SIMULATION_CONDITION_ID] = ""
116189
if (
117190
v1.C.PREEQUILIBRATION_CONDITION_ID
118191
in measurement_df.columns
119192
):
120-
measurement_df[v2.C.PREEQUILIBRATION_CONDITION_ID] = np.nan
193+
measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = ""
194+
# condition IDs to experiment IDs
195+
measurement_df.insert(
196+
0,
197+
v2.C.EXPERIMENT_ID,
198+
measurement_df.apply(
199+
lambda row: create_experiment_id(
200+
row[v1.C.SIMULATION_CONDITION_ID],
201+
row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, ""),
202+
),
203+
axis=1,
204+
),
205+
)
206+
del measurement_df[v1.C.SIMULATION_CONDITION_ID]
207+
del measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID]
121208
v2.write_measurement_df(
122209
measurement_df, get_dest_path(measurement_file)
123210
)
124-
# TODO: Measurements: preequilibration to experiments/timecourses once
125-
# finalized
126-
...
127211

128212
# validate updated Problem
129213
validation_issues = v2.lint_problem(new_yaml_file)

petab/v2/problem.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -908,27 +908,25 @@ def add_parameter(
908908
def add_measurement(
909909
self,
910910
obs_id: str,
911-
sim_cond_id: str,
911+
experiment_id: str,
912912
time: float,
913913
measurement: float,
914914
observable_parameters: Sequence[str] = None,
915915
noise_parameters: Sequence[str] = None,
916-
preeq_cond_id: str = None,
917916
):
918917
"""Add a measurement to the problem.
919918
920919
Arguments:
921920
obs_id: The observable ID
922-
sim_cond_id: The simulation condition ID
921+
experiment_id: The experiment ID
923922
time: The measurement time
924923
measurement: The measurement value
925924
observable_parameters: The observable parameters
926925
noise_parameters: The noise parameters
927-
preeq_cond_id: The pre-equilibration condition ID
928926
"""
929927
record = {
930928
OBSERVABLE_ID: [obs_id],
931-
SIMULATION_CONDITION_ID: [sim_cond_id],
929+
EXPERIMENT_ID: [experiment_id],
932930
TIME: [time],
933931
MEASUREMENT: [measurement],
934932
}
@@ -940,8 +938,6 @@ def add_measurement(
940938
record[NOISE_PARAMETERS] = [
941939
PARAMETER_SEPARATOR.join(noise_parameters)
942940
]
943-
if preeq_cond_id is not None:
944-
record[PREEQUILIBRATION_CONDITION_ID] = [preeq_cond_id]
945941

946942
tmp_df = pd.DataFrame(record)
947943
self.measurement_df = (

tests/v2/test_problem.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def test_load_remote():
3030
"""Test loading remote files"""
3131
yaml_url = (
3232
"https://raw.githubusercontent.com/PEtab-dev/petab_test_suite"
33-
"/update_v2/petabtests/cases/v2.0.0/sbml/0001/_0001.yaml"
33+
"/update_v2/petabtests/cases/v2.0.0/sbml/0010/_0010.yaml"
3434
)
3535
petab_problem = Problem.from_yaml(yaml_url)
3636

@@ -83,7 +83,7 @@ def test_problem_from_yaml_multiple_files():
8383
problem.experiment_df, Path(tmpdir, f"experiments{i}.tsv")
8484
)
8585

86-
problem.add_measurement(f"observable{i}", f"condition{i}", 1, 1)
86+
problem.add_measurement(f"observable{i}", f"experiment{i}", 1, 1)
8787
petab.write_measurement_df(
8888
problem.measurement_df, Path(tmpdir, f"measurements{i}.tsv")
8989
)

0 commit comments

Comments
 (0)