Skip to content
18 changes: 15 additions & 3 deletions examples/Basics/simple_flows_and_runs_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
# A simple tutorial on how to upload results from a machine learning experiment to OpenML.

# %%
import sklearn
from sklearn.neighbors import KNeighborsClassifier

import sklearn
import openml

# %% [markdown]
Expand Down Expand Up @@ -54,7 +53,17 @@

# %% [markdown]
# ## Upload the machine learning experiments to OpenML
# First, create a flow and fill it with metadata about the machine learning model.
#
# ### Option A: Automatic publishing (simplified)
# The publish function automatically detects the model type and creates the flow:

# %%
# For an object that is not already an OpenML entity (such as an estimator),
# `openml.publish` looks up the registered extension for it, converts it into
# a flow, merges the given tags into the flow's tags, and publishes the flow.
knn_flow = openml.publish(clf, tags=["openml_tutorial_knn"])
# The published flow's server-side ID is read from `flow_id`.
print(f"Flow was auto-published with ID {knn_flow.flow_id}")

# %% [markdown]
# ### Option B: Manual flow construction (full control)
# For advanced use cases, you can manually construct the flow:

# %%
knn_flow = openml.flows.OpenMLFlow(
Expand All @@ -77,6 +86,9 @@
knn_flow.publish()
print(f"knn_flow was published with the ID {knn_flow.flow_id}")

# %% [markdown]
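# Both options above store the published flow in `knn_flow`. If you run both cells,
# `knn_flow` refers to the manually constructed flow from Option B; to use the
# auto-published flow instead, skip the Option B cell. Now we'll use `knn_flow` to
# create and upload a run.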

# %% [markdown]
# Second, we create a run to store the results associated with the flow.

Expand Down
4 changes: 4 additions & 0 deletions openml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from .datasets import OpenMLDataFeature, OpenMLDataset
from .evaluations import OpenMLEvaluation
from .flows import OpenMLFlow
from .publish import publish
from .runs import OpenMLRun
from .setups import OpenMLParameter, OpenMLSetup
from .study import OpenMLBenchmarkSuite, OpenMLStudy
Expand Down Expand Up @@ -108,13 +109,16 @@ def populate_cache(
"OpenMLSupervisedTask",
"OpenMLTask",
"__version__",
"__version__",
"_api_calls",
"_api_calls",
"config",
"datasets",
"evaluations",
"exceptions",
"extensions",
"flows",
"publish",
"runs",
"setups",
"study",
Expand Down
102 changes: 102 additions & 0 deletions openml/publish.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# License: BSD 3-Clause
from __future__ import annotations

from collections.abc import Sequence
from typing import Any

from . import extensions
from .base import OpenMLBase


def _merge_tags(existing: Any, extra: Sequence[str] | str) -> list[str]:
    """Return ``existing`` followed by ``extra``, de-duplicated in first-seen order.

    A bare string is treated as a single tag. Unpacking it directly would
    otherwise produce one tag per character (``"abc"`` -> ``"a", "b", "c"``).
    """
    extra_tags = [extra] if isinstance(extra, str) else list(extra)
    # ``existing`` may be ``None`` (or another falsy value) when no tags are set yet.
    current = list(existing or [])
    # dict keys keep insertion order, which gives an order-preserving de-duplication.
    return list(dict.fromkeys([*current, *extra_tags]))


def publish(obj: Any, *, name: str | None = None, tags: Sequence[str] | None = None) -> Any:
    """Publish a common object (flow/model/run/dataset) with minimal friction.

    This function provides a unified entry point for publishing various OpenML objects.
    It detects the object type and routes to the appropriate publishing mechanism:

    - For OpenML objects (instances of ``OpenMLBase`` such as ``OpenMLDataset``,
      ``OpenMLFlow``, ``OpenMLRun``), it calls their ``publish()`` method directly.
    - For any other object (e.g., a scikit-learn estimator), it asks the extension
      registry for an extension that handles the object, converts the object to a
      flow with ``model_to_flow`` and publishes that flow.

    Parameters
    ----------
    obj : Any
        The object to publish. Can be:
        - An OpenML object (OpenMLDataset, OpenMLFlow, OpenMLRun, OpenMLTask)
        - An estimator instance from a supported framework (e.g., scikit-learn)
    name : str, optional
        Override the default name for the published object.
        If not provided, the object's existing name is left untouched. For OpenML
        objects the override is only applied when the object has a ``name`` attribute.
    tags : Sequence[str], optional
        Additional tags to attach to the published object.
        Will be merged with any existing tags, removing duplicates while
        preserving order. A single string is treated as one tag. For OpenML
        objects the tags are only applied when the object has a ``tags`` attribute.

    Returns
    -------
    Any
        Whatever the underlying ``publish()`` call returns (typically the published
        object with updated ID and metadata).

    Raises
    ------
    ValueError
        If no extension is registered to handle the provided estimator type.

    Examples
    --------
    Publishing an OpenML dataset:

    >>> dataset = openml.datasets.get_dataset(61)
    >>> openml.publish(dataset, tags=["example"])

    Publishing a scikit-learn estimator:

    >>> from sklearn.tree import DecisionTreeClassifier
    >>> clf = DecisionTreeClassifier(max_depth=5)
    >>> openml.publish(clf, name="MyDecisionTree", tags=["tutorial"])

    Publishing an OpenML flow directly:

    >>> flow = openml.flows.OpenMLFlow(...)
    >>> openml.publish(flow)

    Publishing an OpenML run (after execution with predictions):

    >>> run = openml.runs.OpenMLRun(
    ...     task_id=1, flow_id=100, dataset_id=61,
    ...     data_content=predictions  # predictions from estimator evaluation
    ... )
    >>> openml.publish(run, tags=["experiment"])

    Notes
    -----
    For external estimators (e.g., scikit-learn), the corresponding extension
    (e.g., ``openml-sklearn``) must be installed and registered so that the
    extension registry can find it.
    """
    # Case 1: Object is already an OpenML entity
    if isinstance(obj, OpenMLBase):
        # Not every OpenML entity necessarily exposes tags/name, so only touch
        # attributes that are actually present.
        if tags is not None and hasattr(obj, "tags"):
            obj.tags = _merge_tags(getattr(obj, "tags", None), tags)
        if name is not None and hasattr(obj, "name"):
            obj.name = name
        return obj.publish()

    # Case 2: Object is an external estimator - use extension registry
    extension = extensions.functions.get_extension_by_model(obj, raise_if_no_extension=True)
    if extension is None:  # Defensive check (should not occur with raise_if_no_extension=True)
        raise ValueError("No extension registered to handle the provided object.")
    flow = extension.model_to_flow(obj)

    if name is not None:
        flow.name = name

    if tags is not None:
        flow.tags = _merge_tags(getattr(flow, "tags", None), tags)

    return flow.publish()
51 changes: 51 additions & 0 deletions tests/test_openml/test_openml.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,54 @@ def test_populate_cache(
assert task_mock.call_count == 2
for argument, fixture in zip(task_mock.call_args_list, [(1,), (2,)]):
assert argument[0] == fixture

def test_publish_with_openml_object_merges_tags_and_name(self):
    """Publishing an ``OpenMLBase`` object overrides its name and merges its tags."""

    class Dummy(openml.base.OpenMLBase):
        """Minimal in-memory entity whose ``publish`` only records the call."""

        def __init__(self) -> None:
            self.published = False
            self.name = "orig"
            self.tags = ["a"]

        def publish(self):
            # No server round-trip: flag the call and hand back the instance.
            self.published = True
            return self

        @property
        def id(self):
            return None

        def _parse_publish_response(self, xml_response):
            return None

        def _to_dict(self):
            return {}

        def _get_repr_body_fields(self):
            return []

    entity = Dummy()
    returned = openml.publish(entity, name="new", tags=["b", "a"])

    # The object's own publish() ran and its return value was passed through.
    assert entity.published is True
    assert returned is entity
    # Existing tags come first; the duplicate "a" is dropped.
    assert entity.tags == ["a", "b"]
    assert entity.name == "new"

@mock.patch("openml.extensions.functions.get_extension_by_model")
def test_publish_with_extension(self, get_ext_mock):
    """A non-OpenML object is converted to a flow by its extension, then published."""
    estimator = object()

    # Flow stand-in returned by the extension; starts without any tags.
    converted_flow = mock.MagicMock()
    converted_flow.tags = []
    converted_flow.publish.return_value = "flow-id"

    # Extension stand-in handed out by the patched registry lookup.
    extension_stub = mock.MagicMock()
    extension_stub.model_to_flow.return_value = converted_flow
    get_ext_mock.return_value = extension_stub

    outcome = openml.publish(estimator, name="n", tags=["x"])

    # Registry lookup and conversion each happen exactly once, with the model.
    get_ext_mock.assert_called_once_with(estimator, raise_if_no_extension=True)
    extension_stub.model_to_flow.assert_called_once_with(estimator)
    # Name and tags are applied to the flow before it is published.
    assert converted_flow.name == "n"
    assert converted_flow.tags == ["x"]
    converted_flow.publish.assert_called_once_with()
    # The flow's publish() return value is passed straight through.
    assert outcome == "flow-id"