Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions openml/_api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from openml._api.runtime.core import APIContext


def set_api_version(version: str, *, strict: bool = False) -> None:
    """Forward the requested API version to the module-level ``api_context``.

    Parameters
    ----------
    version : str
        Version identifier handed unchanged to ``api_context.set_version``.
    strict : bool, default=False
        Keyword-only flag handed unchanged to ``api_context.set_version``.
    """
    api_context.set_version(version=version, strict=strict)


# Module-level APIContext instance, created at import time;
# set_api_version() forwards its arguments to this object.
api_context = APIContext()
5 changes: 5 additions & 0 deletions openml/_api/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from __future__ import annotations

# Base URL for the v1 API; the path ends in /xml (presumably the XML endpoint).
API_V1_SERVER = "https://www.openml.org/api/v1/xml"
# Base URL for the v2 API.
# NOTE(review): this is a loopback address — presumably a local development
# server; confirm before this ships as a default.
API_V2_SERVER = "http://127.0.0.1:8001"
# NOTE(review): placeholder value; presumably the real key is supplied
# elsewhere — TODO confirm.
API_KEY = "..."
3 changes: 3 additions & 0 deletions openml/_api/http/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from openml._api.http.client import HTTPClient

__all__ = ["HTTPClient"]
39 changes: 39 additions & 0 deletions openml/_api/http/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from __future__ import annotations

from typing import Any, Mapping

import requests
from requests import Response

from openml.__version__ import __version__


class HTTPClient:
    """Minimal HTTP client that issues requests relative to a fixed base URL.

    Every request carries a ``user-agent`` header identifying the
    openml-python version and uses the timeout configured on the instance.
    Responses are returned as-is; no status checking is performed here.

    Parameters
    ----------
    base_url : str
        URL prefix that request paths are appended to.
    timeout : float, default=10
        Timeout in seconds passed to every ``requests`` call.
    """

    def __init__(self, base_url: str, *, timeout: float = 10) -> None:
        self.base_url = base_url
        self.timeout = timeout
        self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"}

    def _url(self, path: str) -> str:
        """Join ``base_url`` and ``path`` with exactly one slash between them."""
        # Strip the slashes at the seam so a trailing "/" on the base URL or a
        # leading "/" on the path does not produce "//" in the request URL.
        return f"{self.base_url.rstrip('/')}/{path.lstrip('/')}"

    def get(
        self,
        path: str,
        params: Mapping[str, Any] | None = None,
    ) -> Response:
        """Send a GET request to ``path`` with optional query ``params``."""
        return requests.get(
            self._url(path),
            params=params,
            headers=self.headers,
            timeout=self.timeout,
        )

    def post(
        self,
        path: str,
        data: Mapping[str, Any] | None = None,
        files: Any = None,
    ) -> Response:
        """Send a POST request to ``path`` with optional form ``data`` and ``files``."""
        return requests.post(
            self._url(path),
            data=data,
            files=files,
            headers=self.headers,
            timeout=self.timeout,
        )

    def delete(
        self,
        path: str,
        params: Mapping[str, Any] | None = None,
    ) -> Response:
        """Send a DELETE request to ``path`` with optional query ``params``."""
        return requests.delete(
            self._url(path),
            params=params,
            headers=self.headers,
            timeout=self.timeout,
        )
Empty file added openml/_api/http/utils.py
Empty file.
5 changes: 5 additions & 0 deletions openml/_api/resources/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from openml._api.resources.datasets import DatasetsV1, DatasetsV2
from openml._api.resources.evaluations import EvaluationsV1, EvaluationsV2
from openml._api.resources.tasks import TasksV1, TasksV2

__all__ = ["DatasetsV1", "DatasetsV2", "TasksV1", "TasksV2", "EvaluationsV1", "EvaluationsV2"]
45 changes: 45 additions & 0 deletions openml/_api/resources/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
from requests import Response

from openml._api.http import HTTPClient
from openml.datasets.dataset import OpenMLDataset
from openml.tasks.task import OpenMLTask


class ResourceAPI:
    """Common base for resource classes; holds the HTTP client they use."""

    def __init__(self, http: HTTPClient):
        # Stored on a private attribute so subclasses can issue requests.
        self._http = http


class DatasetsAPI(ResourceAPI, ABC):
    """Abstract interface that versioned dataset resources must implement."""

    @abstractmethod
    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Fetch the dataset identified by ``dataset_id``.

        The return annotation also allows a ``(dataset, response)`` tuple.
        NOTE(review): unlike ``TasksAPI.get`` there is no ``return_response``
        flag here to select that form — confirm whether one is intended.
        """


class TasksAPI(ResourceAPI, ABC):
    """Abstract interface that versioned task resources must implement."""

    @abstractmethod
    def get(
        self,
        task_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
        """Fetch the task identified by ``task_id``.

        ``return_response`` is keyword-only. The return annotation allows
        either the task alone or a ``(task, response)`` tuple; presumably the
        tuple form is selected by ``return_response=True`` — confirm against
        the concrete implementations.
        """


class EvaluationsAPI(ResourceAPI, ABC):
    """Abstract interface that versioned evaluation resources must implement."""

    @abstractmethod
    def list(
        self,
        limit: int,
        offset: int,
        function: str,
        **kwargs: Any,
    ) -> dict:
        """List evaluations for ``function``.

        ``limit`` and ``offset`` are paging arguments; any additional filters
        are accepted as keyword arguments. Returns a dictionary.
        """

    @abstractmethod
    def get_users(self, uploader_ids: list[str]) -> dict:
        """Return a dictionary of user information for ``uploader_ids``."""
20 changes: 20 additions & 0 deletions openml/_api/resources/datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from openml._api.resources.base import DatasetsAPI

if TYPE_CHECKING:
    # Response must be the HTTP response class from ``requests`` (as in
    # openml/_api/resources/base.py), not the one from the ``responses``
    # mocking library, so the overrides below match DatasetsAPI.get.
    from requests import Response

    from openml.datasets.dataset import OpenMLDataset


class DatasetsV1(DatasetsAPI):
    """Dataset resource for the v1 API (placeholder, nothing implemented yet)."""

    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Not implemented; always raises ``NotImplementedError``."""
        raise NotImplementedError


class DatasetsV2(DatasetsAPI):
    """Dataset resource for the v2 API (placeholder, nothing implemented yet)."""

    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Not implemented; always raises ``NotImplementedError``."""
        raise NotImplementedError
206 changes: 206 additions & 0 deletions openml/_api/resources/evaluations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
from __future__ import annotations

from typing import Any

import xmltodict

from openml._api.resources.base import EvaluationsAPI


class EvaluationsV1(EvaluationsAPI):
    """V1 API implementation for evaluations.

    Fetches evaluations from the v1 XML API endpoint.
    """

    def list(
        self,
        limit: int,
        offset: int,
        function: str,
        **kwargs: Any,
    ) -> dict:
        """Retrieve evaluations from the OpenML v1 XML API.

        This method builds an evaluation query path from the provided
        filters, sends a request through the HTTP client, parses the XML
        response into a dictionary, and enriches the result with uploader
        usernames.

        Parameters
        ----------
        limit : int
            Maximum number of evaluations to return.
        offset : int
            Offset for pagination.
        function : str
            The evaluation function, e.g. ``predictive_accuracy``.
        **kwargs
            Optional filters, read by ``_build_url``:
            - tasks
            - setups
            - flows
            - runs
            - uploaders
            - study
            - sort_order
            - tag
            - per_fold

        Returns
        -------
        dict
            A dictionary containing:
            - Parsed evaluation data from the XML response
            - A "users" key mapping uploader IDs to usernames

        Raises
        ------
        ValueError
            If the parsed XML has no ``oml:evaluations`` root element.
        AssertionError
            If the evaluation data is not in list format as expected.

        Notes
        -----
        This method performs two API calls:
        1. Fetches evaluation data from the evaluation endpoint
        2. Fetches user information for all uploaders in the evaluation data

        The user information is used to map uploader IDs to usernames.
        """
        api_call = self._build_url(limit, offset, function, **kwargs)
        xml_content = self._http.get(api_call).text

        # force_list makes a single <oml:evaluation> parse as a one-item list.
        evals_dict: dict[str, Any] = xmltodict.parse(xml_content, force_list=("oml:evaluation",))
        # Minimalistic check if the XML is useful
        if "oml:evaluations" not in evals_dict:
            raise ValueError(
                "Error in return XML, does not contain " f'"oml:evaluations": {evals_dict!s}',
            )

        evaluations = evals_dict["oml:evaluations"]["oml:evaluation"]
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped when Python runs with -O.
        if not isinstance(evaluations, list):
            raise AssertionError(
                "Expected 'oml:evaluation' to be a list, but got "
                f"{type(evaluations).__name__}. "
            )

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # user request path is the same on every run (set order is not).
        uploader_ids = list(dict.fromkeys(eval_["oml:uploader"] for eval_ in evaluations))
        evals_dict["users"] = self.get_users(uploader_ids)

        return evals_dict

    def get_users(self, uploader_ids: list[str]) -> dict:
        """
        Retrieve usernames for a list of OpenML user IDs.

        Parameters
        ----------
        uploader_ids : list[str]
            List of OpenML user IDs. Non-string IDs are converted with
            ``str`` before being joined into the request path.

        Returns
        -------
        dict
            A mapping from user ID (str) to username (str). Empty when
            ``uploader_ids`` is empty; no request is sent in that case.
        """
        if not uploader_ids:
            # Nothing to look up; avoids requesting "user/list/user_id/".
            return {}

        api_users = "user/list/user_id/" + ",".join(str(uid) for uid in uploader_ids)
        xml_content_user = self._http.get(api_users).text

        # force_list makes a single <oml:user> parse as a one-item list.
        users = xmltodict.parse(xml_content_user, force_list=("oml:user",))
        return {user["oml:id"]: user["oml:username"] for user in users["oml:users"]["oml:user"]}

    def _build_url(
        self,
        limit: int,
        offset: int,
        function: str,
        **kwargs: Any,
    ) -> str:
        """
        Construct an OpenML evaluation API path with filtering parameters.

        Parameters
        ----------
        limit : int
            Maximum number of evaluations to return. Omitted when ``None``.
        offset : int
            Offset for pagination. Omitted when ``None``.
        function : str
            The evaluation function, e.g. ``predictive_accuracy``.
        **kwargs
            Evaluation filters. ``tasks``, ``setups``, ``flows``, ``runs`` and
            ``uploaders`` are iterables of integer-like IDs; ``study``,
            ``sort_order``, ``tag`` and ``per_fold`` are single values.
            Filters that are missing or ``None`` are left out.

        Returns
        -------
        str
            A relative API path suitable for an OpenML HTTP request.
        """
        segments = [f"evaluation/list/function/{function}"]
        if limit is not None:
            segments.append(f"limit/{limit}")
        if offset is not None:
            segments.append(f"offset/{offset}")

        # List-based filters: (path segment name, keyword argument name).
        list_filters = (
            ("task", "tasks"),
            ("setup", "setups"),
            ("flow", "flows"),
            ("run", "runs"),
            ("uploader", "uploaders"),
        )
        for name, kwarg in list_filters:
            values = kwargs.get(kwarg)
            if values is not None:
                segments.append(f"{name}/" + ",".join(str(int(v)) for v in values))

        # Single-value filters, in the order they appear in the path.
        for key in ("study", "sort_order", "tag", "per_fold"):
            value = kwargs.get(key)
            if value is None:
                continue
            if isinstance(value, bool):
                # Render booleans as "true"/"false" rather than Python's
                # "True"/"False"; presumably what the server expects for
                # per_fold — confirm against the API documentation.
                value = str(value).lower()
            segments.append(f"{key}/{value}")

        return "/".join(segments)


class EvaluationsV2(EvaluationsAPI):
    """Evaluations resource for the v2 JSON API.

    Placeholder: every method currently raises ``NotImplementedError``.
    """

    def list(
        self,
        limit: int,
        offset: int,
        function: str,
        **kwargs: Any,
    ) -> dict:
        """
        List evaluations through the OpenML v2 JSON API.

        Raises
        ------
        NotImplementedError
            Always; the v2 implementation does not exist yet.
        """
        raise NotImplementedError("V2 API implementation is not yet available")

    def get_users(self, uploader_ids: list[str]) -> dict:
        """
        Look up usernames for OpenML user IDs through the v2 API.

        Raises
        ------
        NotImplementedError
            Always; the v2 implementation does not exist yet.
        """
        raise NotImplementedError("V2 API implementation is not yet available")
Loading