Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions monai/apps/auto3dseg/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import os

from monai.apps.auto3dseg.bundle_gen import BundleAlgo
from monai.auto3dseg import algo_from_pickle, algo_to_pickle
from monai.auto3dseg import algo_from_json, algo_to_json
from monai.utils.enums import AlgoKeys

__all__ = ["import_bundle_algo_history", "export_bundle_algo_history", "get_name_from_algo_id"]
Expand Down Expand Up @@ -42,11 +42,18 @@ def import_bundle_algo_history(
if not os.path.isdir(write_path):
continue

obj_filename = os.path.join(write_path, "algo_object.pkl")
if not os.path.isfile(obj_filename): # saved mode pkl
# Prefer JSON format, fall back to legacy pickle
json_filename = os.path.join(write_path, "algo_object.json")
pkl_filename = os.path.join(write_path, "algo_object.pkl")

if os.path.isfile(json_filename):
obj_filename = json_filename
elif os.path.isfile(pkl_filename):
obj_filename = pkl_filename
else:
continue

algo, algo_meta_data = algo_from_pickle(obj_filename, template_path=template_path)
algo, algo_meta_data = algo_from_json(obj_filename, template_path=template_path)

best_metric = algo_meta_data.get(AlgoKeys.SCORE, None)
if best_metric is None:
Expand All @@ -57,7 +64,7 @@ def import_bundle_algo_history(

is_trained = best_metric is not None

if (only_trained and is_trained) or not only_trained:
if is_trained or not only_trained:
history.append(
{AlgoKeys.ID: name, AlgoKeys.ALGO: algo, AlgoKeys.SCORE: best_metric, AlgoKeys.IS_TRAINED: is_trained}
)
Expand All @@ -67,14 +74,14 @@ def import_bundle_algo_history(

def export_bundle_algo_history(history: list[dict[str, BundleAlgo]]) -> None:
    """
    Save all the BundleAlgo in the history to algo_object.json in each individual folder.

    Args:
        history: a List of Bundle. Typically, the history can be obtained from BundleGen get_history method

    Returns:
        None. Each algo is written to ``algo_object.json`` under its own output folder
        (the location is derived from the algo inside ``algo_to_json``).
    """
    for algo_dict in history:
        algo = algo_dict[AlgoKeys.ALGO]
        # each algo carries its own template_path; persist it so loading can rebuild sys.path
        algo_to_json(algo, template_path=algo.template_path)


def get_name_from_algo_id(id: str) -> str:
Expand Down
2 changes: 2 additions & 0 deletions monai/auto3dseg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
from .operations import Operations, SampleOperations, SummaryOperations
from .seg_summarizer import SegSummarizer
from .utils import (
algo_from_json,
algo_from_pickle,
algo_to_json,
algo_to_pickle,
concat_multikeys_to_dict,
concat_val_to_np,
Expand Down
248 changes: 197 additions & 51 deletions monai/auto3dseg/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@

from __future__ import annotations

import json
import logging
import os
import pickle
import subprocess
import sys
import warnings
from copy import deepcopy
from numbers import Number
from typing import Any, cast
Expand All @@ -30,6 +31,7 @@
from monai.data.meta_tensor import MetaTensor
from monai.transforms import CropForeground, ToCupy
from monai.utils import min_version, optional_import, run_cmd
from monai.utils.deprecate_utils import deprecated

__all__ = [
"get_foreground_image",
Expand All @@ -39,8 +41,10 @@
"concat_multikeys_to_dict",
"datafold_read",
"verify_report_format",
"algo_to_pickle",
"algo_from_pickle",
"algo_to_json",
"algo_from_json",
"algo_to_pickle", # deprecated alias
"algo_from_pickle", # deprecated alias
]

measure_np, has_measure = optional_import("skimage.measure", "0.14.2", min_version)
Expand Down Expand Up @@ -274,48 +278,86 @@ def verify_report_format(report: dict, report_format: dict) -> bool:
return True


def algo_to_pickle(algo: Algo, template_path: PathLike | None = None, **algo_meta_data: Any) -> str:
def _make_json_serializable(value: Any) -> Any:
    """
    Convert a value to a JSON-serializable type.

    Handles numpy arrays, Path objects, torch tensors, and other common types.

    Args:
        value: an arbitrary Python object.

    Returns:
        An equivalent value built only from JSON-native types (``None``, ``str``,
        ``int``, ``float``, ``bool``, ``list``, ``dict``); unrecognized objects
        fall back to their ``str()`` representation so serialization never raises.
    """
    if value is None:
        return None
    if isinstance(value, (str, int, float, bool)):
        return value
    if isinstance(value, (list, tuple)):
        # tuples become lists: JSON has a single sequence type
        return [_make_json_serializable(v) for v in value]
    if isinstance(value, dict):
        return {k: _make_json_serializable(v) for k, v in value.items()}
    if hasattr(value, "__fspath__"):  # Path-like objects
        return str(value)
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, (np.integer, np.floating)):
        # numpy scalars -> native Python int/float
        return value.item()
    if isinstance(value, torch.Tensor):
        return value.detach().cpu().numpy().tolist()
    # Fallback to string representation
    return str(value)


def _add_path_with_parent(paths: list[str], path: str | None) -> None:
    """Append ``path`` and its parent directory to ``paths`` when ``path`` is an existing directory."""
    if not path:
        return
    candidate = str(path)
    if not os.path.isdir(candidate):
        return
    resolved = os.path.abspath(candidate)
    parent = os.path.abspath(os.path.join(resolved, ".."))
    paths.extend([resolved, parent])


def algo_to_json(algo: Algo, template_path: PathLike | None = None, **algo_meta_data: Any) -> str:
    """
    Export the Algo object to a JSON file (pickle-free serialization).

    Args:
        algo: Algo-like object (typically BundleAlgo or subclass).
        template_path: path needed for sys.path setup when loading custom Algo classes.
        algo_meta_data: additional metadata to save (e.g., best_metric, score).

    Returns:
        Filename of the saved Algo object (algo_object.json).
    """
    # Snapshot the attributes needed to rebuild the Algo later; values are
    # coerced to JSON-native types so json.dump never raises.
    state = {}
    for attr in [
        "template_path", "data_stats_files", "data_list_file", "mlflow_tracking_uri",
        "mlflow_experiment_name", "output_path", "name", "best_metric", "fill_records", "device_setting",
    ]:
        if hasattr(algo, attr):
            state[attr] = _make_json_serializable(getattr(algo, attr))

    # Build target string for dynamic class instantiation
    cls = algo.__class__
    target = f"{cls.__module__}.{cls.__name__}"

    data: dict[str, Any] = {
        "_target_": target,
        "_state_": state,
        "template_path": str(template_path) if template_path else None,
        **algo_meta_data,
    }

    json_filename = os.path.join(algo.get_output_path(), "algo_object.json")
    # compact separators keep the file small; the content is machine-read only
    with open(json_filename, "w") as f:
        json.dump(data, f, separators=(",", ":"))

    return json_filename

Raises:
ValueError if the pkl_filename does not contain a dict, or the dict does not contain `algo_bytes`.
ModuleNotFoundError if it is unable to instantiate the Algo class.

def _load_legacy_pickle(pkl_filename: str, template_path: PathLike | None = None) -> Any:
"""
Load an Algo object from a legacy pickle file.

This is an internal function to support backward compatibility with pickle files.
"""
import pickle

with open(pkl_filename, "rb") as f_pi:
data_bytes = f_pi.read()
data = pickle.loads(data_bytes)
Expand All @@ -330,34 +372,31 @@ def algo_from_pickle(pkl_filename: str, template_path: PathLike | None = None, *
algo_template_path = data.pop("template_path", None)

template_paths_candidates: list[str] = []

if os.path.isdir(str(template_path)):
template_paths_candidates.append(os.path.abspath(str(template_path)))
template_paths_candidates.append(os.path.abspath(os.path.join(str(template_path), "..")))

if os.path.isdir(str(algo_template_path)):
template_paths_candidates.append(os.path.abspath(algo_template_path))
template_paths_candidates.append(os.path.abspath(os.path.join(algo_template_path, "..")))
_add_path_with_parent(template_paths_candidates, str(template_path) if template_path else None)
_add_path_with_parent(template_paths_candidates, algo_template_path)

pkl_dir = os.path.dirname(pkl_filename)
algo_template_path_fuzzy = os.path.join(pkl_dir, "..", "algorithm_templates")

if os.path.isdir(algo_template_path_fuzzy):
template_paths_candidates.append(os.path.abspath(algo_template_path_fuzzy))
fuzzy_path = os.path.join(pkl_dir, "..", "algorithm_templates")
if os.path.isdir(fuzzy_path):
template_paths_candidates.append(os.path.abspath(fuzzy_path))

if len(template_paths_candidates) == 0:
# no template_path provided or needed
algo = pickle.loads(algo_bytes)
algo.template_path = None
else:
for i, p in enumerate(template_paths_candidates):
try:
sys.path.append(p)
if p not in sys.path:
sys.path.insert(0, p)
algo = pickle.loads(algo_bytes)
# Clean up sys.path after successful load
if p in sys.path:
sys.path.remove(p)
break
except ModuleNotFoundError as not_found_err:
logging.debug(f"Folder {p} doesn't contain the Algo templates for Algo instantiation.")
sys.path.pop()
if p in sys.path:
sys.path.remove(p)
if i == len(template_paths_candidates) - 1:
raise ValueError(
f"Failed to instantiate {pkl_filename} with {template_paths_candidates}"
Expand All @@ -368,13 +407,109 @@ def algo_from_pickle(pkl_filename: str, template_path: PathLike | None = None, *
logging.debug(f"{algo.get_output_path()} is changed. Now override the Algo output_path with: {pkl_dir}.")
algo.output_path = pkl_dir

algo_meta_data = {}
for k, v in data.items():
algo_meta_data.update({k: v})

algo_meta_data = dict(data)
return algo, algo_meta_data


def algo_from_json(filename: str, template_path: PathLike | None = None, **kwargs: Any) -> Any:
    """
    Import the Algo object from a JSON file (pickle-free serialization).

    Args:
        filename: the name of the saved file (algo_object.json or legacy algo_object.pkl).
        template_path: a folder containing files to instantiate the Algo. Besides the `template_path`,
            this function will also attempt to use the `template_path` saved in the file and a directory
            named `algorithm_templates` in the parent folder of the folder containing the file.
        kwargs: additional keyword arguments (reserved for future use).

    Returns:
        algo: the Algo object saved in the file.
        algo_meta_data: additional keyword saved in the file, for example, acc/best_metrics.

    Raises:
        ValueError: if the file format is invalid or the Algo class cannot be instantiated.
        ModuleNotFoundError: if it is unable to instantiate the Algo class.
    """
    abs_filename = os.path.abspath(filename)
    file_dir = os.path.dirname(abs_filename)

    # Check if this is a legacy pickle file
    if filename.endswith(".pkl"):
        warnings.warn(
            "Loading from pickle format (.pkl) is deprecated and will be removed in a future release. "
            "Please re-save your algo using algo_to_json() to convert to the new JSON format.",
            FutureWarning,
            stacklevel=2,
        )
        return _load_legacy_pickle(filename, template_path)

    with open(filename) as f:
        data = json.load(f)

    if not isinstance(data, dict):
        raise ValueError(f"the data object is {data.__class__}. Dict is expected.")

    file_template_path = data.pop("template_path", None)

    if "_target_" not in data:
        # include the offending filename so the bad file can be located
        raise ValueError(f"Invalid file format: missing '_target_' key in {filename}.")

    target = data.pop("_target_")
    state = data.pop("_state_", {})

    # Build template path candidates for sys.path setup
    template_paths: list[str] = []
    _add_path_with_parent(template_paths, str(template_path) if template_path else None)
    # Handle string "None" from corrupted files
    if file_template_path and file_template_path != "None":
        _add_path_with_parent(template_paths, file_template_path)
    fuzzy_path = os.path.join(file_dir, "..", "algorithm_templates")
    if os.path.isdir(fuzzy_path):
        template_paths.append(os.path.abspath(fuzzy_path))

    # Try each template path to instantiate the class.
    # NOTE(review): the insert/remove pair on sys.path is not atomic; if this
    # is ever called concurrently, guard the whole loop body with a lock.
    paths_to_try: list[str | None] = template_paths if template_paths else [None]
    algo = None
    used_template_path: str | None = None
    for path in paths_to_try:
        try:
            if path and path not in sys.path:
                sys.path.insert(0, path)

            algo_config: dict[str, Any] = {"_target_": target}
            # single lookup via .get instead of a key-check + access (RUF019)
            state_template_path = state.get("template_path")
            if state_template_path:
                algo_config["template_path"] = state_template_path

            parser = ConfigParser(algo_config)
            algo = parser.get_parsed_content()
            used_template_path = path

            # Clean up sys.path after successful load
            if path and path in sys.path:
                sys.path.remove(path)
            break
        except (ModuleNotFoundError, ImportError) as e:
            if path and path in sys.path:
                sys.path.remove(path)
            logging.debug(f"Failed to instantiate {target} with path {path}: {e}")
            continue

    if algo is None:
        raise ValueError(f"Failed to instantiate Algo from target '{target}' with paths {template_paths}")

    # Restore the state
    for attr, value in state.items():
        if hasattr(algo, attr):
            setattr(algo, attr, value)

    algo.template_path = used_template_path

    if file_dir != os.path.abspath(algo.get_output_path()):
        logging.debug(f"{algo.get_output_path()} is changed. Now override the Algo output_path with: {file_dir}.")
        algo.output_path = file_dir

    # remaining keys are caller-supplied metadata (e.g. best_metric/score)
    return algo, dict(data)


def list_to_python_fire_arg_str(args: list) -> str:
"""
Convert a list of arguments to a string that can be used in python-fire.
Expand Down Expand Up @@ -522,3 +657,14 @@ def _run_cmd_bcprun(cmd: str, **kwargs: Any) -> subprocess.CompletedProcess:
cmd_list += [f"-{arg}", str(params.pop(arg))]
cmd_list.extend(["-c", cmd])
return run_cmd(cmd_list, run_cmd_verbose=True, **params)


# Deprecated aliases for backward compatibility
@deprecated(since="1.6", msg_suffix="Use algo_to_json instead.")
def algo_to_pickle(algo: Algo, template_path: PathLike | None = None, **algo_meta_data: Any) -> str:
    """Deprecated alias kept for backward compatibility; delegates to ``algo_to_json``."""
    return algo_to_json(algo, template_path, **algo_meta_data)


@deprecated(since="1.6", msg_suffix="Use algo_from_json instead.")
def algo_from_pickle(filename: str, template_path: PathLike | None = None, **kwargs: Any) -> Any:
    """Deprecated alias kept for backward compatibility; delegates to ``algo_from_json``."""
    return algo_from_json(filename, template_path, **kwargs)
Loading