diff --git a/monai/apps/auto3dseg/utils.py b/monai/apps/auto3dseg/utils.py
index 64e1d2ea2a..fbf9dc101c 100644
--- a/monai/apps/auto3dseg/utils.py
+++ b/monai/apps/auto3dseg/utils.py
@@ -14,7 +14,7 @@
 import os
 
 from monai.apps.auto3dseg.bundle_gen import BundleAlgo
-from monai.auto3dseg import algo_from_pickle, algo_to_pickle
+from monai.auto3dseg import algo_from_json, algo_to_json
 from monai.utils.enums import AlgoKeys
 
 __all__ = ["import_bundle_algo_history", "export_bundle_algo_history", "get_name_from_algo_id"]
@@ -42,11 +42,18 @@ def import_bundle_algo_history(
         if not os.path.isdir(write_path):
             continue
 
-        obj_filename = os.path.join(write_path, "algo_object.pkl")
-        if not os.path.isfile(obj_filename):  # saved mode pkl
+        # Prefer JSON format, fall back to legacy pickle
+        json_filename = os.path.join(write_path, "algo_object.json")
+        pkl_filename = os.path.join(write_path, "algo_object.pkl")
+
+        if os.path.isfile(json_filename):
+            obj_filename = json_filename
+        elif os.path.isfile(pkl_filename):
+            obj_filename = pkl_filename
+        else:
             continue
 
-        algo, algo_meta_data = algo_from_pickle(obj_filename, template_path=template_path)
+        algo, algo_meta_data = algo_from_json(obj_filename, template_path=template_path)
 
         best_metric = algo_meta_data.get(AlgoKeys.SCORE, None)
         if best_metric is None:
@@ -57,7 +64,7 @@ def import_bundle_algo_history(
 
         is_trained = best_metric is not None
 
-        if (only_trained and is_trained) or not only_trained:
+        if is_trained or not only_trained:
             history.append(
                 {AlgoKeys.ID: name, AlgoKeys.ALGO: algo, AlgoKeys.SCORE: best_metric, AlgoKeys.IS_TRAINED: is_trained}
             )
@@ -67,14 +74,14 @@ def import_bundle_algo_history(
 
 def export_bundle_algo_history(history: list[dict[str, BundleAlgo]]) -> None:
     """
-    Save all the BundleAlgo in the history to algo_object.pkl in each individual folder
+    Save all the BundleAlgo in the history to algo_object.json in each individual folder.
 
     Args:
         history: a List of Bundle. Typically, the history can be obtained from BundleGen get_history method
     """
     for algo_dict in history:
         algo = algo_dict[AlgoKeys.ALGO]
-        algo_to_pickle(algo, template_path=algo.template_path)
+        algo_to_json(algo, template_path=algo.template_path)
 
 
 def get_name_from_algo_id(id: str) -> str:
diff --git a/monai/auto3dseg/__init__.py b/monai/auto3dseg/__init__.py
index 4e5d15613b..f35cd332d3 100644
--- a/monai/auto3dseg/__init__.py
+++ b/monai/auto3dseg/__init__.py
@@ -25,7 +25,9 @@
 from .operations import Operations, SampleOperations, SummaryOperations
 from .seg_summarizer import SegSummarizer
 from .utils import (
+    algo_from_json,
     algo_from_pickle,
+    algo_to_json,
     algo_to_pickle,
     concat_multikeys_to_dict,
     concat_val_to_np,
diff --git a/monai/auto3dseg/utils.py b/monai/auto3dseg/utils.py
index 211f23c415..0864e3ff84 100644
--- a/monai/auto3dseg/utils.py
+++ b/monai/auto3dseg/utils.py
@@ -11,11 +11,12 @@
 
 from __future__ import annotations
 
+import json
 import logging
 import os
-import pickle
 import subprocess
 import sys
+import warnings
 from copy import deepcopy
 from numbers import Number
 from typing import Any, cast
@@ -30,6 +31,7 @@
 from monai.data.meta_tensor import MetaTensor
 from monai.transforms import CropForeground, ToCupy
 from monai.utils import min_version, optional_import, run_cmd
+from monai.utils.deprecate_utils import deprecated
 
 __all__ = [
     "get_foreground_image",
@@ -39,8 +41,10 @@
     "concat_multikeys_to_dict",
     "datafold_read",
     "verify_report_format",
-    "algo_to_pickle",
-    "algo_from_pickle",
+    "algo_to_json",
+    "algo_from_json",
+    "algo_to_pickle",  # deprecated alias
+    "algo_from_pickle",  # deprecated alias
 ]
 
 measure_np, has_measure = optional_import("skimage.measure", "0.14.2", min_version)
@@ -274,48 +278,92 @@ def verify_report_format(report: dict, report_format: dict) -> bool:
     return True
 
 
-def algo_to_pickle(algo: Algo, template_path: PathLike | None = None, **algo_meta_data: Any) -> str:
+def _make_json_serializable(value: Any) -> Any:
+    """
+    Convert a value to a JSON-serializable type.
+
+    Handles numpy arrays, Path objects, torch tensors, and other common types.
     """
-    Export the Algo object to pickle file.
+    if value is None:
+        return None
+    if isinstance(value, (str, int, float, bool)):
+        return value
+    if isinstance(value, (list, tuple)):
+        return [_make_json_serializable(v) for v in value]
+    if isinstance(value, dict):
+        # json.dump rejects non-primitive keys (tuples, paths, ...), so stringify those instead of failing.
+        return {
+            (k if k is None or isinstance(k, (str, int, float, bool)) else str(k)): _make_json_serializable(v)
+            for k, v in value.items()
+        }
+    if hasattr(value, "__fspath__"):  # Path-like objects
+        return str(value)
+    if isinstance(value, np.ndarray):
+        return value.tolist()
+    if isinstance(value, (np.integer, np.floating, np.bool_)):
+        return value.item()
+    if isinstance(value, torch.Tensor):
+        return value.detach().cpu().numpy().tolist()
+    # Fallback to string representation
+    return str(value)
+
+
+def _add_path_with_parent(paths: list[str], path: str | None) -> None:
+    """Add a path and its parent directory to the list if the path is a valid directory."""
+    if path and os.path.isdir(str(path)):
+        abs_path = os.path.abspath(str(path))
+        paths.append(abs_path)
+        paths.append(os.path.abspath(os.path.join(abs_path, "..")))
+
+
+def algo_to_json(algo: Algo, template_path: PathLike | None = None, **algo_meta_data: Any) -> str:
+    """
+    Export the Algo object to a JSON file (pickle-free serialization).
 
     Args:
-        algo: Algo-like object.
-        template_path: a str path that is needed to be added to the sys.path to instantiate the class.
-        algo_meta_data: additional keyword to save into the dictionary, for example, model training info
-            such as acc/best_metrics
+        algo: Algo-like object (typically BundleAlgo or subclass).
+        template_path: path needed for sys.path setup when loading custom Algo classes.
+        algo_meta_data: additional metadata to save (e.g., best_metric, score).
 
     Returns:
-        filename of the pickled Algo object
+        Filename of the saved Algo object (algo_object.json).
     """
-    data = {"algo_bytes": pickle.dumps(algo), "template_path": str(template_path)}
-    pkl_filename = os.path.join(algo.get_output_path(), "algo_object.pkl")
-    for k, v in algo_meta_data.items():
-        data.update({k: v})
-    data_bytes = pickle.dumps(data)
-    with open(pkl_filename, "wb") as f_pi:
-        f_pi.write(data_bytes)
-    return pkl_filename
+    state = {}
+    for attr in [
+        "template_path", "data_stats_files", "data_list_file", "mlflow_tracking_uri",
+        "mlflow_experiment_name", "output_path", "name", "best_metric", "fill_records", "device_setting",
+    ]:
+        if hasattr(algo, attr):
+            state[attr] = _make_json_serializable(getattr(algo, attr))
 
+    # Build target string for dynamic class instantiation
+    cls = algo.__class__
+    target = f"{cls.__module__}.{cls.__name__}"
 
-def algo_from_pickle(pkl_filename: str, template_path: PathLike | None = None, **kwargs: Any) -> Any:
-    """
-    Import the Algo object from a pickle file.
+    data: dict[str, Any] = {
+        "_target_": target,
+        "_state_": state,
+        "template_path": str(template_path) if template_path else None,
+        # Metadata may hold numpy/torch values (e.g. a score), which json.dump cannot encode as-is.
+        **_make_json_serializable(algo_meta_data),
+    }
 
-    Args:
-        pkl_filename: the name of the pickle file.
-        template_path: a folder containing files to instantiate the Algo. Besides the `template_path`,
-        this function will also attempt to use the `template_path` saved in the pickle file and a directory
-        named `algorithm_templates` in the parent folder of the folder containing the pickle file.
+    json_filename = os.path.join(algo.get_output_path(), "algo_object.json")
+    with open(json_filename, "w", encoding="utf-8") as f:
+        json.dump(data, f, separators=(",", ":"))
 
-    Returns:
-        algo: the Algo object saved in the pickle file.
-        algo_meta_data: additional keyword saved in the pickle file, for example, acc/best_metrics.
+    return json_filename
 
-    Raises:
-        ValueError if the pkl_filename does not contain a dict, or the dict does not contain `algo_bytes`.
-        ModuleNotFoundError if it is unable to instantiate the Algo class.
 
+def _load_legacy_pickle(pkl_filename: str, template_path: PathLike | None = None) -> Any:
     """
+    Load an Algo object from a legacy pickle file.
+
+    This is an internal function to support backward compatibility with pickle files.
+    Unpickling can execute arbitrary code, so only pass files from a trusted source.
+    """
+    import pickle
+
     with open(pkl_filename, "rb") as f_pi:
         data_bytes = f_pi.read()
     data = pickle.loads(data_bytes)
@@ -330,34 +378,33 @@ def algo_from_pickle(pkl_filename: str, template_path: PathLike | None = None, *
     algo_template_path = data.pop("template_path", None)
 
     template_paths_candidates: list[str] = []
-
-    if os.path.isdir(str(template_path)):
-        template_paths_candidates.append(os.path.abspath(str(template_path)))
-        template_paths_candidates.append(os.path.abspath(os.path.join(str(template_path), "..")))
-
-    if os.path.isdir(str(algo_template_path)):
-        template_paths_candidates.append(os.path.abspath(algo_template_path))
-        template_paths_candidates.append(os.path.abspath(os.path.join(algo_template_path, "..")))
+    _add_path_with_parent(template_paths_candidates, str(template_path) if template_path else None)
+    _add_path_with_parent(template_paths_candidates, algo_template_path)
 
     pkl_dir = os.path.dirname(pkl_filename)
-    algo_template_path_fuzzy = os.path.join(pkl_dir, "..", "algorithm_templates")
-
-    if os.path.isdir(algo_template_path_fuzzy):
-        template_paths_candidates.append(os.path.abspath(algo_template_path_fuzzy))
+    fuzzy_path = os.path.join(pkl_dir, "..", "algorithm_templates")
+    if os.path.isdir(fuzzy_path):
+        template_paths_candidates.append(os.path.abspath(fuzzy_path))
 
     if len(template_paths_candidates) == 0:
-        # no template_path provided or needed
         algo = pickle.loads(algo_bytes)
         algo.template_path = None
     else:
         for i, p in enumerate(template_paths_candidates):
+            # Only undo our own sys.path edit; an entry that was already present must survive the call.
+            path_added = p not in sys.path
             try:
-                sys.path.append(p)
+                if path_added:
+                    sys.path.insert(0, p)
                 algo = pickle.loads(algo_bytes)
+                # Clean up sys.path after successful load
+                if path_added and p in sys.path:
+                    sys.path.remove(p)
                 break
             except ModuleNotFoundError as not_found_err:
                 logging.debug(f"Folder {p} doesn't contain the Algo templates for Algo instantiation.")
-                sys.path.pop()
+                if path_added and p in sys.path:
+                    sys.path.remove(p)
                 if i == len(template_paths_candidates) - 1:
                     raise ValueError(
                         f"Failed to instantiate {pkl_filename} with {template_paths_candidates}"
@@ -368,13 +415,109 @@ def algo_from_pickle(pkl_filename: str, template_path: PathLike | None = None, *
         logging.debug(f"{algo.get_output_path()} is changed. Now override the Algo output_path with: {pkl_dir}.")
         algo.output_path = pkl_dir
 
-    algo_meta_data = {}
-    for k, v in data.items():
-        algo_meta_data.update({k: v})
-
+    algo_meta_data = dict(data)
     return algo, algo_meta_data
+
+
+def algo_from_json(filename: str, template_path: PathLike | None = None, **kwargs: Any) -> Any:
+    """
+    Import the Algo object from a JSON file (pickle-free serialization).
+
+    Args:
+        filename: the name of the saved file (algo_object.json or legacy algo_object.pkl).
+        template_path: a folder containing files to instantiate the Algo. Besides the `template_path`,
+            this function will also attempt to use the `template_path` saved in the file and a directory
+            named `algorithm_templates` in the parent folder of the folder containing the file.
+        kwargs: additional keyword arguments (reserved for future use).
+
+    Returns:
+        algo: the Algo object saved in the file.
+        algo_meta_data: additional keyword saved in the file, for example, acc/best_metrics.
+
+    Raises:
+        ValueError: if the file format is invalid or the Algo class cannot be instantiated.
+        ModuleNotFoundError: if it is unable to instantiate the Algo class.
+    """
+    abs_filename = os.path.abspath(filename)
+    file_dir = os.path.dirname(abs_filename)
+
+    # Check if this is a legacy pickle file
+    if filename.endswith(".pkl"):
+        warnings.warn(
+            "Loading from pickle format (.pkl) is deprecated and will be removed in a future release. "
+            "Please re-save your algo using algo_to_json() to convert to the new JSON format.",
+            FutureWarning,
+            stacklevel=2,
+        )
+        return _load_legacy_pickle(filename, template_path)
+
+    with open(filename, encoding="utf-8") as f:
+        data = json.load(f)
+
+    if not isinstance(data, dict):
+        raise ValueError(f"the data object is {data.__class__}. Dict is expected.")
+
+    file_template_path = data.pop("template_path", None)
+
+    if "_target_" not in data:
+        raise ValueError(f"Invalid file format: missing '_target_' key in {filename}.")
+
+    target = data.pop("_target_")
+    state = data.pop("_state_", {})
+
+    # Build template path candidates for sys.path setup
+    template_paths: list[str] = []
+    _add_path_with_parent(template_paths, str(template_path) if template_path else None)
+    # Handle string "None" from corrupted files
+    if file_template_path and file_template_path != "None":
+        _add_path_with_parent(template_paths, file_template_path)
+    fuzzy_path = os.path.join(file_dir, "..", "algorithm_templates")
+    if os.path.isdir(fuzzy_path):
+        template_paths.append(os.path.abspath(fuzzy_path))
+
+    # Try each template path to instantiate the class
+    paths_to_try: list[str | None] = template_paths if template_paths else [None]
+    algo = None
+    used_template_path: str | None = None
+    for path in paths_to_try:
+        # Only undo our own sys.path edit; an entry that was already present must survive the call.
+        path_added = False
+        if path and path not in sys.path:
+            sys.path.insert(0, path)
+            path_added = True
+        try:
+            algo_config: dict[str, Any] = {"_target_": target}
+            if "template_path" in state and state["template_path"]:
+                algo_config["template_path"] = state["template_path"]
+
+            parser = ConfigParser(algo_config)
+            algo = parser.get_parsed_content()
+            used_template_path = path
+            break
+        except (ModuleNotFoundError, ImportError) as e:
+            logging.debug(f"Failed to instantiate {target} with path {path}: {e}")
+        finally:
+            # Runs on success, failure and unexpected errors alike, so our entry never leaks.
+            if path_added and path is not None and path in sys.path:
+                sys.path.remove(path)
+
+    if algo is None:
+        raise ValueError(f"Failed to instantiate Algo from target '{target}' with paths {template_paths}")
+
+    # Restore the state
+    for attr, value in state.items():
+        if hasattr(algo, attr):
+            setattr(algo, attr, value)
+
+    algo.template_path = used_template_path
+
+    if file_dir != os.path.abspath(algo.get_output_path()):
+        logging.debug(f"{algo.get_output_path()} is changed. Now override the Algo output_path with: {file_dir}.")
+        algo.output_path = file_dir
+
+    return algo, dict(data)
 
 
 def list_to_python_fire_arg_str(args: list) -> str:
     """
     Convert a list of arguments to a string that can be used in python-fire.
@@ -522,3 +665,15 @@ def _run_cmd_bcprun(cmd: str, **kwargs: Any) -> subprocess.CompletedProcess:
         cmd_list += [f"-{arg}", str(params.pop(arg))]
     cmd_list.extend(["-c", cmd])
     return run_cmd(cmd_list, run_cmd_verbose=True, **params)
+
+
+# Deprecated aliases for backward compatibility
+# (parameter names match the original functions so existing keyword callers keep working)
+@deprecated(since="1.6", msg_suffix="Use algo_to_json instead.")
+def algo_to_pickle(algo: Algo, template_path: PathLike | None = None, **algo_meta_data: Any) -> str:
+    return algo_to_json(algo, template_path, **algo_meta_data)
+
+
+@deprecated(since="1.6", msg_suffix="Use algo_from_json instead.")
+def algo_from_pickle(pkl_filename: str, template_path: PathLike | None = None, **kwargs: Any) -> Any:
+    return algo_from_json(pkl_filename, template_path, **kwargs)