From 3187461d433993ec65d1dda89350e7845245635a Mon Sep 17 00:00:00 2001 From: 23f3000839 <23f3000839@ds.study.iitm.ac.in> Date: Fri, 5 Sep 2025 02:14:18 +0530 Subject: [PATCH] feat(backend): add exclude option to pull_from_s3 for selective directory sync - Add exclude parameter to Backend._experimental_pull_from_s3 to skip specified dirs - Supported exclude values: checkpoints, logs, trajectories - Update API to accept exclude list and forward it to backend method - Enhance CLI endpoint to accept and forward both the only_step and exclude options when pulling from S3 - Document exclude usage in method docstring with valid options listed - Maintain backward compatibility with default None exclude behavior --- src/art/backend.py | 3 +++ src/art/cli.py | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/art/backend.py b/src/art/backend.py index 9fa95c0e..0b5ed2fc 100644 --- a/src/art/backend.py +++ b/src/art/backend.py @@ -140,12 +140,14 @@ async def _experimental_pull_from_s3( verbose: bool = False, delete: bool = False, only_step: int | Literal["latest"] | None = None, + exclude: list[Literal["checkpoints", "logs", "trajectories"]] | None = None, ) -> None: """Download the model directory from S3 into file system where the LocalBackend is running. Right now this can be used to pull trajectory logs for processing or model checkpoints. Args: only_step: If specified, only pull this specific step. Can be an int for a specific step, or "latest" to pull only the latest checkpoint. If None, pulls all steps. + exclude: List of directories to exclude from sync. Valid options: "checkpoints", "logs", "trajectories". 
""" response = await self._client.post( "/_experimental_pull_from_s3", @@ -156,6 +158,7 @@ async def _experimental_pull_from_s3( "verbose": verbose, "delete": delete, "only_step": only_step, + "exclude": exclude, }, timeout=600, ) diff --git a/src/art/cli.py b/src/art/cli.py index 6c7c4903..1585e633 100644 --- a/src/art/cli.py +++ b/src/art/cli.py @@ -1,6 +1,6 @@ import json import socket -from typing import Any, AsyncIterator +from typing import Any, AsyncIterator, Literal import pydantic import typer @@ -101,6 +101,8 @@ async def _experimental_pull_from_s3( prefix: str | None = Body(None), verbose: bool = Body(False), delete: bool = Body(False), + only_step: int | Literal["latest"] | None = Body(None), + exclude: list[Literal["checkpoints", "logs", "trajectories"]] | None = Body(None), ): await backend._experimental_pull_from_s3( model=model, @@ -108,6 +110,8 @@ async def _experimental_pull_from_s3( prefix=prefix, verbose=verbose, delete=delete, + only_step=only_step, + exclude=exclude, ) @app.post("/_experimental_push_to_s3")