diff --git a/documents/docs/galaxy/client/aip_integration.md b/documents/docs/galaxy/client/aip_integration.md
index 1b8fab2aa..cbded5d08 100644
--- a/documents/docs/galaxy/client/aip_integration.md
+++ b/documents/docs/galaxy/client/aip_integration.md
@@ -2,12 +2,13 @@
The Agent Interaction Protocol (AIP) is the communication protocol used throughout Galaxy Client for device coordination. This document explains how Galaxy Client integrates with AIP, the message flow patterns, and how different components use the protocol.
-**Related Documentation:**
+## Related Documentation
-- [Overview](./overview.md) - Overall architecture
-- [DeviceManager](./device_manager.md) - Connection management
-- [Components](./components.md) - Component details
+- [Overview](./overview.md) - Overall Galaxy Client architecture
+- [DeviceManager](./device_manager.md) - Connection management using AIP
+- [Components](./components.md) - Component-level AIP usage
- [AIP Protocol Specification](../../aip/overview.md) - Complete protocol reference
+- [AIP Message Reference](../../aip/messages.md) - Detailed message structures
---
@@ -205,17 +206,15 @@ sequenceDiagram
**Step 3 - REGISTER_CONFIRMATION:**
```json
{
- "type": "register_confirmation",
+ "type": "heartbeat",
"status": "ok",
- "payload": {
- "device_id": "windows_pc",
- "session_id": "sess_abc123",
- "message": "Registration successful"
- },
- "timestamp": "2025-11-06T10:30:01Z"
+ "timestamp": "2025-11-06T10:30:01Z",
+ "response_id": "reg_conf_abc123"
}
```
+Note: In the AIP protocol, the server confirms a successful registration by replying with a HEARTBEAT message whose status is OK.
+
**Step 4 - DEVICE_INFO_REQUEST:**
```json
{
@@ -234,8 +233,8 @@ sequenceDiagram
{
"type": "device_info_response",
"status": "ok",
- "payload": {
- "request_id": "req_xyz789",
+ "result": {
+ "device_id": "windows_pc",
"device_info": {
"os": "Windows 11",
"cpu_count": 8,
@@ -245,7 +244,8 @@ sequenceDiagram
"installed_apps": ["Microsoft Office", "Chrome", "VSCode"]
}
},
- "timestamp": "2025-11-06T10:30:03Z"
+ "timestamp": "2025-11-06T10:30:03Z",
+ "response_id": "info_resp_xyz789"
}
```
@@ -280,11 +280,9 @@ sequenceDiagram
{
"type": "heartbeat",
"client_type": "constellation",
- "payload": {
- "device_id": "windows_pc",
- "timestamp": "2025-11-06T10:35:00Z"
- },
- "status": "ok"
+ "client_id": "constellation_client_id",
+ "status": "ok",
+ "timestamp": "2025-11-06T10:35:00Z"
}
```
@@ -293,10 +291,8 @@ sequenceDiagram
{
"type": "heartbeat",
"status": "ok",
- "payload": {
- "device_id": "windows_pc",
- "server_time": "2025-11-06T10:35:00Z"
- }
+ "timestamp": "2025-11-06T10:35:00Z",
+ "response_id": "hb_resp_123"
}
```
@@ -342,21 +338,12 @@ sequenceDiagram
```json
{
"type": "task",
- "status": "ok",
- "payload": {
- "task_id": "task_abc123",
- "description": "Open Excel and create a chart",
- "data": {
- "file_path": "sales_report.xlsx",
- "chart_type": "bar",
- "data_range": "A1:C10"
- },
- "metadata": {
- "priority": "high",
- "timeout": 300
- }
- },
- "timestamp": "2025-11-06T10:40:00Z"
+ "status": "continue",
+ "user_request": "Open Excel and create a chart",
+ "task_name": "galaxy/production/excel_task",
+ "session_id": "sess_task_abc123",
+ "timestamp": "2025-11-06T10:40:00Z",
+ "response_id": "task_req_001"
}
```
@@ -364,25 +351,23 @@ sequenceDiagram
```json
{
"type": "command",
- "status": "ok",
- "payload": {
- "command_id": "cmd_001",
- "task_id": "task_abc123",
- "commands": [
- {
- "action": "launch_app",
- "parameters": {
- "app_name": "Excel"
- }
- },
- {
- "action": "open_file",
- "parameters": {
- "file_path": "sales_report.xlsx"
- }
+ "status": "continue",
+ "actions": [
+ {
+ "action": "launch_app",
+ "parameters": {
+ "app_name": "Excel"
}
- ]
- }
+ },
+ {
+ "action": "open_file",
+ "parameters": {
+ "file_path": "sales_report.xlsx"
+ }
+ }
+ ],
+ "session_id": "sess_task_abc123",
+ "response_id": "cmd_001"
}
```
@@ -391,23 +376,22 @@ sequenceDiagram
{
"type": "command_results",
"client_type": "device",
- "status": "ok",
- "payload": {
- "command_id": "cmd_001",
- "task_id": "task_abc123",
- "results": [
- {
- "action": "launch_app",
- "status": "completed",
- "output": "Excel launched successfully"
- },
- {
- "action": "open_file",
- "status": "completed",
- "output": "File opened: sales_report.xlsx"
- }
- ]
- }
+ "client_id": "device_agent_id",
+ "status": "continue",
+ "action_results": [
+ {
+ "action": "launch_app",
+ "status": "completed",
+ "result": "Excel launched successfully"
+ },
+ {
+ "action": "open_file",
+ "status": "completed",
+ "result": "File opened: sales_report.xlsx"
+ }
+ ],
+ "session_id": "sess_task_abc123",
+ "prev_response_id": "cmd_001"
}
```
@@ -416,22 +400,19 @@ sequenceDiagram
{
"type": "task_end",
"client_type": "device",
+ "client_id": "device_agent_id",
"status": "completed",
- "payload": {
- "task_id": "task_abc123",
- "result": {
- "success": true,
- "output": "Created bar chart showing quarterly sales",
- "artifacts": [
- {
- "type": "file",
- "path": "sales_report_with_chart.xlsx"
- }
- ]
- },
- "execution_time": 12.5,
- "steps_completed": 3
+ "result": {
+ "success": true,
+ "output": "Created bar chart showing quarterly sales",
+ "artifacts": [
+ {
+ "type": "file",
+ "path": "sales_report_with_chart.xlsx"
+ }
+ ]
},
+ "session_id": "sess_task_abc123",
"timestamp": "2025-11-06T10:40:15Z"
}
```
@@ -468,16 +449,14 @@ AIP uses ERROR messages for protocol-level errors:
{
"type": "error",
"status": "error",
- "payload": {
+ "error": "Task execution exceeded 300 second timeout",
+ "session_id": "sess_task_abc123",
+ "timestamp": "2025-11-06T10:45:00Z",
+ "response_id": "err_001",
+ "metadata": {
"error_code": "TASK_TIMEOUT",
- "error_message": "Task execution exceeded 300 second timeout",
- "task_id": "task_abc123",
- "device_id": "windows_pc",
- "timestamp": "2025-11-06T10:45:00Z",
- "context": {
- "elapsed_time": 315.2,
- "last_command": "create_chart"
- }
+ "elapsed_time": 315.2,
+ "last_command": "create_chart"
}
}
```
@@ -1041,8 +1020,8 @@ This allows the same server to support both direct device connections and conste
AIP integration in Galaxy Client follows a layered architecture:
-1. **Transport**: WebSocketConnectionManager handles raw WebSocket I/O
-2. **Protocol**: AIPProtocol handles message serialization and middleware
+1. **Transport**: WebSocketConnectionManager handles raw WebSocket I/O via AIP WebSocketTransport
+2. **Protocol**: AIP protocol classes (RegistrationProtocol, TaskExecutionProtocol, HeartbeatProtocol, DeviceInfoProtocol) handle message serialization and protocol logic
3. **Message Processing**: MessageProcessor routes messages to handlers
4. **Application**: DeviceManager and ConstellationClient use messages for coordination
5. **Server Routing**: UFOWebSocketHandler routes messages between constellation clients and devices
@@ -1050,7 +1029,7 @@ AIP integration in Galaxy Client follows a layered architecture:
**Key Message Flows:**
-- **Registration**: REGISTER → REGISTER_CONFIRMATION → DEVICE_INFO_REQUEST → DEVICE_INFO_RESPONSE
+- **Registration**: REGISTER → HEARTBEAT (OK) → DEVICE_INFO_REQUEST → DEVICE_INFO_RESPONSE
- **Heartbeat**: HEARTBEAT (request) → HEARTBEAT (response), every 30 seconds
- **Task Execution (Constellation)**: ConstellationClient TASK → Server routes → Device executes → Server routes → ConstellationClient TASK_END
- **Task Execution (Direct)**: Device TASK → Server orchestrates → Device TASK_END
@@ -1083,9 +1062,11 @@ WebSocket → Device Agent (task execution)
AIP provides a robust, extensible protocol for agent communication with strong typing, clear message flows, comprehensive error handling, and intelligent routing between constellation clients and devices.
-**Next Steps:**
+## Next Steps
- See [DeviceManager](./device_manager.md) for connection management details
-- See [Components](./components.md) for MessageProcessor implementation
+- See [Components](./components.md) for MessageProcessor and WebSocketConnectionManager implementation
- See [ConstellationClient](./constellation_client.md) for device coordination API
- See [AIP Protocol Specification](../../aip/overview.md) for complete protocol reference
+- See [AIP Message Reference](../../aip/messages.md) for detailed message structures and examples
+- See [Server Documentation](../../server/websocket_handler.md) for server-side routing details
diff --git a/documents/docs/galaxy/client/components.md b/documents/docs/galaxy/client/components.md
index 4324feb24..87da9ec5d 100644
--- a/documents/docs/galaxy/client/components.md
+++ b/documents/docs/galaxy/client/components.md
@@ -2,11 +2,12 @@
Galaxy Client is built from focused, single-responsibility components that work together to provide device management capabilities. This document explains how these components interact and what each one does.
-**Related Documentation:**
+## Related Documentation
-- [Overview](./overview.md) - Overall architecture and workflow
-- [DeviceManager](./device_manager.md) - How components are orchestrated
-- [ConstellationClient](./constellation_client.md) - Component usage in coordination layer
+- [Overview](./overview.md) - Overall Galaxy Client architecture
+- [DeviceManager](./device_manager.md) - How DeviceManager orchestrates these components
+- [ConstellationClient](./constellation_client.md) - How components are used in the coordination layer
+- [AIP Integration](./aip_integration.md) - Message protocol used by components
---
@@ -124,13 +125,13 @@ registry.reset_connection_attempts(device_id)
When `connect_to_device()` is called, WebSocketConnectionManager performs these steps:
-1. **Establish WebSocket**: Uses `websockets.connect()` to create a connection to the device's server_url. This is an async operation that may timeout or fail due to network issues.
+1. **Establish WebSocket**: Creates an AIP `WebSocketTransport` and connects to the device's server_url. This is an async operation that may time out or fail due to network issues.
-2. **Send REGISTER**: Immediately sends an AIP REGISTER message to identify this client to the server. The server responds with a confirmation once registration succeeds.
+2. **Store Transport**: Saves the `WebSocketTransport` object and initializes the AIP protocol handlers (`RegistrationProtocol`, `TaskExecutionProtocol`, `DeviceInfoProtocol`) for this connection, so they are available to the following steps.
-3. **Store Connection**: Saves the WebSocket object in an internal dictionary `{device_id: websocket}`. Other components retrieve this WebSocket when they need to send messages.
+3. **Start Message Handler BEFORE Registration**: Crucially, the message handler is started via MessageProcessor *before* REGISTER is sent. This prevents a race condition: if the server responded before we started listening, we would miss its response.
-4. **Start Message Handler**: Crucially, this happens *before* waiting for REGISTER confirmation to prevent race conditions. If the server sends a response before we start listening, we'd miss it.
+4. **Send REGISTER**: Uses `RegistrationProtocol` to send an AIP REGISTER message identifying this client to the server. The server responds with a HEARTBEAT message with OK status to confirm registration.
**Task Execution**:
@@ -138,26 +139,30 @@ When sending a task to a device, WebSocketConnectionManager:
```python
async def send_task_to_device(device_id, task_request):
- # 1. Get WebSocket connection
- websocket = self.connections[device_id]
+ # 1. Get Transport and TaskExecutionProtocol
+ transport = self._transports[device_id]
+ task_protocol = self._task_protocols[device_id]
- # 2. Create AIP TASK message
- task_msg = ClientMessage(
+ # 2. Create AIP ClientMessage for task execution
+ task_message = ClientMessage(
type=ClientMessageType.TASK,
- client_id=self.client_id,
+ client_type=ClientType.CONSTELLATION,
+ client_id=task_client_id,
target_id=device_id,
- session_id=task_request.task_id,
+ task_name=f"galaxy/{task_name}/{task_request.task_name}",
request=task_request.request,
+ session_id=constellation_task_id,
+ status=TaskStatus.CONTINUE,
...
)
- # 3. Send message
- await websocket.send(task_msg.model_dump_json())
+ # 3. Send message via AIP transport
+ await transport.send(task_message.model_dump_json().encode("utf-8"))
- # 4. Wait for response (handled by MessageProcessor)
- result = await self._wait_for_task_completion(device_id, task_request.task_id)
+ # 4. Wait for response (handled via future)
+ result = await self._wait_for_task_response(device_id, constellation_task_id)
- return result
+ return ExecutionResult(...)
```
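-The `_wait_for_task_completion()` method creates an asyncio.Future that MessageProcessor will complete when it receives the TASK_END message from the device.
+The `_wait_for_task_response()` method creates an asyncio.Future that MessageProcessor will complete when it receives the TASK_END message from the device.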
@@ -166,46 +171,13 @@ The `_wait_for_task_completion()` method creates an asyncio.Future that MessageP
### HeartbeatManager: Connection Health Monitor
-**Purpose**: Continuously monitors device health by sending periodic heartbeat messages. This detects connection failures much faster than waiting for a task to timeout.
+**Purpose**: Continuously monitors device health by sending periodic heartbeat messages. This detects connection failures faster than waiting for a task to time out.
**How It Works**:
-For each connected device, HeartbeatManager starts an independent background task:
+For each connected device, HeartbeatManager starts an independent background task that uses AIP `HeartbeatProtocol` to send HEARTBEAT messages periodically and verify the device is still responsive.
-```python
-async def _send_heartbeat_loop(device_id):
- while True:
- try:
- # Get WebSocket connection
- websocket = self.connection_manager.get_connection(device_id)
-
- # Create heartbeat message
- heartbeat_msg = ClientMessage(
- type=ClientMessageType.HEARTBEAT,
- client_id=device_id,
- timestamp=datetime.now().isoformat()
- )
-
- # Send and wait for response
- await websocket.send(heartbeat_msg.model_dump_json())
- response = await asyncio.wait_for(
- websocket.recv(),
- timeout=self.heartbeat_interval * 2
- )
-
- # Update last heartbeat timestamp
- self.device_registry.update_heartbeat(device_id)
-
- # Wait before next heartbeat
- await asyncio.sleep(self.heartbeat_interval)
-
- except asyncio.TimeoutError:
- # Device didn't respond - trigger disconnection
- await self._handle_heartbeat_timeout(device_id)
- break
-```
-
-**Timeout Detection**: The timeout is set to `2 × heartbeat_interval`. For a 30-second interval, if no response arrives within 60 seconds, the device is considered disconnected. This gives enough time for network latency while still detecting failures relatively quickly.
+**Timeout Detection**: Uses a timeout mechanism to detect when devices stop responding. If no heartbeat response arrives within the expected timeframe, the device is considered disconnected and HeartbeatManager triggers the disconnection handler.
**Why Not Just Use TCP Keepalive?**: WebSocket runs over TCP, which has its own keepalive mechanism. However, TCP keepalive operates at a much longer timescale (typically 2 hours by default) and only detects network-level failures, not application-level hangs. HeartbeatManager detects if the device agent is responsive, not just if the TCP connection is alive.
@@ -215,54 +187,9 @@ async def _send_heartbeat_loop(device_id):
**The Message Loop**:
-```python
-async def _process_messages(device_id, websocket):
- while True:
- try:
- # Receive raw message from WebSocket
- msg = await websocket.recv()
-
- # Parse as AIP message
- data = ClientMessage.model_validate_json(msg)
-
- # Route to specific handler based on message type
- if data.type == ClientMessageType.TASK_END:
- await self._handle_task_end(device_id, data)
- elif data.type == ClientMessageType.COMMAND_RESULTS:
- await self._handle_command_results(device_id, data)
- elif data.type == ClientMessageType.HEARTBEAT_ACK:
- # Heartbeat handled by HeartbeatManager
- pass
- elif data.type == ClientMessageType.ERROR:
- await self._handle_error(device_id, data)
-
- except websockets.ConnectionClosed:
- # Connection dropped - trigger disconnection handling
- await self.disconnection_handler(device_id)
- break
-```
+MessageProcessor runs a background task that receives messages from the AIP transport and routes them based on message type. It handles `TASK_END` messages by completing the corresponding future that WebSocketConnectionManager is waiting on, enabling async task execution patterns.
-**Task Completion Handling**: When a TASK_END message arrives, MessageProcessor completes the corresponding future that WebSocketConnectionManager is waiting on:
-
-```python
-async def _handle_task_end(device_id, message):
- task_id = message.session_id
-
- # Create ExecutionResult from message
- result = ExecutionResult(
- task_id=task_id,
- status=message.status,
- result=message.result,
- error=message.error,
- metadata=message.metadata
- )
-
- # Complete the waiting future
- if task_id in self.pending_tasks[device_id]:
- future = self.pending_tasks[device_id][task_id]
- future.set_result(result)
- del self.pending_tasks[device_id][task_id]
-```
+**Task Completion Handling**: When a TASK_END message arrives, MessageProcessor uses the `complete_task_response()` method in WebSocketConnectionManager to resolve the pending future for that task.
**Why Run in Background**: The message loop runs continuously as an asyncio task. This allows it to receive messages asynchronously while the main execution flow (e.g., sending tasks) continues unblocked. Without this, we'd need to alternate between sending and receiving, making the code much more complex.
@@ -438,36 +365,37 @@ self.device_registry.update_device_status(device_id, DeviceStatus.CONNECTING) #
```python
# WebSocketConnectionManager.connect_to_device()
-websocket = await websockets.connect(device_info.server_url) # Create WebSocket
-self.connections[device_id] = websocket # Store connection
-
-# Start message handler BEFORE sending REGISTER to avoid race condition
-self.message_processor.start_message_handler(device_id, websocket)
-
-# Send REGISTER message
-await websocket.send(register_msg.model_dump_json())
+transport = WebSocketTransport(...)
+await transport.connect(device_info.server_url) # Create AIP transport
+self._transports[device_id] = transport # Store transport
+
+# Initialize AIP protocols for this connection
+self._registration_protocols[device_id] = RegistrationProtocol(transport)
+self._task_protocols[device_id] = TaskExecutionProtocol(transport)
+self._device_info_protocols[device_id] = DeviceInfoProtocol(transport)
+
+# ⚠️ CRITICAL: Start message handler BEFORE sending registration
+# This ensures we don't miss the server's registration response
+self.message_processor.start_message_handler(device_id, transport)
+await asyncio.sleep(0.05) # Small delay to ensure handler is listening
+
+# Register as constellation client using AIP RegistrationProtocol
+await self._register_constellation_client(device_info)
```
**Step 3: MessageProcessor Starts Background Loop**
```python
# MessageProcessor.start_message_handler()
-task = asyncio.create_task(self._process_messages(device_id, websocket))
-self.message_handlers[device_id] = task # Store task for later cancellation
+task = asyncio.create_task(self._handle_device_messages(device_id, transport))
+self._message_handlers[device_id] = task # Store task for later cancellation
```
-Now MessageProcessor is running in the background, ready to receive messages.
+Now MessageProcessor is running in the background, ready to receive messages via the AIP transport.
**Step 4: Device Registration Completes**
-The device sends back REGISTER_CONFIRMATION, which MessageProcessor receives and handles. Then WebSocketConnectionManager requests device info:
-
-```python
-# WebSocketConnectionManager.request_device_info()
-await websocket.send(device_info_request.model_dump_json())
-# Wait for response (received by MessageProcessor)
-device_info = await self._wait_for_device_info_response(device_id)
-```
+The server sends back a HEARTBEAT message with OK status (which serves as the registration confirmation). WebSocketConnectionManager then requests device info via `DeviceInfoProtocol`.
**Step 5: DeviceRegistry Updated with System Info**
@@ -492,8 +420,8 @@ Now HeartbeatManager is running in the background, sending heartbeats every 30 s
All components are now working together:
- DeviceRegistry knows the device is IDLE and ready
-- WebSocketConnectionManager has an active WebSocket
-- MessageProcessor is listening for incoming messages
+- WebSocketConnectionManager has an active AIP Transport with initialized protocols
+- MessageProcessor is listening for incoming messages via the transport
- HeartbeatManager is monitoring connection health
- TaskQueueManager is ready to queue tasks if device becomes busy
@@ -604,8 +532,10 @@ Galaxy Client's component architecture demonstrates several important design pri
This design makes Galaxy Client maintainable, extensible, and testable. When you understand how components collaborate, you can confidently modify or extend the system.
-**Related Documentation**:
+## Related Documentation
-- [DeviceManager Reference](./device_manager.md) - See how DeviceManager orchestrates components
+- [DeviceManager Reference](./device_manager.md) - See how DeviceManager orchestrates these components
- [ConstellationClient](./constellation_client.md) - Learn how components are used in the coordination layer
-- [Overview](./overview.md) - Understand the broader system architecture
+- [Overview](./overview.md) - Understand the broader Galaxy Client architecture
+- [AIP Integration](./aip_integration.md) - Learn about the message protocol components use
+- [DeviceRegistry Details](../agent_registration/device_registry.md) - Deep dive into device state management
diff --git a/documents/docs/galaxy/client/constellation_client.md b/documents/docs/galaxy/client/constellation_client.md
index 42ceb1cd3..8b6cba7ca 100644
--- a/documents/docs/galaxy/client/constellation_client.md
+++ b/documents/docs/galaxy/client/constellation_client.md
@@ -2,7 +2,7 @@
ConstellationClient is the device coordination layer in Galaxy Client. It provides a clean API for registering devices, managing connections, and assigning tasks. Most applications interact with ConstellationClient rather than the lower-level DeviceManager.
-**Related Documentation:**
+## Related Documentation
- [Overview](./overview.md) - Overall architecture and workflow
- [DeviceManager](./device_manager.md) - Internal connection management
@@ -10,8 +10,6 @@ ConstellationClient is the device coordination layer in Galaxy Client. It provid
- [Configuration](../../configuration/system/galaxy_constellation.md) - Device configuration
- [GalaxyClient](./galaxy_client.md) - Session wrapper on top of ConstellationClient
----
-
## What ConstellationClient Does
ConstellationClient implements the Facade pattern, providing a simplified interface to the complex device management system underneath. Think of it as the "device management API" for Galaxy.
@@ -35,8 +33,6 @@ ConstellationClient implements the Facade pattern, providing a simplified interf
This separation of concerns keeps ConstellationClient focused on device-level operations.
----
-
## Initialization
### Constructor
@@ -159,8 +155,6 @@ sequenceDiagram
This diagram shows the initialization sequence. For each configured device, ConstellationClient delegates to DeviceManager, which handles the low-level connection setup if auto-connect is enabled.
----
-
## Device Management Methods
### Register Device
@@ -178,6 +172,9 @@ async def register_device(
This method registers a device programmatically (outside of configuration). It's useful for dynamically adding devices at runtime.
+!!! warning "Known Limitation"
+ The current implementation does not pass the OS parameter to the underlying `DeviceManager`. For proper device registration with OS information, use configuration-based registration via `register_device_from_config()` or ensure the OS is included in the device metadata.
+
**Parameters Explained:**
- **device_id**: Unique identifier for the device. Used in all subsequent operations.
@@ -238,8 +235,6 @@ await client.disconnect_device("windows_pc")
Connection establishment involves WebSocket handshake, AIP registration, device info exchange, and starting background monitoring services (heartbeat and message processing).
----
-
## Task Execution
### Assign Task to Device
@@ -265,8 +260,6 @@ ConstellationClient focuses on device management, not task orchestration. Task a
This layering ensures each component has a clear responsibility.
----
-
## Status and Information
### Get Device Status
@@ -348,8 +341,6 @@ Returns constellation-level information:
This provides a high-level view of the entire constellation, useful for monitoring overall system health.
----
-
## Configuration Management
### Validate Configuration
@@ -447,8 +438,6 @@ await client.add_device_to_config(
This is useful for dynamic device discovery scenarios where devices are added at runtime.
----
-
## Lifecycle Management
### Shutdown
@@ -487,8 +476,6 @@ finally:
Without proper shutdown, background tasks continue running, WebSocket connections remain open, and resources leak.
----
-
## Usage Patterns
### Basic Device Management
@@ -561,8 +548,6 @@ async def monitor_health(client):
await asyncio.sleep(30) # Check every 30 seconds
```
----
-
## Integration with Other Components
### Used by GalaxyClient
@@ -604,8 +589,6 @@ for task in dag.tasks:
)
```
----
-
## Summary
ConstellationClient is the primary interface for device management in Galaxy Client. It provides:
diff --git a/documents/docs/galaxy/client/device_manager.md b/documents/docs/galaxy/client/device_manager.md
index 864f38448..652fad149 100644
--- a/documents/docs/galaxy/client/device_manager.md
+++ b/documents/docs/galaxy/client/device_manager.md
@@ -2,12 +2,12 @@
DeviceManager is the connection orchestration layer in Galaxy Client. While ConstellationClient provides the high-level device management API, DeviceManager handles the low-level details of WebSocket connections, health monitoring, message routing, and task queuing.
-**Related Documentation:**
+## Related Documentation
-- [Overview](./overview.md) - Overall architecture and workflow
+- [Overview](./overview.md) - Overall Galaxy Client architecture and workflow
- [ConstellationClient](./constellation_client.md) - High-level device management API
-- [Components](./components.md) - Detailed component documentation
-- [AIP Integration](./aip_integration.md) - Protocol details
+- [Components](./components.md) - Detailed documentation for each DeviceManager component
+- [AIP Integration](./aip_integration.md) - Protocol details and message flows
---
@@ -98,19 +98,17 @@ The coordinator pattern ensures components don't directly depend on each other,
```python
def __init__(
self,
- task_name: str,
+ task_name: str = "test_task",
heartbeat_interval: float = 30.0,
reconnect_delay: float = 5.0,
- max_retries: int = 5,
):
"""
Initialize DeviceManager.
Args:
- task_name: Identifier for this constellation instance
+ task_name: Identifier for this constellation instance (default "test_task")
heartbeat_interval: Seconds between heartbeat checks (default 30s)
reconnect_delay: Initial delay before reconnection attempt (default 5s)
- max_retries: Maximum reconnection attempts before giving up (default 5)
"""
```
@@ -131,7 +129,7 @@ When you create a DeviceManager, it initializes the five components:
**reconnect_delay**: Initial delay before first reconnection attempt. DeviceManager uses exponential backoff, so subsequent delays double: 5s, 10s, 20s, 40s, 80s. Lower values reconnect faster but may overwhelm unstable networks. Higher values give networks more recovery time.
-**max_retries**: Number of reconnection attempts before giving up. With `reconnect_delay=5` and `max_retries=5`, DeviceManager tries for roughly 155 seconds (5+10+20+40+80). Adjust based on expected network reliability.
+**max_retries**: Not a constructor argument. The maximum number of reconnection attempts is configured per device at registration time, via the `max_retries` argument of `register_device()` (default 5), and is stored in the device's `AgentProfile`. This allows different devices to have different retry limits based on their reliability characteristics.
---
@@ -144,9 +142,11 @@ async def register_device(
self,
device_id: str,
server_url: str,
- os: Optional[str] = None,
+ os: str,
capabilities: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
+ max_retries: int = 5,
+ auto_connect: bool = True,
) -> bool:
"""
Register a device for management.
@@ -203,7 +203,7 @@ else:
### Connect Device
```python
-async def connect_device(self, device_id: str) -> bool:
+async def connect_device(self, device_id: str, is_reconnection: bool = False) -> bool:
"""
Establish connection to a registered device.
@@ -220,95 +220,54 @@ DeviceManager queries DeviceRegistry to verify the device is registered. If not
**Step 2: WebSocket Connection**
-DeviceManager delegates to WebSocketConnectionManager:
+DeviceManager delegates to WebSocketConnectionManager, passing the MessageProcessor to start message handling before registration (to avoid race conditions):
```python
-ws = await websocket_connection_manager.connect(
- device_id=device_id,
- server_url=profile.server_url
+# Connect and automatically start message handler
+await connection_manager.connect_to_device(
+ device_info,
+ message_processor=self.message_processor
)
```
-WebSocketConnectionManager creates a `websockets.connect()` connection, handles SSL/TLS if needed, and stores the WebSocket object for later use.
+WebSocketConnectionManager creates an AIP `WebSocketTransport`, establishes the connection, starts the message handler (via MessageProcessor), and performs AIP registration using `RegistrationProtocol`.
-**Step 3: AIP Registration**
+**Step 3: Update Status and Start Heartbeat**
-After WebSocket connects, DeviceManager uses MessageProcessor to perform AIP protocol handshake:
+After the WebSocket connection and AIP registration succeed:
```python
-# Send REGISTER message
-await message_processor.send_message(
- device_id=device_id,
- message_type="REGISTER",
- payload={
- "device_id": device_id,
- "capabilities": profile.capabilities,
- "metadata": profile.metadata
- }
-)
+# Update status to CONNECTED
+device_registry.update_device_status(device_id, DeviceStatus.CONNECTED)
+device_registry.update_heartbeat(device_id)
-# Wait for REGISTER_CONFIRMATION
-confirmation = await message_processor.wait_for_response(
- device_id=device_id,
- message_type="REGISTER_CONFIRMATION",
- timeout=10.0
-)
+# Start heartbeat monitoring
+heartbeat_manager.start_heartbeat(device_id)
```
-This confirms the Agent Server recognizes the device and is ready to accept tasks.
+Note: The message handler was already started in `connect_to_device()` to prevent race conditions.
**Step 4: Device Info Exchange**
-DeviceManager requests device telemetry:
-
-```python
-# Send DEVICE_INFO_REQUEST
-await message_processor.send_message(
- device_id=device_id,
- message_type="DEVICE_INFO_REQUEST"
-)
-
-# Receive device telemetry
-device_info = await message_processor.wait_for_response(
- device_id=device_id,
- message_type="DEVICE_INFO",
- timeout=10.0
-)
-```
-
-Device info includes CPU count, memory, OS version, screen resolution, and other system details. DeviceManager stores this in the AgentProfile.
-
-**Step 5: Update Status**
-
-DeviceManager updates device status in DeviceRegistry:
+DeviceManager requests the device's system information from the server (the device pushes its info to the server during registration, and the server stores it):
```python
-device_registry.update_status(device_id, DeviceStatus.IDLE)
+device_system_info = await connection_manager.request_device_info(device_id)
+if device_system_info:
+ device_registry.update_device_system_info(device_id, device_system_info)
```
-Device is now ready to accept tasks.
-
-**Step 6: Start Background Services**
+Device info includes CPU count, memory, OS version, screen resolution, and other system details stored in the AgentProfile.
-DeviceManager starts two background monitoring services:
-
-**HeartbeatManager**:
-```python
-asyncio.create_task(
- heartbeat_manager.start_heartbeat(device_id)
-)
-```
+**Step 5: Set Device to IDLE**
-This task runs in the background, sending HEARTBEAT messages every `heartbeat_interval` seconds and checking for responses. If heartbeat fails, HeartbeatManager calls DeviceManager's disconnection handler.
+DeviceManager marks the device as IDLE so that it is ready to accept tasks:
-**MessageProcessor**:
```python
-asyncio.create_task(
- message_processor.start_message_handler(device_id)
-)
+device_registry.set_device_idle(device_id)
```
-This task listens for incoming messages from the device (TASK_END, HEARTBEAT responses, error messages) and routes them to appropriate handlers.
+Device is now ready to accept tasks. Note that HeartbeatManager was already started in Step 3, and MessageProcessor's message handler was started automatically during the WebSocket connection in Step 2.
**Connection Sequence Diagram:**
@@ -324,28 +283,31 @@ sequenceDiagram
DM->>DR: Get device profile
DR-->>DM: AgentProfile
- DM->>WSM: connect(device_id, server_url)
- WSM->>Server: WebSocket handshake
+ DM->>WSM: connect_to_device(device_info, message_processor)
+ WSM->>Server: WebSocket handshake (via AIP Transport)
Server-->>WSM: Connection established
- WSM-->>DM: WebSocket object
- DM->>MP: send_message(REGISTER)
- MP->>Server: REGISTER
- Server-->>MP: REGISTER_CONFIRMATION
- MP-->>DM: Success
+ Note over WSM,MP: CRITICAL: Start message handler BEFORE registration
+ WSM->>MP: start_message_handler(device_id, transport)
+ MP-->>MP: Start background message listener
- DM->>MP: send_message(DEVICE_INFO_REQUEST)
- MP->>Server: DEVICE_INFO_REQUEST
- Server-->>MP: DEVICE_INFO (telemetry)
- MP-->>DM: Device telemetry
+ WSM->>Server: REGISTER (via RegistrationProtocol)
+ Server-->>WSM: HEARTBEAT (OK status = registration confirmed)
+ WSM-->>DM: Connection successful
- DM->>DR: update_status(IDLE)
+ DM->>DR: update_device_status(CONNECTED)
+ DM->>DR: update_heartbeat()
DM->>HM: start_heartbeat(device_id)
HM-->>HM: Start background heartbeat loop
- DM->>MP: start_message_handler(device_id)
- MP-->>MP: Start background message listener
+ DM->>WSM: request_device_info(device_id)
+ WSM->>Server: DEVICE_INFO_REQUEST
+ Server-->>WSM: DEVICE_INFO_RESPONSE
+ WSM-->>DM: Device system info
+
+ DM->>DR: update_device_system_info()
+ DM->>DR: set_device_idle()
DM-->>DM: Connection complete
```
@@ -370,7 +332,7 @@ When connection fails, DeviceManager:
### Disconnect Device
```python
-async def disconnect_device(self, device_id: str) -> bool:
+async def disconnect_device(self, device_id: str) -> None:
"""
Disconnect from a device and cleanup resources.
@@ -441,7 +403,8 @@ async def assign_task_to_device(
device_id: str,
task_description: str,
task_data: Dict[str, Any],
-) -> Dict[str, Any]:
+ timeout: float = 1000,
+) -> ExecutionResult:
"""
Assign a task to a device for execution.
@@ -715,8 +678,7 @@ Here's a complete example showing how all components work together during a typi
manager = DeviceManager(
task_name="production_constellation",
heartbeat_interval=30.0,
- reconnect_delay=5.0,
- max_retries=5
+ reconnect_delay=5.0
)
# This creates all five components:
@@ -731,30 +693,32 @@ await manager.register_device(
device_id="office_pc",
server_url="ws://192.168.1.100:5000/ws",
os="Windows",
- capabilities=["office", "web"]
+ capabilities=["office", "web"],
+ max_retries=5,
+ auto_connect=True # Will automatically connect after registration
)
# DeviceManager → DeviceRegistry (store AgentProfile)
+# If auto_connect=True → DeviceManager → connect_device()
-# 3. Connect device
-await manager.connect_device("office_pc")
-# DeviceManager → WebSocketConnectionManager (connect)
-# → MessageProcessor (send REGISTER, DEVICE_INFO_REQUEST)
-# → DeviceRegistry (update status to IDLE)
+# 3. Connect device (if auto_connect was False)
+# await manager.connect_device("office_pc")
+# DeviceManager → WebSocketConnectionManager (connect, start message handler)
+# → DeviceRegistry (update status to CONNECTED, then IDLE)
# → HeartbeatManager (start heartbeat loop)
-# → MessageProcessor (start message handler loop)
# 4. Assign first task (device is IDLE)
result1 = await manager.assign_task_to_device(
task_id="task_1",
device_id="office_pc",
task_description="Open Excel",
- task_data={"file": "report.xlsx"}
+ task_data={"file": "report.xlsx"},
+ timeout=300
)
# DeviceManager → DeviceRegistry (check status: IDLE)
-# → DeviceRegistry (update status to BUSY)
-# → MessageProcessor (send TASK)
+# → DeviceRegistry (update status to BUSY via set_device_busy)
+# → WebSocketConnectionManager (send TASK via TaskExecutionProtocol)
# [wait for TASK_END]
-# → DeviceRegistry (update status to IDLE)
+# → DeviceRegistry (update status to IDLE via set_device_idle)
# 5. Assign second task while first is running (device is BUSY)
# Note: This happens concurrently with task_1
@@ -763,7 +727,8 @@ asyncio.create_task(
task_id="task_2",
device_id="office_pc",
task_description="Send email",
- task_data={"to": "john@example.com"}
+ task_data={"to": "john@example.com"},
+ timeout=300
)
)
# DeviceManager → DeviceRegistry (check status: BUSY)
@@ -787,8 +752,9 @@ asyncio.create_task(
# → WebSocketConnectionManager (disconnect)
# → TaskQueueManager (tasks remain queued)
# → DeviceRegistry (update status to FAILED)
-# → [wait 5s]
-# → connect_device (reconnection attempt)
+# → [schedule reconnection attempt]
+# → [wait reconnect_delay seconds]
+# → connect_device (reconnection attempt with is_reconnection=True)
# 7. Reconnection succeeds
# After reconnection:
@@ -979,9 +945,9 @@ DeviceManager is the orchestration layer that coordinates five specialized compo
**Key Concepts:**
- **Orchestrator Pattern**: DeviceManager coordinates components but doesn't duplicate their functionality
-- **Modular Architecture**: Five components with single responsibilities
+- **Modular Architecture**: Five components with single responsibilities (DeviceRegistry, WebSocketConnectionManager, HeartbeatManager, MessageProcessor, TaskQueueManager)
- **Lifecycle Management**: Register → Connect → Execute → Disconnect → Reconnect
-- **Automatic Reconnection**: Exponential backoff with configurable retries
+- **Automatic Reconnection**: Exponential backoff with configurable retries per device
- **Task Queuing**: Automatic queuing when devices are busy
**When to Use DeviceManager Directly:**
@@ -997,5 +963,6 @@ Most applications should use ConstellationClient, which wraps DeviceManager. Use
- See [Components](./components.md) for detailed component documentation
- See [ConstellationClient](./constellation_client.md) for high-level API
-- See [AIP Integration](./aip_integration.md) for protocol details
-- See [Overview](./overview.md) for overall architecture
+- See [AIP Integration](./aip_integration.md) for protocol details and message flows
+- See [Overview](./overview.md) for overall Galaxy Client architecture
+- See [Agent Registration](../agent_registration/overview.md) for device registration details
diff --git a/documents/docs/galaxy/client/galaxy_client.md b/documents/docs/galaxy/client/galaxy_client.md
index 2327ca4de..58672ee6a 100644
--- a/documents/docs/galaxy/client/galaxy_client.md
+++ b/documents/docs/galaxy/client/galaxy_client.md
@@ -2,13 +2,11 @@
GalaxyClient is an optional session management wrapper on top of ConstellationClient. It provides a convenient high-level API for initializing the system, processing user requests through GalaxySession, and running interactive sessions. Most applications use GalaxyClient as the main entry point.
-**Related Documentation:**
+## Related Documentation
- [Overview](./overview.md) - Overall architecture and workflow
- [ConstellationClient](./constellation_client.md) - Device coordination layer
----
-
## What GalaxyClient Does
GalaxyClient is the "easy mode" API for Galaxy. While you can use ConstellationClient directly for device management, GalaxyClient adds session management, request processing, and interactive mode on top.
@@ -43,8 +41,6 @@ GalaxyClient handles the entire request lifecycle: parsing the request, creating
GalaxyClient is the orchestrator at the highest level, delegating to specialized components for each concern.
----
-
## When to Use GalaxyClient
**Use GalaxyClient when:**
@@ -67,8 +63,6 @@ GalaxyClient is the orchestrator at the highest level, delegating to specialized
**ConstellationClient**: Monitoring system that assigns health check tasks to devices every 5 minutes
----
-
## Initialization
### Constructor
@@ -206,8 +200,6 @@ if len(connected) == 0:
raise RuntimeError("No devices connected")
```
----
-
## Request Processing
### Process Request
@@ -236,9 +228,11 @@ This is the primary method you'll use. It handles the entire request lifecycle:
```python
session = GalaxySession(
+ task=task_name,
+ should_evaluate=False,
+ id=session_id,
client=self._constellation_client,
- request=request,
- context=context or {}
+ initial_request=request
)
```
@@ -333,8 +327,6 @@ result = await client.process_request(
Context is useful for multi-round conversations where later requests reference earlier results.
----
-
## Interactive Mode
### Interactive Mode
@@ -414,8 +406,6 @@ Goodbye!
**Device Status**: Shows which devices are connected at startup.
----
-
## Lifecycle Management
### Shutdown
@@ -464,8 +454,6 @@ async with GalaxyClient(config_path="config.yaml") as client:
# Automatically calls shutdown() on exit
```
----
-
## Configuration Management
### Get Device Status
@@ -536,8 +524,6 @@ if success:
This delegates to ConstellationClient, which registers and connects the device.
----
-
## Usage Patterns
### Basic Request Processing
@@ -656,8 +642,6 @@ async def adaptive_constellation():
await client.shutdown()
```
----
-
## Integration with Other Components
### GalaxyClient vs ConstellationClient
@@ -732,8 +716,6 @@ async def list_devices():
}
```
----
-
## Summary
GalaxyClient is the high-level entry point for Galaxy Client, providing:
diff --git a/documents/docs/galaxy/client/overview.md b/documents/docs/galaxy/client/overview.md
index 8026619c7..b763ab835 100644
--- a/documents/docs/galaxy/client/overview.md
+++ b/documents/docs/galaxy/client/overview.md
@@ -2,7 +2,7 @@
Galaxy Client is the client-side layer responsible for multi-device coordination in the UFO³ framework. At its core is **ConstellationClient**, which manages device registration, connection, and task assignment. **GalaxyClient** provides a lightweight wrapper offering convenient session management interfaces.
-**Related Documentation:**
+## Related Documentation
- [ConstellationClient](./constellation_client.md) - Core device coordination component
- [DeviceManager](./device_manager.md) - Low-level connection management
@@ -11,8 +11,6 @@ Galaxy Client is the client-side layer responsible for multi-device coordination
- [GalaxyClient](./galaxy_client.md) - Session wrapper API
- [Configuration](../../configuration/system/galaxy_constellation.md) - Device configuration guide
----
-
## The Complete Path: From User Request to Device Execution
To understand Galaxy Client, we first need to see the entire system workflow. When a user submits a task request, the system processes it through several layers:
@@ -73,8 +71,6 @@ All communication with devices goes through the [Agent Interaction Protocol (AIP
For detailed AIP explanation, see [AIP Integration](./aip_integration.md).
----
-
## Component Responsibilities
Having understood the overall flow, let's examine the specific responsibilities of each component:
@@ -131,7 +127,7 @@ DeviceManager is the "engine" of ConstellationClient. It uses 5 modular componen
This modular design ensures each component has a single responsibility, making testing and maintenance easier. See [DeviceManager documentation](./device_manager.md) for details.
-### GalaxyClient: Session Management Convenience Wrapper
+### GalaxyClient: Session Management Wrapper
GalaxyClient provides a higher-level abstraction on top of ConstellationClient:
@@ -154,8 +150,6 @@ GalaxyClient's main value lies in:
If your application already has its own session management logic, you can skip GalaxyClient and use ConstellationClient directly. See [GalaxyClient documentation](./galaxy_client.md) for detailed API.
----
-
## Typical Workflow Example
Let's walk through a complete example, from user request to device execution:
@@ -289,8 +283,6 @@ After all tasks complete:
The complete execution trace is preserved in logs for debugging and analysis.
----
-
## Relationships with Other System Components
Galaxy Client is not an isolated system—it closely collaborates with other UFO³ components:
@@ -307,7 +299,7 @@ Galaxy Client doesn't connect directly to devices but routes through [Agent Serv
**Load Balancing**: If multiple clients connect to the same device, Agent Server can distribute load and prevent conflicts.
-### Uses ConstellationAgent for Task Planning
+### Calls ConstellationAgent for Task Planning
When GalaxyClient receives a user request, it calls [ConstellationAgent](../constellation_agent/overview.md) to decompose the request into a DAG (Directed Acyclic Graph). ConstellationAgent is LLM-powered and can:
@@ -319,6 +311,19 @@ When GalaxyClient receives a user request, it calls [ConstellationAgent](../cons
**Dynamically Adjust DAG**: If issues arise during execution (e.g., a device fails), ConstellationAgent can replan and modify the DAG to adapt to the new situation.
+For more details, see [ConstellationAgent Documentation](../constellation_agent/overview.md).
+
+### Coordinates with TaskConstellationOrchestrator for DAG Execution
+
+Once ConstellationAgent creates the DAG, [TaskConstellationOrchestrator](../constellation_orchestrator/overview.md) executes it across devices. The orchestrator:
+
+- **Respects Dependencies**: Ensures tasks execute in the correct order based on the DAG structure
+- **Selects Devices**: Chooses appropriate devices based on capability matching
+- **Parallel Execution**: Runs independent tasks concurrently across different devices
+- **Handles Failures**: Manages task failures and triggers replanning if needed
+
+For more details, see [TaskConstellationOrchestrator Documentation](../constellation_orchestrator/overview.md).
+
### Collaborates with Device Agents for Task Execution
The actual task execution happens on [Device Agents](../../client/overview.md) running on each device (such as UFO² Desktop Agent, Linux Agent, etc.). Device Agents are responsible for:
@@ -343,8 +348,6 @@ All cross-component communication follows the [AIP protocol](../../aip/overview.
**Error Handling Mechanism**: Defines standard ERROR message types for reporting and handling failures consistently across all components.
----
-
## Configuration and Deployment
### Device Configuration
@@ -407,8 +410,6 @@ Configuration fields explained:
- Use connection pooling if connecting to many devices
- Implement circuit breaker pattern for failing devices
----
-
## Detailed Component Documentation
- [ConstellationClient API Reference](./constellation_client.md) - Complete device coordination API
@@ -417,8 +418,6 @@ Configuration fields explained:
- [AIP Integration](./aip_integration.md) - How to use the communication protocol
- [GalaxyClient Session Wrapper](./galaxy_client.md) - Session management API
----
-
## Summary
Galaxy Client provides the core multi-device coordination capabilities in UFO³. Through layered design, it simplifies complex distributed system management into clear APIs:
diff --git a/documents/docs/galaxy/observer/agent_output_observer.md b/documents/docs/galaxy/observer/agent_output_observer.md
new file mode 100644
index 000000000..7f713365f
--- /dev/null
+++ b/documents/docs/galaxy/observer/agent_output_observer.md
@@ -0,0 +1,536 @@
+# Agent Output Observer
+
+The **AgentOutputObserver** handles real-time display of agent responses and actions. It listens for agent interaction events and delegates the actual presentation logic to specialized presenters, providing a clean separation between event handling and output formatting.
+
+**Location:** `galaxy/session/observers/agent_output_observer.py`
+
+## Purpose
+
+The Agent Output Observer enables:
+
+- **Real-time Feedback** — Display agent thinking and decision-making process
+- **Action Visibility** — Show what actions the agent is taking
+- **Debugging** — Understand agent behavior during constellation execution
+- **User Engagement** — Keep users informed of progress and decisions
+
+## Architecture
+
+The observer uses a **presenter pattern** for flexible output formatting:
+
+```mermaid
+graph TB
+ subgraph "Agent Layer"
+ A[ConstellationAgent]
+ end
+
+ subgraph "Event System"
+ EB[EventBus]
+ end
+
+ subgraph "Observer Layer"
+ AOO[AgentOutputObserver]
+ ER[Event Router]
+ end
+
+ subgraph "Presenter Layer"
+ P[Presenter Factory]
+ RP[RichPresenter]
+ TP[TextPresenter]
+ end
+
+ subgraph "Output"
+ O[Terminal/Console]
+ end
+
+ A -->|publish| EB
+ EB -->|notify| AOO
+ AOO --> ER
+ ER -->|agent_response| RP
+ ER -->|agent_action| RP
+
+ P --> RP
+ P --> TP
+
+ RP --> O
+ TP --> O
+
+ style AOO fill:#66bb6a,stroke:#333,stroke-width:3px
+ style P fill:#ffa726,stroke:#333,stroke-width:2px
+ style EB fill:#4a90e2,stroke:#333,stroke-width:2px,color:#fff
+```
+
+**Component Responsibilities:**
+
+| Component | Role | Description |
+|-----------|------|-------------|
+| **Agent** | Event publisher | Publishes AGENT_RESPONSE and AGENT_ACTION events |
+| **AgentOutputObserver** | Event handler | Receives and routes agent events |
+| **Presenter** | Output formatter | Formats and displays agent output |
+| **PresenterFactory** | Creator | Creates appropriate presenter based on type |
+
+## Handled Events
+
+The observer handles two types of agent events:
+
+### 1. AGENT_RESPONSE
+
+Triggered when the agent generates responses (thoughts, plans, reasoning):
+
+**Event Data Structure:**
+
+```python
+{
+ "agent_name": "constellation_agent",
+ "agent_type": "constellation",
+ "output_type": "response",
+ "output_data": {
+ # ConstellationAgentResponse fields
+ "thought": "Task 1 completed successfully...",
+ "plan": "Next, I will process the results...",
+ "operation": "EDIT",
+ "observation": "Task result shows...",
+ # ... other fields
+ },
+ "print_action": False # Whether to print action details
+}
+```
+
+### 2. AGENT_ACTION
+
+Triggered when the agent executes actions (constellation editing):
+
+**Event Data Structure:**
+
+```python
+{
+ "agent_name": "constellation_agent",
+ "agent_type": "constellation",
+ "output_type": "action",
+ "output_data": {
+ "action_type": "constellation_editing",
+ "actions": [
+ {
+ "name": "add_task",
+ "arguments": {
+ "task_id": "new_task_1",
+ "description": "Process attachment",
+ # ...
+ }
+ },
+ # ... more actions
+ ]
+ }
+}
+```
+
+## Implementation
+
+### Initialization
+
+```python
+from galaxy.session.observers import AgentOutputObserver
+
+# Create agent output observer with default Rich presenter
+agent_output_observer = AgentOutputObserver(presenter_type="rich")
+
+# Subscribe to event bus
+from galaxy.core.events import get_event_bus
+event_bus = get_event_bus()
+event_bus.subscribe(agent_output_observer)
+```
+
+**Constructor Parameters:**
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `presenter_type` | `str` | `"rich"` | Type of presenter ("rich", "text", etc.) |
+
+### Presenter Types
+
+The observer supports different presenter types for various output formats:
+
+| Presenter Type | Description | Use Case |
+|----------------|-------------|----------|
+| `"rich"` | Rich terminal formatting with colors and boxes | Interactive terminal use |
+| `"text"` | Plain text output | Log files, CI/CD, simple terminals |
+
+## Output Examples
+
+### Agent Response Display
+
+When the agent generates a response, the Rich presenter displays:
+
+```
+╭─────────────────────────────────────────────────────────────╮
+│ 🤖 Agent Response │
+├─────────────────────────────────────────────────────────────┤
+│ Thought: │
+│ Task 'fetch_emails' has completed successfully. I need to │
+│ analyze the results and determine next steps. │
+│ │
+│ Plan: │
+│ I will extract the email count from the result and create │
+│ parallel parsing tasks for each email. │
+│ │
+│ Operation: EDIT │
+│ │
+│ Observation: │
+│ Result shows 3 emails were fetched. I will create 3 │
+│ parsing tasks with dependencies on the fetch task. │
+╰─────────────────────────────────────────────────────────────╯
+```
+
+### Agent Action Display
+
+When the agent performs constellation editing:
+
+```
+╭─────────────────────────────────────────────────────────────╮
+│ 🛠️ Agent Actions: Constellation Editing │
+├─────────────────────────────────────────────────────────────┤
+│ Action 1: add_task │
+│ ├─ task_id: parse_email_1 │
+│ ├─ description: Parse the first email │
+│ ├─ target_device_id: windows_pc_001 │
+│ └─ priority: MEDIUM │
+│ │
+│ Action 2: add_task │
+│ ├─ task_id: parse_email_2 │
+│ ├─ description: Parse the second email │
+│ ├─ target_device_id: windows_pc_001 │
+│ └─ priority: MEDIUM │
+│ │
+│ Action 3: add_dependency │
+│ ├─ from_task_id: fetch_emails │
+│ ├─ to_task_id: parse_email_1 │
+│ └─ dependency_type: SUCCESS_ONLY │
+│ │
+│ Action 4: add_dependency │
+│ ├─ from_task_id: fetch_emails │
+│ ├─ to_task_id: parse_email_2 │
+│ └─ dependency_type: SUCCESS_ONLY │
+╰─────────────────────────────────────────────────────────────╯
+```
+
+## Event Processing Flow
+
+```mermaid
+sequenceDiagram
+ participant A as ConstellationAgent
+ participant EB as EventBus
+ participant AOO as AgentOutputObserver
+ participant P as Presenter
+ participant C as Console
+
+ Note over A: Agent generates response
+ A->>EB: publish(AGENT_RESPONSE)
+ EB->>AOO: on_event(event)
+ AOO->>AOO: _handle_agent_response()
+ AOO->>AOO: Reconstruct ConstellationAgentResponse
+ AOO->>P: present_constellation_agent_response()
+ P->>C: Display formatted response
+
+ Note over A: Agent performs actions
+ A->>EB: publish(AGENT_ACTION)
+ EB->>AOO: on_event(event)
+ AOO->>AOO: _handle_agent_action()
+ AOO->>AOO: Reconstruct ActionCommandInfo
+ AOO->>P: present_constellation_editing_actions()
+ P->>C: Display formatted actions
+```
+
+## API Reference
+
+### Constructor
+
+```python
+def __init__(self, presenter_type: str = "rich")
+```
+
+Initialize the agent output observer with specified presenter type.
+
+**Parameters:**
+
+- `presenter_type` — Type of presenter to use ("rich", "text", etc.)
+
+**Example:**
+
+```python
+# Use Rich presenter (default)
+rich_observer = AgentOutputObserver(presenter_type="rich")
+
+# Use plain text presenter
+text_observer = AgentOutputObserver(presenter_type="text")
+```
+
+### Event Handler
+
+```python
+async def on_event(self, event: Event) -> None
+```
+
+Handle agent output events.
+
+**Parameters:**
+
+- `event` — Event instance; only `AgentEvent` instances are processed, other events are ignored
+
+**Behavior:**
+
+- Filters for `AgentEvent` instances
+- Routes to appropriate handler based on event type
+- Reconstructs response/action objects from event data
+- Delegates display to presenter
+
+## Usage Examples
+
+### Example 1: Basic Setup
+
+```python
+from galaxy.core.events import get_event_bus
+from galaxy.session.observers import AgentOutputObserver
+
+# Create and subscribe agent output observer
+agent_output_observer = AgentOutputObserver(presenter_type="rich")
+event_bus = get_event_bus()
+event_bus.subscribe(agent_output_observer)
+
+# Agent events will now be displayed automatically
+await orchestrator.execute_constellation(constellation)
+
+# Clean up
+event_bus.unsubscribe(agent_output_observer)
+```
+
+### Example 2: Conditional Display
+
+```python
+async def execute_with_agent_feedback(show_agent_output: bool = True):
+ """Execute constellation with optional agent output display."""
+
+ event_bus = get_event_bus()
+
+ if show_agent_output:
+ agent_output_observer = AgentOutputObserver(presenter_type="rich")
+ event_bus.subscribe(agent_output_observer)
+
+ try:
+ await orchestrator.execute_constellation(constellation)
+ finally:
+ if show_agent_output:
+ event_bus.unsubscribe(agent_output_observer)
+```
+
+### Example 3: Different Presenters for Different Modes
+
+```python
+import sys
+
+def create_agent_observer():
+ """Create appropriate agent observer based on environment."""
+
+ # Use Rich presenter for interactive terminal
+ if sys.stdout.isatty():
+ return AgentOutputObserver(presenter_type="rich")
+
+ # Use text presenter for logs/CI
+ else:
+ return AgentOutputObserver(presenter_type="text")
+
+# Usage
+agent_output_observer = create_agent_observer()
+event_bus.subscribe(agent_output_observer)
+```
+
+### Example 4: Custom Filtering
+
+```python
+from galaxy.core.events import EventType
+
+# Subscribe only to specific agent events
+event_bus.subscribe(
+ agent_output_observer,
+ {EventType.AGENT_ACTION} # Only show actions, not responses
+)
+```
+
+## Implementation Details
+
+### Response Handling
+
+The observer reconstructs `ConstellationAgentResponse` from event data:
+
+```python
+async def _handle_agent_response(self, event: AgentEvent) -> None:
+ """Handle agent response event."""
+
+ try:
+ output_data = event.output_data
+
+ if event.agent_type == "constellation":
+ # Reconstruct ConstellationAgentResponse from output data
+ response = ConstellationAgentResponse.model_validate(output_data)
+ print_action = output_data.get("print_action", False)
+
+ # Use presenter to display the response
+ self.presenter.present_constellation_agent_response(
+ response,
+ print_action=print_action
+ )
+
+ except Exception as e:
+ self.logger.error(f"Error handling agent response: {e}")
+```
+
+### Action Handling
+
+The observer reconstructs action command objects:
+
+```python
+async def _handle_agent_action(self, event: AgentEvent) -> None:
+ """Handle agent action event."""
+
+ try:
+ output_data = event.output_data
+
+ if output_data.get("action_type") == "constellation_editing":
+ actions_data = output_data.get("actions", [])
+
+ # Convert each action dict to ActionCommandInfo
+ action_objects = []
+ for action_dict in actions_data:
+ action_obj = ActionCommandInfo.model_validate(action_dict)
+ action_objects.append(action_obj)
+
+ # Create ListActionCommandInfo with reconstructed actions
+ actions = ListActionCommandInfo(actions=action_objects)
+
+ # Use presenter to display the actions
+ self.presenter.present_constellation_editing_actions(actions)
+
+ except Exception as e:
+ self.logger.error(f"Error handling agent action: {e}")
+```
+
+## Best Practices
+
+### 1. Match Presenter to Environment
+
+```python
+# ✅ Good: Choose presenter based on context
+if running_in_jupyter:
+ presenter_type = "rich" # Good for notebooks
+elif running_in_ci:
+ presenter_type = "text" # Good for logs
+elif is_interactive_terminal:
+ presenter_type = "rich" # Good for terminal
+else:
+ presenter_type = "text" # Safe default
+```
+
+### 2. Selective Event Subscription
+
+```python
+# Only show actions (skip verbose responses)
+event_bus.subscribe(
+ agent_output_observer,
+ {EventType.AGENT_ACTION}
+)
+
+# Show everything (responses + actions)
+event_bus.subscribe(agent_output_observer)
+```
+
+### 3. Handle Errors Gracefully
+
+The observer includes comprehensive error handling:
+
+```python
+try:
+ # Process agent event
+ await self._handle_agent_response(event)
+except Exception as e:
+ self.logger.error(f"Error handling agent output event: {e}")
+ # Don't re-raise - continue observing other events
+```
+
+## Integration with Agent
+
+The observer integrates with the ConstellationAgent's state machine:
+
+### Agent Publishes Events
+
+The agent publishes events at key points:
+
+```python
+class ConstellationAgent:
+ async def generate_response(self):
+ """Generate agent response and publish event."""
+
+ # Generate response using LLM
+ response = await self._llm_call(...)
+
+ # Publish AGENT_RESPONSE event
+ await self._publish_agent_response_event(response)
+
+ return response
+
+ async def execute_actions(self, actions):
+ """Execute actions and publish event."""
+
+ # Publish AGENT_ACTION event
+ await self._publish_agent_action_event(actions)
+
+ # Actually execute the actions
+ result = await self._execute_constellation_editing(actions)
+
+ return result
+```
+
+## Performance Considerations
+
+### Display Overhead
+
+The observer adds minimal overhead:
+
+- **Event processing**: < 1ms per event
+- **Rich rendering**: 5-10ms per display
+- **Text rendering**: < 1ms per display
+
+### Optimization for Large Outputs
+
+```python
+# For very verbose agents, consider:
+
+# 1. Use text presenter instead of rich
+agent_output_observer = AgentOutputObserver(presenter_type="text")
+
+# 2. Subscribe only to actions
+event_bus.subscribe(
+ agent_output_observer,
+ {EventType.AGENT_ACTION}
+)
+
+# 3. Disable in production
+if not debug_mode:
+ # Don't create or subscribe observer
+ pass
+```
+
+## Related Documentation
+
+- **[Observer System Overview](overview.md)** — Architecture and design
+- **[Progress Observer](progress_observer.md)** — Task completion coordination
+- **[Constellation Agent](../constellation_agent/overview.md)** — Agent implementation and state machine
+
+## Summary
+
+The Agent Output Observer:
+
+- **Displays** agent responses and actions in real-time
+- **Delegates** to presenters for flexible formatting
+- **Supports** multiple output formats (Rich, text)
+- **Provides** transparency into agent decision-making
+- **Enables** debugging and user engagement
+
+This observer is essential for understanding agent behavior during constellation execution, providing visibility into the AI's thought process and actions.
diff --git a/documents/docs/galaxy/observer/event_system.md b/documents/docs/galaxy/observer/event_system.md
new file mode 100644
index 000000000..dfcf721ff
--- /dev/null
+++ b/documents/docs/galaxy/observer/event_system.md
@@ -0,0 +1,609 @@
+# Event System Core
+
+The Event System Core provides the foundational infrastructure for event-driven communication in the Galaxy framework. It implements the Observer pattern through a central event bus, type-safe event classes, and well-defined interfaces.
+
+**Location:** `galaxy/core/events.py`
+
+---
+
+## 📦 Core Components
+
+### EventBus — Central Message Broker
+
+The `EventBus` class is the heart of the event system, managing subscriptions and distributing events to all registered observers.
+
+```mermaid
+graph LR
+ A[Publisher 1] -->|publish| B[EventBus]
+ C[Publisher 2] -->|publish| B
+ D[Publisher 3] -->|publish| B
+
+ B -->|notify| E[Observer 1]
+ B -->|notify| F[Observer 2]
+ B -->|notify| G[Observer 3]
+ B -->|notify| H[Observer 4]
+
+ style B fill:#4a90e2,stroke:#333,stroke-width:3px,color:#fff
+ style E fill:#66bb6a,stroke:#333,stroke-width:2px
+ style F fill:#66bb6a,stroke:#333,stroke-width:2px
+ style G fill:#66bb6a,stroke:#333,stroke-width:2px
+ style H fill:#66bb6a,stroke:#333,stroke-width:2px
+```
+
+**Key Features:**
+
+- **Singleton Pattern**: Single global instance accessed via `get_event_bus()`
+- **Type-based Filtering**: Observers can subscribe to specific event types or all events
+- **Concurrent Notification**: All observers notified in parallel using `asyncio.gather()`
+- **Error Isolation**: Exceptions in one observer don't affect others
+
+### Event Types
+
+`EventType` enumeration defines all possible events in the system:
+
+```python
+class EventType(Enum):
+ # Task-level events
+ TASK_STARTED = "task_started"
+ TASK_COMPLETED = "task_completed"
+ TASK_FAILED = "task_failed"
+
+ # Constellation lifecycle events
+ CONSTELLATION_STARTED = "constellation_started"
+ CONSTELLATION_COMPLETED = "constellation_completed"
+ CONSTELLATION_FAILED = "constellation_failed"
+
+ # Structure modification events
+ CONSTELLATION_MODIFIED = "constellation_modified"
+
+ # Agent output events
+ AGENT_RESPONSE = "agent_response"
+ AGENT_ACTION = "agent_action"
+
+ # Device events
+ DEVICE_CONNECTED = "device_connected"
+ DEVICE_DISCONNECTED = "device_disconnected"
+ DEVICE_STATUS_CHANGED = "device_status_changed"
+```
+
+### Event Classes
+
+Five event classes (the base `Event` plus four specialized subclasses) provide type-safe event handling:
+
+| Event Class | Extends | Additional Fields | Use Case |
+|-------------|---------|-------------------|----------|
+| `Event` | (base) | `event_type`, `source_id`, `timestamp`, `data` | Generic events |
+| `TaskEvent` | `Event` | `task_id`, `status`, `result`, `error` | Task execution events |
+| `ConstellationEvent` | `Event` | `constellation_id`, `constellation_state`, `new_ready_tasks` | Constellation lifecycle events |
+| `AgentEvent` | `Event` | `agent_name`, `agent_type`, `output_type`, `output_data` | Agent interaction events |
+| `DeviceEvent` | `Event` | `device_id`, `device_status`, `device_info`, `all_devices` | Device management events |
+
+---
+
+## 🔌 Interfaces
+
+### IEventObserver
+
+Defines the contract for all observer implementations:
+
+```python
+from abc import ABC, abstractmethod
+from galaxy.core.events import Event
+
+class IEventObserver(ABC):
+ """Interface for event observers."""
+
+ @abstractmethod
+ async def on_event(self, event: Event) -> None:
+ """
+ Handle an event.
+
+ :param event: The event object containing type, source, timestamp and data
+ """
+ pass
+```
+
+**Implementation Pattern:**
+
+```python
+class MyCustomObserver(IEventObserver):
+ """Custom observer implementation."""
+
+ async def on_event(self, event: Event) -> None:
+ """Handle events of interest."""
+
+ # Type-safe handling using isinstance
+ if isinstance(event, TaskEvent):
+ await self._handle_task_event(event)
+ elif isinstance(event, ConstellationEvent):
+ await self._handle_constellation_event(event)
+
+ async def _handle_task_event(self, event: TaskEvent) -> None:
+ """Process task events."""
+ if event.event_type == EventType.TASK_COMPLETED:
+ print(f"Task {event.task_id} completed with status: {event.status}")
+
+ async def _handle_constellation_event(self, event: ConstellationEvent) -> None:
+ """Process constellation events."""
+ if event.event_type == EventType.CONSTELLATION_STARTED:
+ print(f"Constellation {event.constellation_id} started")
+```
+
+### IEventPublisher
+
+Defines the contract for event publishing:
+
+```python
+class IEventPublisher(ABC):
+ """Interface for event publishers."""
+
+ @abstractmethod
+ def subscribe(self, observer: IEventObserver,
+ event_types: Set[EventType] = None) -> None:
+ """Subscribe an observer to events."""
+ pass
+
+ @abstractmethod
+ def unsubscribe(self, observer: IEventObserver) -> None:
+ """Unsubscribe an observer."""
+ pass
+
+ @abstractmethod
+ async def publish_event(self, event: Event) -> None:
+ """Publish an event to subscribers."""
+ pass
+```
+
+---
+
+## 📖 EventBus API Reference
+
+### Subscription Management
+
+#### subscribe()
+
+Subscribe an observer to receive event notifications:
+
+```python
+def subscribe(
+ self,
+ observer: IEventObserver,
+ event_types: Set[EventType] = None
+) -> None
+```
+
+**Parameters:**
+
+- `observer`: The observer object implementing `IEventObserver`
+- `event_types`: Optional set of event types to subscribe to (None = all events)
+
+**Examples:**
+
+```python
+from galaxy.core.events import get_event_bus, EventType
+
+event_bus = get_event_bus()
+
+# Subscribe to all events
+event_bus.subscribe(my_observer)
+
+# Subscribe to specific event types
+event_bus.subscribe(my_observer, {
+ EventType.TASK_COMPLETED,
+ EventType.TASK_FAILED
+})
+
+# Subscribe to constellation events only
+event_bus.subscribe(constellation_observer, {
+ EventType.CONSTELLATION_STARTED,
+ EventType.CONSTELLATION_COMPLETED,
+ EventType.CONSTELLATION_MODIFIED
+})
+```
+
+#### unsubscribe()
+
+Remove an observer from all event subscriptions:
+
+```python
+def unsubscribe(self, observer: IEventObserver) -> None
+```
+
+**Parameters:**
+
+- `observer`: The observer object to unsubscribe
+
+**Example:**
+
+```python
+# Clean up observer when done
+event_bus.unsubscribe(my_observer)
+```
+
+### Event Publishing
+
+#### publish_event()
+
+Publish an event to all subscribed observers:
+
+```python
+async def publish_event(self, event: Event) -> None
+```
+
+**Parameters:**
+
+- `event`: The event object to publish
+
+**Example:**
+
+```python
+from galaxy.core.events import TaskEvent, EventType
+import time
+
+# Create and publish a task event
+event = TaskEvent(
+ event_type=EventType.TASK_COMPLETED,
+ source_id="orchestrator",
+ timestamp=time.time(),
+ data={
+ "execution_time": 2.5,
+ "newly_ready_tasks": ["task_2", "task_3"]
+ },
+ task_id="task_1",
+ status="COMPLETED",
+ result={"output": "success"}
+)
+
+await event_bus.publish_event(event)
+```
+
+**Concurrent Notification**: The event bus notifies all observers concurrently using `asyncio.gather()` with `return_exceptions=True`. This means:
+
+- All observers receive events in parallel
+- Slow observers don't block fast ones
+- Exceptions in one observer don't affect others
+- The `publish_event()` call returns after all observers have processed the event
+
+---
+
+## 🔄 Event Flow Patterns
+
+### Pattern 1: Task Execution Flow
+
+This pattern shows how task events flow through the system:
+
+```mermaid
+sequenceDiagram
+ participant O as Orchestrator
+ participant EB as EventBus
+ participant PO as ProgressObserver
+ participant MO as MetricsObserver
+ participant VO as VizObserver
+
+ Note over O: Start task execution
+ O->>EB: publish(TASK_STARTED)
+
+ par Concurrent Notification
+ EB->>PO: on_event(event)
+ EB->>MO: on_event(event)
+ EB->>VO: on_event(event)
+ end
+
+ Note over PO: Track progress
+ Note over MO: Record start time
+ Note over VO: Display task start
+
+ Note over O: Task completes
+ O->>EB: publish(TASK_COMPLETED)
+
+ par Concurrent Notification
+ EB->>PO: on_event(event)
+ EB->>MO: on_event(event)
+ EB->>VO: on_event(event)
+ end
+
+ Note over PO: Queue for agent
+ Note over MO: Calculate duration
+ Note over VO: Update display
+```
+
+### Pattern 2: Constellation Modification Flow
+
+This pattern shows how modification events coordinate agent and orchestrator:
+
+```mermaid
+sequenceDiagram
+ participant A as Agent
+ participant EB as EventBus
+ participant S as Synchronizer
+ participant M as MetricsObserver
+ participant V as VizObserver
+
+ Note over A: Modify constellation
+ A->>EB: publish(CONSTELLATION_MODIFIED)
+
+ par Concurrent Notification
+ EB->>S: on_event(event)
+ EB->>M: on_event(event)
+ EB->>V: on_event(event)
+ end
+
+    Note over S: Complete pending<br/>modification
+ Note over M: Track modification
+ Note over V: Display changes
+```
+
+---
+
+## 💻 Usage Examples
+
+### Example 1: Basic Event Publishing
+
+```python
+import asyncio
+import time
+from galaxy.core.events import (
+ get_event_bus, Event, EventType, IEventObserver
+)
+
+class SimpleLogger(IEventObserver):
+ """Simple observer that logs all events."""
+
+ async def on_event(self, event: Event) -> None:
+ print(f"[{event.timestamp}] {event.event_type.value} from {event.source_id}")
+
+async def main():
+ # Get event bus and subscribe observer
+ event_bus = get_event_bus()
+ logger = SimpleLogger()
+ event_bus.subscribe(logger)
+
+ # Publish some events
+ for i in range(3):
+ event = Event(
+ event_type=EventType.TASK_STARTED,
+ source_id="test_publisher",
+ timestamp=time.time(),
+ data={"iteration": i}
+ )
+ await event_bus.publish_event(event)
+ await asyncio.sleep(0.1)
+
+ # Clean up
+ event_bus.unsubscribe(logger)
+
+asyncio.run(main())
+```
+
+### Example 2: Type-Specific Subscription
+
+```python
+from galaxy.core.events import (
+    get_event_bus, Event, TaskEvent, ConstellationEvent,
+    EventType, IEventObserver
+)
+
+class TaskOnlyObserver(IEventObserver):
+ """Observer that only handles task events."""
+
+ def __init__(self):
+ self.task_count = 0
+ self.completed_tasks = []
+
+ async def on_event(self, event: Event) -> None:
+ if isinstance(event, TaskEvent):
+ self.task_count += 1
+
+ if event.event_type == EventType.TASK_COMPLETED:
+ self.completed_tasks.append(event.task_id)
+ print(f"Task {event.task_id} completed. "
+ f"Total: {len(self.completed_tasks)}")
+
+# Subscribe only to task events
+observer = TaskOnlyObserver()
+event_bus = get_event_bus()
+event_bus.subscribe(observer, {
+ EventType.TASK_STARTED,
+ EventType.TASK_COMPLETED,
+ EventType.TASK_FAILED
+})
+```
+
+### Example 3: Custom Metrics Collection
+
+```python
+from typing import Dict, List
+from galaxy.core.events import (
+    Event, TaskEvent, ConstellationEvent, EventType, IEventObserver
+)
+
+class CustomMetricsCollector(IEventObserver):
+ """Collect custom domain-specific metrics."""
+
+ def __init__(self):
+ self.task_durations: Dict[str, float] = {}
+ self.task_start_times: Dict[str, float] = {}
+ self.constellation_tasks: Dict[str, List[str]] = {}
+
+ async def on_event(self, event: Event) -> None:
+ if isinstance(event, TaskEvent):
+ await self._handle_task_event(event)
+ elif isinstance(event, ConstellationEvent):
+ await self._handle_constellation_event(event)
+
+ async def _handle_task_event(self, event: TaskEvent) -> None:
+ if event.event_type == EventType.TASK_STARTED:
+ self.task_start_times[event.task_id] = event.timestamp
+
+ elif event.event_type == EventType.TASK_COMPLETED:
+ if event.task_id in self.task_start_times:
+ duration = event.timestamp - self.task_start_times[event.task_id]
+ self.task_durations[event.task_id] = duration
+
+ async def _handle_constellation_event(self, event: ConstellationEvent) -> None:
+ if event.event_type == EventType.CONSTELLATION_STARTED:
+ const_id = event.constellation_id
+ self.constellation_tasks[const_id] = []
+
+ def get_average_duration(self) -> float:
+ """Calculate average task duration."""
+ if not self.task_durations:
+ return 0.0
+ return sum(self.task_durations.values()) / len(self.task_durations)
+
+ def get_slowest_tasks(self, n: int = 5) -> List[tuple]:
+ """Get the n slowest tasks."""
+ sorted_tasks = sorted(
+ self.task_durations.items(),
+ key=lambda x: x[1],
+ reverse=True
+ )
+ return sorted_tasks[:n]
+```
+
+---
+
+## ⚙️ Implementation Details
+
+### Internal Observer Storage
+
+The EventBus maintains two internal data structures:
+
+```python
+class EventBus(IEventPublisher):
+ def __init__(self):
+ # Type-specific observers: EventType -> Set[IEventObserver]
+ self._observers: Dict[EventType, Set[IEventObserver]] = {}
+
+ # Observers subscribed to all events
+ self._all_observers: Set[IEventObserver] = set()
+```
+
+**Storage Strategy:**
+
+| Subscription Type | Storage | Lookup Time | Use Case |
+|-------------------|---------|-------------|----------|
+| All events | `_all_observers` set | O(1) | General monitoring |
+| Specific types | `_observers` dict | O(1) | Targeted handling |
+
+### Concurrent Notification Logic
+
+When an event is published, the bus:
+
+1. **Collects relevant observers**: Combines type-specific and all-event observers
+2. **Creates async tasks**: One task per observer
+3. **Executes concurrently**: Uses `asyncio.gather()` with `return_exceptions=True`
+4. **Isolates errors**: Exceptions don't propagate to other observers
+
+```python
+async def publish_event(self, event: Event) -> None:
+ observers_to_notify: Set[IEventObserver] = set()
+
+ # Add type-specific observers
+ if event.event_type in self._observers:
+ observers_to_notify.update(self._observers[event.event_type])
+
+ # Add wildcard observers
+ observers_to_notify.update(self._all_observers)
+
+ # Notify concurrently
+ if observers_to_notify:
+ tasks = [observer.on_event(event) for observer in observers_to_notify]
+ await asyncio.gather(*tasks, return_exceptions=True)
+```
+
+---
+
+## 🎯 Best Practices
+
+### 1. Use Type-Specific Subscriptions
+
+Subscribe only to events you care about:
+
+```python
+# ❌ Bad: Receives all events, must filter manually
+event_bus.subscribe(observer)
+
+# ✅ Good: Receives only relevant events
+event_bus.subscribe(observer, {
+ EventType.TASK_COMPLETED,
+ EventType.CONSTELLATION_MODIFIED
+})
+```
+
+### 2. Handle Errors Gracefully
+
+Always catch exceptions in observer implementations:
+
+```python
+class RobustObserver(IEventObserver):
+ async def on_event(self, event: Event) -> None:
+ try:
+ await self._process_event(event)
+ except Exception as e:
+ self.logger.error(f"Error processing event: {e}")
+ # Don't re-raise - other observers should continue
+```
+
+### 3. Clean Up Subscriptions
+
+Unsubscribe observers when done to prevent memory leaks:
+
+```python
+class SessionManager:
+ def __init__(self):
+ self.observers = []
+
+    def setup_observers(self):
+        # Create and subscribe observers
+        event_bus = get_event_bus()
+        observer = MyObserver()
+        event_bus.subscribe(observer)
+        self.observers.append(observer)
+
+ def cleanup(self):
+ # Unsubscribe all observers
+ event_bus = get_event_bus()
+ for observer in self.observers:
+ event_bus.unsubscribe(observer)
+ self.observers.clear()
+```
+
+### 4. Use Type Guards
+
+Leverage Python's type system for safer event handling:
+
+```python
+from typing import cast
+
+async def on_event(self, event: Event) -> None:
+ if isinstance(event, TaskEvent):
+ # Type checker now knows event is TaskEvent
+ task_event = cast(TaskEvent, event)
+ task_id = task_event.task_id # Type-safe access
+ status = task_event.status
+```
+
+---
+
+## 🔗 Related Documentation
+
+- **[Observer System Overview](overview.md)** — High-level architecture and design
+- **[Session Metrics Observer](metrics_observer.md)** — Performance metrics collection
+
+!!! note "Additional Observer Documentation"
+ For documentation on `ConstellationProgressObserver`, `DAGVisualizationObserver`, `ConstellationModificationSynchronizer`, and `AgentOutputObserver`, refer to their implementation in `galaxy/session/observers/`.
+
+---
+
+## 📋 Summary
+
+The Event System Core provides:
+
+- **EventBus**: Singleton message broker for system-wide communication
+- **EventType**: Enumeration of all system events
+- **Event Classes**: Type-safe event data structures
+- **Interfaces**: Clear contracts for observers and publishers
+- **Concurrent Execution**: Efficient parallel event processing
+- **Error Isolation**: Robust error handling
+
+This foundation enables the Galaxy framework to implement a loosely coupled, extensible event-driven architecture.
diff --git a/documents/docs/galaxy/observer/metrics_observer.md b/documents/docs/galaxy/observer/metrics_observer.md
new file mode 100644
index 000000000..e9d5771f5
--- /dev/null
+++ b/documents/docs/galaxy/observer/metrics_observer.md
@@ -0,0 +1,614 @@
+# Session Metrics Observer
+
+The **SessionMetricsObserver** collects comprehensive performance metrics and statistics during constellation execution. It tracks task execution times, constellation lifecycle, modifications, and computes detailed statistics for performance analysis.
+
+**Location:** `galaxy/session/observers/base_observer.py`
+
+The metrics observer is essential for evaluating Galaxy performance, identifying bottlenecks, and analyzing constellation modification patterns for research and optimization.
+
+---
+
+## 🎯 Purpose
+
+The Metrics Observer provides:
+
+1. **Performance Tracking** — Measure task and constellation execution times
+2. **Success Rate Monitoring** — Track completion and failure rates
+3. **Modification Analytics** — Monitor constellation structural changes
+4. **Statistical Summaries** — Compute aggregated metrics for analysis
+
+---
+
+## 🏗️ Architecture
+
+```mermaid
+graph TB
+ subgraph "Event Sources"
+ O[Orchestrator]
+ A[Agent]
+ end
+
+ subgraph "Event System"
+ EB[EventBus]
+ end
+
+ subgraph "Metrics Observer"
+ SMO[SessionMetricsObserver]
+ TE[Task Events Handler]
+ CE[Constellation Events Handler]
+ MS[Metrics Storage]
+ SC[Statistics Computer]
+ end
+
+ subgraph "Outputs"
+ R[result.json]
+ L[Logs]
+ end
+
+ O -->|task events| EB
+ A -->|constellation events| EB
+ EB -->|notify| SMO
+
+ SMO --> TE
+ SMO --> CE
+ TE --> MS
+ CE --> MS
+ MS --> SC
+ SC --> R
+ SC --> L
+
+ style SMO fill:#66bb6a,stroke:#333,stroke-width:3px
+ style MS fill:#fff4e1,stroke:#333,stroke-width:2px
+ style SC fill:#ffa726,stroke:#333,stroke-width:2px
+ style EB fill:#4a90e2,stroke:#333,stroke-width:2px,color:#fff
+```
+
+---
+
+## 📊 Metrics Collected
+
+The observer collects metrics across three categories:
+
+### Task Metrics
+
+Track individual task execution:
+
+| Metric | Description | Computed |
+|--------|-------------|----------|
+| **task_count** | Total number of tasks started | Real-time |
+| **completed_tasks** | Number of successfully completed tasks | Real-time |
+| **failed_tasks** | Number of failed tasks | Real-time |
+| **total_execution_time** | Sum of all task execution times | Real-time |
+| **task_timings** | Dict mapping task_id → {start, end, duration} | Real-time |
+| **success_rate** | completed / total tasks | Computed |
+| **failure_rate** | failed / total tasks | Computed |
+| **average_task_duration** | Average execution time per task | Computed |
+| **min_task_duration** | Fastest task execution time | Computed |
+| **max_task_duration** | Slowest task execution time | Computed |
+
+### Constellation Metrics
+
+Monitor constellation lifecycle:
+
+| Metric | Description | Computed |
+|--------|-------------|----------|
+| **constellation_count** | Total constellations processed | Real-time |
+| **completed_constellations** | Successfully completed constellations | Real-time |
+| **failed_constellations** | Failed constellations | Real-time |
+| **total_constellation_time** | Total constellation execution time | Real-time |
+| **constellation_timings** | Dict mapping constellation_id → timing data | Real-time |
+| **constellation_success_rate** | completed / total constellations | Computed |
+| **average_constellation_duration** | Average constellation execution time | Computed |
+| **min_constellation_duration** | Fastest constellation | Computed |
+| **max_constellation_duration** | Slowest constellation | Computed |
+| **average_tasks_per_constellation** | Average number of tasks | Computed |
+
+### Modification Metrics
+
+Track constellation structural changes:
+
+| Metric | Description | Computed |
+|--------|-------------|----------|
+| **constellation_modifications** | Dict mapping constellation_id → modification list | Real-time |
+| **total_modifications** | Total number of modifications | Computed |
+| **constellations_modified** | Number of constellations with modifications | Computed |
+| **average_modifications_per_constellation** | Average modifications per constellation | Computed |
+| **max_modifications_for_single_constellation** | Highest modification count for any single constellation | Computed |
+| **most_modified_constellation** | ID of most-modified constellation | Computed |
+| **modification_types_breakdown** | Count by modification type | Computed |
+
+---
+
+## 💻 Implementation
+
+### Initialization
+
+```python
+from galaxy.session.observers import SessionMetricsObserver
+import logging
+
+# Create metrics observer
+metrics_observer = SessionMetricsObserver(
+ session_id="galaxy_session_20231113",
+ logger=logging.getLogger(__name__)
+)
+
+# Subscribe to event bus
+from galaxy.core.events import get_event_bus
+event_bus = get_event_bus()
+event_bus.subscribe(metrics_observer)
+```
+
+**Constructor Parameters:**
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `session_id` | `str` | Yes | Unique identifier for the session |
+| `logger` | `logging.Logger` | No | Logger instance (creates default if None) |
+
+### Internal Metrics Structure
+
+The observer maintains a comprehensive metrics dictionary:
+
+```python
+self.metrics: Dict[str, Any] = {
+ "session_id": session_id,
+
+ # Task metrics
+ "task_count": 0,
+ "completed_tasks": 0,
+ "failed_tasks": 0,
+ "total_execution_time": 0.0,
+ "task_timings": {}, # task_id -> {start, end, duration}
+
+ # Constellation metrics
+ "constellation_count": 0,
+ "completed_constellations": 0,
+ "failed_constellations": 0,
+ "total_constellation_time": 0.0,
+ "constellation_timings": {}, # constellation_id -> timing data
+
+ # Modification tracking
+ "constellation_modifications": {} # constellation_id -> [modifications]
+}
+```
+
+---
+
+## 🔄 Event Processing
+
+### Task Event Handling
+
+The observer tracks task lifecycle events:
+
+```mermaid
+sequenceDiagram
+ participant O as Orchestrator
+ participant EB as EventBus
+ participant MO as MetricsObserver
+ participant MS as Metrics Storage
+
+ O->>EB: TASK_STARTED
+ EB->>MO: on_event(event)
+    MO->>MS: Increment task_count<br/>Record start_time
+
+ Note over O: Task executes
+
+ O->>EB: TASK_COMPLETED
+ EB->>MO: on_event(event)
+    MO->>MS: Increment completed_tasks<br/>Calculate duration<br/>Update total_execution_time
+```
+
+**Processing Logic:**
+
+```python
+def _handle_task_started(self, event: TaskEvent) -> None:
+ """Handle TASK_STARTED event."""
+ self.metrics["task_count"] += 1
+ self.metrics["task_timings"][event.task_id] = {
+ "start": event.timestamp
+ }
+
+def _handle_task_completed(self, event: TaskEvent) -> None:
+ """Handle TASK_COMPLETED event."""
+ self.metrics["completed_tasks"] += 1
+
+ if event.task_id in self.metrics["task_timings"]:
+ duration = (
+ event.timestamp -
+ self.metrics["task_timings"][event.task_id]["start"]
+ )
+ self.metrics["task_timings"][event.task_id]["duration"] = duration
+ self.metrics["task_timings"][event.task_id]["end"] = event.timestamp
+ self.metrics["total_execution_time"] += duration
+
+def _handle_task_failed(self, event: TaskEvent) -> None:
+ """Handle TASK_FAILED event."""
+ self.metrics["failed_tasks"] += 1
+ # Also calculate duration for failed tasks
+ if event.task_id in self.metrics["task_timings"]:
+ duration = (
+ event.timestamp -
+ self.metrics["task_timings"][event.task_id]["start"]
+ )
+ self.metrics["task_timings"][event.task_id]["duration"] = duration
+ self.metrics["total_execution_time"] += duration
+```
+
+### Constellation Event Handling
+
+Tracks constellation lifecycle and modifications:
+
+```python
+def _handle_constellation_started(self, event: ConstellationEvent) -> None:
+ """Handle CONSTELLATION_STARTED event."""
+ self.metrics["constellation_count"] += 1
+ constellation_id = event.constellation_id
+ constellation = event.data.get("constellation")
+
+ # Store initial statistics
+ self.metrics["constellation_timings"][constellation_id] = {
+ "start_time": event.timestamp,
+ "initial_statistics": (
+ constellation.get_statistics() if constellation else {}
+ ),
+ "processing_start_time": event.data.get("processing_start_time"),
+ "processing_end_time": event.data.get("processing_end_time"),
+ "processing_duration": event.data.get("processing_duration"),
+ }
+
+def _handle_constellation_completed(self, event: ConstellationEvent) -> None:
+ """Handle CONSTELLATION_COMPLETED event."""
+ self.metrics["completed_constellations"] += 1
+ constellation_id = event.constellation_id
+ constellation = event.data.get("constellation")
+
+ # Calculate duration and store final statistics
+ duration = (
+ event.timestamp -
+ self.metrics["constellation_timings"][constellation_id]["start_time"]
+ if constellation_id in self.metrics["constellation_timings"]
+ else None
+ )
+
+ if constellation_id in self.metrics["constellation_timings"]:
+ self.metrics["constellation_timings"][constellation_id].update({
+ "end_time": event.timestamp,
+ "duration": duration,
+ "final_statistics": (
+ constellation.get_statistics() if constellation else {}
+ ),
+ })
+```
+
+### Modification Tracking
+
+Tracks constellation structural changes with detailed change detection:
+
+```python
+def _handle_constellation_modified(self, event: ConstellationEvent) -> None:
+ """Handle CONSTELLATION_MODIFIED event."""
+ constellation_id = event.constellation_id
+
+ # Initialize modifications list if needed
+ if constellation_id not in self.metrics["constellation_modifications"]:
+ self.metrics["constellation_modifications"][constellation_id] = []
+
+ if hasattr(event, "data") and event.data:
+ old_constellation = event.data.get("old_constellation")
+ new_constellation = event.data.get("new_constellation")
+
+ # Calculate changes using VisualizationChangeDetector
+ changes = None
+ if old_constellation and new_constellation:
+ changes = VisualizationChangeDetector.calculate_constellation_changes(
+ old_constellation, new_constellation
+ )
+
+ # Store modification record
+ modification_record = {
+ "timestamp": event.timestamp,
+ "modification_type": event.data.get("modification_type", "unknown"),
+ "on_task_id": event.data.get("on_task_id", []),
+ "changes": changes,
+ "new_statistics": (
+ new_constellation.get_statistics() if new_constellation else {}
+ ),
+ "processing_start_time": event.data.get("processing_start_time"),
+ "processing_end_time": event.data.get("processing_end_time"),
+ "processing_duration": event.data.get("processing_duration"),
+ }
+
+ self.metrics["constellation_modifications"][constellation_id].append(
+ modification_record
+ )
+```
+
+---
+
+## 📖 API Reference
+
+### Constructor
+
+```python
+def __init__(self, session_id: str, logger: Optional[logging.Logger] = None)
+```
+
+Initialize the metrics observer.
+
+**Parameters:**
+
+- `session_id` — Unique identifier for the session
+- `logger` — Optional logger instance (creates default if None)
+
+### get_metrics()
+
+```python
+def get_metrics(self) -> Dict[str, Any]
+```
+
+Get collected metrics with computed statistics.
+
+**Returns:**
+
+Dictionary containing:
+- All raw metrics (counts, timings, etc.)
+- `task_statistics` — Computed task metrics
+- `constellation_statistics` — Computed constellation metrics
+- `modification_statistics` — Computed modification metrics
+
+**Example:**
+
+```python
+# After constellation execution
+metrics = metrics_observer.get_metrics()
+
+# Access task statistics
+print(f"Total tasks: {metrics['task_statistics']['total_tasks']}")
+print(f"Success rate: {metrics['task_statistics']['success_rate']:.2%}")
+print(f"Avg duration: {metrics['task_statistics']['average_task_duration']:.2f}s")
+
+# Access constellation statistics
+print(f"Total constellations: {metrics['constellation_statistics']['total_constellations']}")
+print(f"Avg tasks per constellation: {metrics['constellation_statistics']['average_tasks_per_constellation']:.1f}")
+
+# Access modification statistics
+print(f"Total modifications: {metrics['modification_statistics']['total_modifications']}")
+print(f"Modification types: {metrics['modification_statistics']['modification_types_breakdown']}")
+```
+
+---
+
+## 📊 Computed Statistics
+
+The observer computes three categories of statistics:
+
+### Task Statistics
+
+```python
+{
+ "total_tasks": 10,
+ "completed_tasks": 8,
+ "failed_tasks": 2,
+ "success_rate": 0.8,
+ "failure_rate": 0.2,
+ "average_task_duration": 2.5,
+ "min_task_duration": 0.5,
+ "max_task_duration": 5.2,
+ "total_task_execution_time": 25.0
+}
+```
+
+### Constellation Statistics
+
+```python
+{
+ "total_constellations": 1,
+ "completed_constellations": 1,
+ "failed_constellations": 0,
+ "success_rate": 1.0,
+ "average_constellation_duration": 30.5,
+ "min_constellation_duration": 30.5,
+ "max_constellation_duration": 30.5,
+ "total_constellation_time": 30.5,
+ "average_tasks_per_constellation": 10.0
+}
+```
+
+### Modification Statistics
+
+```python
+{
+ "total_modifications": 3,
+ "constellations_modified": 1,
+ "average_modifications_per_constellation": 3.0,
+ "max_modifications_for_single_constellation": 3,
+ "most_modified_constellation": "const_123",
+ "modifications_per_constellation": {
+ "const_123": 3
+ },
+ "modification_types_breakdown": {
+ "add_tasks": 2,
+ "modify_dependencies": 1
+ }
+}
+```
+
+---
+
+## 🔍 Usage Examples
+
+### Example 1: Basic Metrics Collection
+
+```python
+import asyncio
+from galaxy.core.events import get_event_bus
+from galaxy.session.observers import SessionMetricsObserver
+
+async def collect_metrics():
+ """Collect and display metrics for constellation execution."""
+
+ # Create and subscribe metrics observer
+ metrics_observer = SessionMetricsObserver(session_id="demo_session")
+ event_bus = get_event_bus()
+ event_bus.subscribe(metrics_observer)
+
+ # Execute constellation (orchestrator will publish events)
+ await orchestrator.execute_constellation(constellation)
+
+ # Retrieve metrics
+ metrics = metrics_observer.get_metrics()
+
+ # Display summary
+ print("\n=== Execution Summary ===")
+ print(f"Session: {metrics['session_id']}")
+ print(f"Tasks: {metrics['task_count']} total, "
+ f"{metrics['completed_tasks']} completed, "
+ f"{metrics['failed_tasks']} failed")
+ print(f"Total execution time: {metrics['total_execution_time']:.2f}s")
+
+ # Display task statistics
+ task_stats = metrics['task_statistics']
+ print(f"\nTask Success Rate: {task_stats['success_rate']:.1%}")
+ print(f"Average Task Duration: {task_stats['average_task_duration']:.2f}s")
+ print(f"Fastest Task: {task_stats['min_task_duration']:.2f}s")
+ print(f"Slowest Task: {task_stats['max_task_duration']:.2f}s")
+
+ # Clean up
+ event_bus.unsubscribe(metrics_observer)
+
+asyncio.run(collect_metrics())
+```
+
+### Example 2: Performance Analysis
+
+```python
+def analyze_performance(metrics_observer: SessionMetricsObserver):
+ """Analyze performance metrics and identify bottlenecks."""
+
+ metrics = metrics_observer.get_metrics()
+ task_timings = metrics['task_timings']
+
+ # Find slowest tasks
+ sorted_tasks = sorted(
+ task_timings.items(),
+ key=lambda x: x[1].get('duration', 0),
+ reverse=True
+ )
+
+ print("\n=== Top 5 Slowest Tasks ===")
+ for task_id, timing in sorted_tasks[:5]:
+ duration = timing.get('duration', 0)
+ print(f"{task_id}: {duration:.2f}s")
+
+ # Analyze modification patterns
+ mod_stats = metrics['modification_statistics']
+ if mod_stats['total_modifications'] > 0:
+ print(f"\n=== Modification Analysis ===")
+ print(f"Total Modifications: {mod_stats['total_modifications']}")
+ print(f"Average per Constellation: "
+ f"{mod_stats['average_modifications_per_constellation']:.1f}")
+ print(f"Most Modified: {mod_stats['most_modified_constellation']}")
+ print("\nModification Types:")
+ for mod_type, count in mod_stats['modification_types_breakdown'].items():
+ print(f" {mod_type}: {count}")
+```
+
+### Example 3: Export Metrics to JSON
+
+```python
+import json
+from pathlib import Path
+
+def export_metrics(metrics_observer: SessionMetricsObserver, output_path: str):
+ """Export metrics to JSON file for analysis."""
+
+ metrics = metrics_observer.get_metrics()
+
+ # Convert to JSON-serializable format
+ output_data = {
+ "session_id": metrics["session_id"],
+ "task_statistics": metrics["task_statistics"],
+ "constellation_statistics": metrics["constellation_statistics"],
+ "modification_statistics": metrics["modification_statistics"],
+ "raw_metrics": {
+ "task_count": metrics["task_count"],
+ "completed_tasks": metrics["completed_tasks"],
+ "failed_tasks": metrics["failed_tasks"],
+ "total_execution_time": metrics["total_execution_time"],
+ "constellation_count": metrics["constellation_count"],
+ }
+ }
+
+ # Write to file
+ output_file = Path(output_path)
+ output_file.parent.mkdir(parents=True, exist_ok=True)
+
+ with open(output_file, 'w') as f:
+ json.dump(output_data, f, indent=2)
+
+ print(f"Metrics exported to: {output_file}")
+```
+
+---
+
+## 🎓 Best Practices
+
+### 1. Session ID Naming
+
+Use descriptive session IDs for easier analysis:
+
+```python
+# ✅ Good: Descriptive session ID
+session_id = f"galaxy_session_{task_type}_{timestamp}"
+
+# ❌ Bad: Generic session ID
+session_id = "session_1"
+```
+
+### 2. Metrics Export
+
+Export metrics immediately after execution:
+
+```python
+try:
+ await orchestrator.execute_constellation(constellation)
+finally:
+ # Always export metrics, even if execution failed
+    # export_metrics() takes the observer and calls get_metrics() on it
+    export_metrics(metrics_observer, "results/metrics.json")
+```
+
+### 3. Memory Management
+
+Clear large timing dictionaries for long-running sessions:
+
+```python
+# After processing metrics
+metrics_observer.metrics["task_timings"].clear()
+metrics_observer.metrics["constellation_timings"].clear()
+```
+
+---
+
+## 🔗 Related Documentation
+
+- **[Observer System Overview](overview.md)** — Architecture and design
+- **[Event System Core](event_system.md)** — Event types and EventBus
+
+!!! note "Additional Resources"
+ For information on constellation execution and orchestration, see the constellation orchestrator documentation in `galaxy/constellation/orchestrator/`.
+
+---
+
+## 📋 Summary
+
+The Session Metrics Observer:
+
+- **Collects** comprehensive performance metrics
+- **Tracks** task and constellation execution times
+- **Monitors** modification patterns
+- **Computes** statistical summaries
+- **Exports** data for analysis
+
+This observer is essential for performance evaluation, bottleneck identification, and research analysis of Galaxy's constellation execution.
diff --git a/documents/docs/galaxy/observer/overview.md b/documents/docs/galaxy/observer/overview.md
new file mode 100644
index 000000000..2d14c0d68
--- /dev/null
+++ b/documents/docs/galaxy/observer/overview.md
@@ -0,0 +1,405 @@
+# Observer System — Overview
+
+The **Observer System** in UFO Galaxy implements an event-driven architecture that enables real-time monitoring, visualization, and coordination of constellation execution. It provides a decoupled, extensible mechanism for components to react to system events without tight coupling.
+
+The system implements the classic **Observer Pattern** (also known as Publish-Subscribe), enabling loose coupling between event producers and consumers. This allows the system to be extended with new observers without modifying existing code.
+
+---
+
+## 🎯 Purpose and Design Goals
+
+The observer system serves several critical functions in the Galaxy framework:
+
+1. **Real-time Monitoring** — Track task execution, constellation lifecycle, and system events
+2. **Visualization** — Provide live updates for DAG topology and execution progress
+3. **Metrics Collection** — Gather performance statistics and execution data
+4. **Synchronization** — Coordinate between agent modifications and orchestrator execution
+5. **Agent Output Handling** — Display agent responses and actions in real-time
+
+---
+
+## 🏗️ Architecture Overview
+
+The observer system consists of four main layers:
+
+```mermaid
+graph TB
+ subgraph "Event Publishers"
+ A1[Orchestrator]
+ A2[Agent]
+ A3[Device Manager]
+ end
+
+ subgraph "Event Bus Layer"
+ B[EventBus<br/>Singleton]
+ end
+
+ subgraph "Observer Layer"
+ C1[ConstellationProgressObserver]
+ C2[SessionMetricsObserver]
+ C3[DAGVisualizationObserver]
+ C4[ConstellationModificationSynchronizer]
+ C5[AgentOutputObserver]
+ end
+
+ subgraph "Handler Layer"
+ D1[TaskVisualizationHandler]
+ D2[ConstellationVisualizationHandler]
+ end
+
+ A1 -->|publish events| B
+ A2 -->|publish events| B
+ A3 -->|publish events| B
+
+ B -->|notify| C1
+ B -->|notify| C2
+ B -->|notify| C3
+ B -->|notify| C4
+ B -->|notify| C5
+
+ C3 -->|delegate| D1
+ C3 -->|delegate| D2
+
+ style B fill:#4a90e2,stroke:#333,stroke-width:3px,color:#fff
+ style C1 fill:#66bb6a,stroke:#333,stroke-width:2px
+ style C2 fill:#66bb6a,stroke:#333,stroke-width:2px
+ style C3 fill:#66bb6a,stroke:#333,stroke-width:2px
+ style C4 fill:#ffa726,stroke:#333,stroke-width:2px
+ style C5 fill:#66bb6a,stroke:#333,stroke-width:2px
+```
+
+**Architecture Layers:**
+
+| Layer | Component | Responsibility |
+|-------|-----------|----------------|
+| **Event Publishers** | Orchestrator, Agent, Device Manager | Generate events during system operation |
+| **Event Bus** | `EventBus` singleton | Central message broker, manages subscriptions and routing |
+| **Observers** | 5 specialized observers | React to specific event types and perform actions |
+| **Handlers** | Task & Constellation handlers | Delegate visualization logic for specific components |
+
+---
+
+## 📊 Core Components
+
+### Event System Core
+
+The foundation of the observer system consists of:
+
+| Component | Location | Description |
+|-----------|----------|-------------|
+| **EventBus** | `galaxy/core/events.py` | Central message broker managing subscriptions |
+| **EventType** | `galaxy/core/events.py` | Enumeration of all system event types |
+| **Event Classes** | `galaxy/core/events.py` | Base (`Event`) and specialized (`TaskEvent`, `ConstellationEvent`, `AgentEvent`, `DeviceEvent`) event data structures |
+| **Interfaces** | `galaxy/core/events.py` | `IEventObserver`, `IEventPublisher` contracts |
+
+For detailed documentation of the event system core components, see the **[Event System Core](event_system.md)** documentation.
+
+### Observer Implementations
+
+Five specialized observers handle different aspects of system monitoring:
+
+| Observer | File Location | Primary Role | Key Features |
+|----------|---------------|--------------|--------------|
+| **ConstellationProgressObserver** | `galaxy/session/observers/base_observer.py` | Task progress tracking | Queues completion events for agent, coordinates task lifecycle |
+| **SessionMetricsObserver** | `galaxy/session/observers/base_observer.py` | Performance metrics | Collects timing, success rates, modification statistics |
+| **DAGVisualizationObserver** | `galaxy/session/observers/dag_visualization_observer.py` | Real-time visualization | Displays constellation topology and execution flow |
+| **ConstellationModificationSynchronizer** | `galaxy/session/observers/constellation_sync_observer.py` | Modification coordination | Prevents race conditions between agent and orchestrator |
+| **AgentOutputObserver** | `galaxy/session/observers/agent_output_observer.py` | Agent interaction display | Shows agent responses and actions in real-time |
+
+---
+
+## 🔄 Event Flow
+
+The following diagram illustrates how events flow through the system:
+
+```mermaid
+sequenceDiagram
+ participant O as Orchestrator
+ participant EB as EventBus
+ participant CPO as ProgressObserver
+ participant SMO as MetricsObserver
+ participant DVO as VisualizationObserver
+ participant A as Agent
+
+ O->>EB: publish(TASK_STARTED)
+ EB->>CPO: on_event(event)
+ EB->>SMO: on_event(event)
+ EB->>DVO: on_event(event)
+
+ Note over DVO: Display task start
+ Note over SMO: Increment task count
+
+ O->>EB: publish(TASK_COMPLETED)
+ EB->>CPO: on_event(event)
+ EB->>SMO: on_event(event)
+ EB->>DVO: on_event(event)
+
+ CPO->>A: add_task_completion_event()
+ Note over A: Process result,<br/>modify constellation
+
+ A->>EB: publish(CONSTELLATION_MODIFIED)
+ EB->>SMO: on_event(event)
+ EB->>DVO: on_event(event)
+
+ Note over DVO: Display updated DAG
+ Note over SMO: Track modification
+```
+
+The event flow demonstrates how a single action (task completion) triggers multiple observers, each performing its specialized function without interfering with the others.
+
+---
+
+## 📋 Event Types
+
+The system defines the following event types:
+
+### Task Events
+
+Track individual task execution lifecycle:
+
+| Event Type | Trigger | Data Includes |
+|------------|---------|---------------|
+| `TASK_STARTED` | Task begins execution | task_id, status, constellation_id |
+| `TASK_COMPLETED` | Task finishes successfully | task_id, result, execution_time, newly_ready_tasks |
+| `TASK_FAILED` | Task encounters error | task_id, error, retry_info |
+
+### Constellation Events
+
+Monitor constellation-level operations:
+
+| Event Type | Trigger | Data Includes |
+|------------|---------|---------------|
+| `CONSTELLATION_STARTED` | Constellation begins processing | constellation, initial_statistics, processing_time |
+| `CONSTELLATION_COMPLETED` | All tasks finished | constellation, final_statistics, execution_time |
+| `CONSTELLATION_FAILED` | Constellation encounters error | constellation, error |
+| `CONSTELLATION_MODIFIED` | Structure changed by agent | old_constellation, new_constellation, on_task_id, modification_type, changes |
+
+### Agent Events
+
+Display agent interactions:
+
+| Event Type | Trigger | Data Includes |
+|------------|---------|---------------|
+| `AGENT_RESPONSE` | Agent generates response | agent_name, agent_type, response_data |
+| `AGENT_ACTION` | Agent executes action | agent_name, action_type, actions |
+
+### Device Events
+
+Monitor device status (used by client):
+
+| Event Type | Trigger | Data Includes |
+|------------|---------|---------------|
+| `DEVICE_CONNECTED` | Device joins pool | device_id, device_status, device_info |
+| `DEVICE_DISCONNECTED` | Device leaves pool | device_id, device_status |
+| `DEVICE_STATUS_CHANGED` | Device state changes | device_id, device_status, all_devices |
+
+---
+
+## 🚀 Usage Example
+
+Here's a complete example showing how observers are initialized and used in a Galaxy session:
+
+```python
+from galaxy.core.events import get_event_bus, EventType
+from galaxy.session.observers import (
+ ConstellationProgressObserver,
+ SessionMetricsObserver,
+ DAGVisualizationObserver,
+ ConstellationModificationSynchronizer,
+ AgentOutputObserver
+)
+
+# Get the global event bus
+event_bus = get_event_bus()
+
+# 1. Create progress observer for agent coordination
+progress_observer = ConstellationProgressObserver(agent=constellation_agent)
+event_bus.subscribe(progress_observer)
+
+# 2. Create metrics observer for performance tracking
+metrics_observer = SessionMetricsObserver(
+ session_id="my_session",
+ logger=logger
+)
+event_bus.subscribe(metrics_observer)
+
+# 3. Create visualization observer for real-time display
+viz_observer = DAGVisualizationObserver(enable_visualization=True)
+event_bus.subscribe(viz_observer)
+
+# 4. Create synchronizer to prevent race conditions
+synchronizer = ConstellationModificationSynchronizer(
+ orchestrator=orchestrator,
+ logger=logger
+)
+event_bus.subscribe(synchronizer)
+
+# 5. Create agent output observer for displaying interactions
+agent_output_observer = AgentOutputObserver(presenter_type="rich")
+event_bus.subscribe(agent_output_observer)
+
+# Execute constellation
+await orchestrator.execute_constellation(constellation)
+
+# Retrieve collected metrics
+metrics = metrics_observer.get_metrics()
+print(f"Tasks completed: {metrics['completed_tasks']}")
+print(f"Total execution time: {metrics['total_execution_time']:.2f}s")
+print(f"Modifications: {metrics['constellation_modifications']}")
+```
+
+---
+
+## 🔑 Key Benefits
+
+### 1. Decoupling
+
+Events decouple components — publishers don't need to know about observers:
+
+- **Orchestrator** publishes task events without knowing who's listening
+- **Agent** publishes modification events without coordinating with orchestrator
+- **New observers** can be added without changing existing code
+
+### 2. Extensibility
+
+Add custom observers for new functionality:
+
+```python
+from galaxy.core.events import IEventObserver, Event, EventType, get_event_bus
+
+class CustomMetricsObserver(IEventObserver):
+ """Custom observer for domain-specific metrics."""
+
+ def __init__(self):
+ self.custom_metrics = {}
+
+ async def on_event(self, event: Event) -> None:
+ if event.event_type == EventType.TASK_COMPLETED:
+ # Collect custom metrics
+ task_type = event.data.get("task_type")
+ if task_type not in self.custom_metrics:
+ self.custom_metrics[task_type] = []
+
+ self.custom_metrics[task_type].append({
+ "duration": event.data.get("execution_time"),
+ "result": event.result
+ })
+
+# Subscribe to specific events
+event_bus = get_event_bus()
+custom_observer = CustomMetricsObserver()
+event_bus.subscribe(custom_observer, {EventType.TASK_COMPLETED})
+```
+
+### 3. Concurrent Execution
+
+All observers are notified concurrently using `asyncio.gather()`:
+
+- No observer blocks another
+- Exceptions in one observer don't affect others
+- Efficient parallel processing
+
+### 4. Type-Safe Event Handling
+
+Specialized event classes provide type safety:
+
+```python
+async def on_event(self, event: Event) -> None:
+ if isinstance(event, TaskEvent):
+ # TaskEvent-specific handling
+ task_id = event.task_id # Type-safe access
+ status = event.status
+
+ elif isinstance(event, ConstellationEvent):
+ # ConstellationEvent-specific handling
+ constellation_id = event.constellation_id
+ state = event.constellation_state
+```
+
+---
+
+## 📚 Component Documentation
+
+Explore detailed documentation for each observer:
+
+- **[Session Metrics Observer](metrics_observer.md)** — Performance metrics and statistics collection
+- **[Event System Core](event_system.md)** — Event bus, event types, and interfaces
+
+!!! note "Additional Observers"
+ Documentation for `ConstellationProgressObserver`, `DAGVisualizationObserver`, `ConstellationModificationSynchronizer`, and `AgentOutputObserver` is available in their source code files. These observers handle task progress tracking, real-time visualization, modification synchronization, and agent output display respectively.
+
+---
+
+## 🔗 Related Documentation
+
+- **[Constellation Orchestrator](../constellation_orchestrator/overview.md)** — Event publishers for task execution
+- **[Constellation Agent](../constellation_agent/overview.md)** — Event publishers for agent operations
+- **[Performance Metrics](../evaluation/performance_metrics.md)** — How metrics are collected and analyzed
+- **[Event-Driven Coordination](../constellation_orchestrator/event_driven_coordination.md)** — Deep dive into event system architecture
+
+---
+
+## 💡 Best Practices
+
+### Observer Lifecycle Management
+
+Properly manage observer subscriptions to prevent memory leaks:
+
+```python
+# Subscribe observers
+observers = [progress_observer, metrics_observer, viz_observer]
+for observer in observers:
+ event_bus.subscribe(observer)
+
+try:
+ # Execute constellation
+ await orchestrator.execute_constellation(constellation)
+finally:
+ # Clean up observers
+ for observer in observers:
+ event_bus.unsubscribe(observer)
+```
+
+### Event-Specific Subscription
+
+Subscribe only to relevant events for efficiency:
+
+```python
+# Instead of subscribing to all events
+event_bus.subscribe(observer) # Receives ALL events
+
+# Subscribe to specific event types
+event_bus.subscribe(observer, {
+ EventType.TASK_COMPLETED,
+ EventType.TASK_FAILED,
+ EventType.CONSTELLATION_MODIFIED
+})
+```
+
+### Error Handling in Observers
+
+Always handle exceptions gracefully:
+
+```python
+async def on_event(self, event: Event) -> None:
+ try:
+ # Process event
+ await self._handle_event(event)
+ except Exception as e:
+ self.logger.error(f"Error processing event: {e}")
+ # Don't re-raise - let other observers continue
+```
+
+---
+
+## 🎓 Summary
+
+The Observer System provides a robust, event-driven foundation for monitoring and coordinating Galaxy's constellation execution:
+
+- **Event Bus** acts as central message broker
+- **5 specialized observers** handle different aspects of monitoring
+- **Loose coupling** enables extensibility and maintainability
+- **Concurrent execution** ensures efficient event processing
+- **Type-safe events** provide clear contracts and error prevention
+
+For implementation details of specific observers, refer to the individual component documentation pages linked above.
diff --git a/documents/docs/galaxy/observer/progress_observer.md b/documents/docs/galaxy/observer/progress_observer.md
new file mode 100644
index 000000000..13df76860
--- /dev/null
+++ b/documents/docs/galaxy/observer/progress_observer.md
@@ -0,0 +1,483 @@
+# Constellation Progress Observer
+
+The **ConstellationProgressObserver** is responsible for tracking task execution progress and coordinating between the orchestrator and the agent. It acts as the bridge that enables the agent to react to task completion events and make necessary constellation modifications.
+
+**Location:** `galaxy/session/observers/base_observer.py`
+
+## Purpose
+
+The Progress Observer serves two critical functions:
+
+- **Task Completion Coordination** — Queues task completion events for the agent to process
+- **Constellation Event Handling** — Notifies the agent when constellation execution completes
+
+## Architecture
+
+```mermaid
+graph TB
+ subgraph "Orchestrator Layer"
+ O[TaskConstellationOrchestrator]
+ end
+
+ subgraph "Event System"
+ EB[EventBus]
+ end
+
+ subgraph "Observer Layer"
+ CPO[ConstellationProgressObserver]
+ end
+
+ subgraph "Agent Layer"
+ A[ConstellationAgent]
+ Q[Task Completion Queue]
+ end
+
+ O -->|publish events| EB
+ EB -->|notify| CPO
+ CPO -->|queue events| Q
+ A -->|process from| Q
+
+ style CPO fill:#66bb6a,stroke:#333,stroke-width:3px
+ style Q fill:#fff4e1,stroke:#333,stroke-width:2px
+ style EB fill:#4a90e2,stroke:#333,stroke-width:2px,color:#fff
+```
+
+**Component Interaction:**
+
+| Component | Role | Communication |
+|-----------|------|---------------|
+| **Orchestrator** | Executes tasks, publishes events | → EventBus |
+| **EventBus** | Distributes events | → Progress Observer |
+| **Progress Observer** | Filters & queues relevant events | → Agent Queue |
+| **Agent** | Processes completions, modifies constellation | ← Agent Queue |
+
+## Event Handling
+
+The Progress Observer handles two types of events:
+
+### Task Events
+
+Monitors task execution lifecycle and queues completion events:
+
+```mermaid
+sequenceDiagram
+ participant O as Orchestrator
+ participant EB as EventBus
+ participant PO as ProgressObserver
+ participant Q as Agent Queue
+ participant A as Agent
+
+ O->>EB: TASK_STARTED
+ EB->>PO: on_event(event)
+ Note over PO: Store task result<br/>Log progress
+
+ O->>EB: TASK_COMPLETED
+ EB->>PO: on_event(event)
+ Note over PO: Store result<br/>Queue for agent
+ PO->>Q: add_task_completion_event()
+
+ Note over A: Agent in Continue state<br/>waiting for events
+ A->>Q: get event
+ Note over A: Process result<br/>Modify constellation
+```
+
+**Handled Event Types:**
+
+| Event Type | Action | Data Stored |
+|------------|--------|-------------|
+| `TASK_STARTED` | Store task result placeholder | task_id, status, timestamp |
+| `TASK_COMPLETED` | Store result, queue for agent | task_id, status, result, timestamp |
+| `TASK_FAILED` | Store error, queue for agent | task_id, status, error, timestamp |
+
+### Constellation Events
+
+Handles constellation lifecycle events:
+
+| Event Type | Action | Effect |
+|------------|--------|--------|
+| `CONSTELLATION_COMPLETED` | Queue completion event for agent | Wakes up agent's Continue state to process final results |
+
+## Implementation
+
+### Initialization
+
+```python
+from galaxy.session.observers import ConstellationProgressObserver
+from galaxy.agents import ConstellationAgent
+
+# Create progress observer with agent reference
+agent = ConstellationAgent(orchestrator=orchestrator)
+progress_observer = ConstellationProgressObserver(agent=agent)
+
+# Subscribe to event bus
+from galaxy.core.events import get_event_bus
+event_bus = get_event_bus()
+event_bus.subscribe(progress_observer)
+```
+
+**Constructor Parameters:**
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `agent` | `ConstellationAgent` | The agent that will process queued events |
+
+### Internal Data Structures
+
+The observer maintains:
+
+```python
+class ConstellationProgressObserver(IEventObserver):
+ def __init__(self, agent: ConstellationAgent):
+ self.agent = agent
+
+ # Task results storage: task_id -> result dict
+ self.task_results: Dict[str, Dict[str, Any]] = {}
+
+ self.logger = logging.getLogger(__name__)
+```
+
+**Task Result Structure:**
+
+```python
+{
+ "task_id": "task_123",
+ "status": "COMPLETED", # or "FAILED"
+ "result": {...}, # Task execution result
+ "error": None, # Exception if failed
+ "timestamp": 1234567890.123
+}
+```
+
+## Event Processing Flow
+
+### Task Event Processing
+
+```python
+async def _handle_task_event(self, event: TaskEvent) -> None:
+ """Handle task progress events and queue them for agent processing."""
+
+ try:
+ self.logger.info(
+ f"Task progress: {event.task_id} -> {event.status}. "
+ f"Event Type: {event.event_type}"
+ )
+
+ # 1. Store task result for tracking
+ self.task_results[event.task_id] = {
+ "task_id": event.task_id,
+ "status": event.status,
+ "result": event.result,
+ "error": event.error,
+ "timestamp": event.timestamp,
+ }
+
+ # 2. Queue completion/failure events for agent
+ if event.event_type in [EventType.TASK_COMPLETED, EventType.TASK_FAILED]:
+ await self.agent.add_task_completion_event(event)
+
+ except Exception as e:
+ self.logger.error(f"Error handling task event: {e}", exc_info=True)
+```
+
+**Processing Steps:**
+
+1. **Log Progress**: Record task status change
+2. **Store Result**: Update internal task_results dictionary
+3. **Queue for Agent**: If completion/failure, add to agent's queue
+4. **Error Handling**: Catch and log any exceptions
+
+### Constellation Event Processing
+
+```python
+async def _handle_constellation_event(self, event: ConstellationEvent) -> None:
+ """Handle constellation update events."""
+
+ try:
+ if event.event_type == EventType.CONSTELLATION_COMPLETED:
+ # Queue completion event for agent
+ await self.agent.add_constellation_completion_event(event)
+
+ except Exception as e:
+ self.logger.error(
+ f"Error handling constellation event: {e}",
+ exc_info=True
+ )
+```
+
+## API Reference
+
+### Constructor
+
+```python
+def __init__(self, agent: ConstellationAgent)
+```
+
+Initialize the progress observer with a reference to the agent.
+
+**Parameters:**
+
+- `agent` — `ConstellationAgent` instance that will process queued events
+
+**Example:**
+
+```python
+from galaxy.agents import ConstellationAgent
+from galaxy.session.observers import ConstellationProgressObserver
+
+agent = ConstellationAgent(orchestrator=orchestrator)
+progress_observer = ConstellationProgressObserver(agent=agent)
+```
+
+### Event Handler
+
+```python
+async def on_event(self, event: Event) -> None
+```
+
+Handle constellation-related events (TaskEvent or ConstellationEvent).
+
+**Parameters:**
+
+- `event` — Event instance (TaskEvent or ConstellationEvent)
+
+**Behavior:**
+
+- Filters events by type (TaskEvent vs ConstellationEvent)
+- Delegates to appropriate handler method
+- Logs progress and stores results
+- Queues completion events for agent
+
+## Usage Examples
+
+### Example 1: Basic Setup
+
+```python
+import asyncio
+from galaxy.core.events import get_event_bus
+from galaxy.agents import ConstellationAgent
+from galaxy.constellation import TaskConstellationOrchestrator
+from galaxy.session.observers import ConstellationProgressObserver
+
+async def setup_progress_tracking():
+ """Set up progress tracking for constellation execution."""
+
+ # Create orchestrator and agent
+ orchestrator = TaskConstellationOrchestrator()
+ agent = ConstellationAgent(orchestrator=orchestrator)
+
+ # Create and subscribe progress observer
+ progress_observer = ConstellationProgressObserver(agent=agent)
+ event_bus = get_event_bus()
+ event_bus.subscribe(progress_observer)
+
+ # Now orchestrator events will be tracked and queued for agent
+ return agent, orchestrator, progress_observer
+```
+
+### Example 2: Monitoring Task Results
+
+```python
+async def monitor_task_progress(observer: ConstellationProgressObserver):
+ """Monitor task execution progress."""
+
+ # Wait for some tasks to complete
+ await asyncio.sleep(5)
+
+ # Access stored results
+ for task_id, result in observer.task_results.items():
+ status = result["status"]
+ timestamp = result["timestamp"]
+
+ if status == "COMPLETED":
+ print(f"✅ Task {task_id} completed at {timestamp}")
+ print(f" Result: {result['result']}")
+ elif status == "FAILED":
+ print(f"❌ Task {task_id} failed at {timestamp}")
+ print(f" Error: {result['error']}")
+```
+
+### Example 3: Custom Progress Observer
+
+```python
+from galaxy.core.events import Event, IEventObserver, TaskEvent, EventType
+
+class CustomProgressObserver(IEventObserver):
+ """Custom observer with additional progress tracking."""
+
+ def __init__(self, agent, on_progress_callback=None):
+ self.agent = agent
+ self.on_progress_callback = on_progress_callback
+
+ # Track progress statistics
+ self.total_tasks = 0
+ self.completed_tasks = 0
+ self.failed_tasks = 0
+
+ async def on_event(self, event: Event) -> None:
+ if isinstance(event, TaskEvent):
+ # Update statistics
+ if event.event_type == EventType.TASK_STARTED:
+ self.total_tasks += 1
+ elif event.event_type == EventType.TASK_COMPLETED:
+ self.completed_tasks += 1
+ elif event.event_type == EventType.TASK_FAILED:
+ self.failed_tasks += 1
+
+ # Call custom callback
+ if self.on_progress_callback:
+ progress = self.completed_tasks / self.total_tasks if self.total_tasks > 0 else 0
+ self.on_progress_callback(progress, event)
+
+ # Queue for agent
+ if event.event_type in [EventType.TASK_COMPLETED, EventType.TASK_FAILED]:
+ await self.agent.add_task_completion_event(event)
+
+# Usage
+def progress_callback(progress, event):
+ print(f"Progress: {progress*100:.1f}% - {event.task_id} {event.status}")
+
+custom_observer = CustomProgressObserver(
+ agent=agent,
+ on_progress_callback=progress_callback
+)
+event_bus.subscribe(custom_observer)
+```
+
+## Integration with Agent
+
+The Progress Observer integrates tightly with the ConstellationAgent's state machine:
+
+### Agent Queue Interface
+
+The observer calls these agent methods:
+
+```python
+# Queue task completion event
+await self.agent.add_task_completion_event(task_event)
+
+# Queue constellation completion event
+await self.agent.add_constellation_completion_event(constellation_event)
+```
+
+### Agent Processing
+
+The agent processes queued events in its `Continue` state:
+
+```mermaid
+stateDiagram-v2
+ [*] --> Continue: Task completes
+ Continue --> ProcessEvent: Get event from queue
+ ProcessEvent --> UpdateConstellation: Event is TASK_COMPLETED
+ ProcessEvent --> HandleFailure: Event is TASK_FAILED
+ UpdateConstellation --> Continue: More tasks pending
+ UpdateConstellation --> Finish: All tasks done
+ HandleFailure --> Continue: Retry task
+ HandleFailure --> Finish: Max retries exceeded
+ Finish --> [*]
+```
+
+**Agent State Machine States:**
+
+| State | Description | Trigger |
+|-------|-------------|---------|
+| **Continue** | Wait for task completion events | Events queued by Progress Observer |
+| **ProcessEvent** | Extract event from queue | Event available |
+| **UpdateConstellation** | Modify constellation based on result | Task completed successfully |
+| **HandleFailure** | Handle task failure, retry if needed | Task failed |
+| **Finish** | Complete constellation execution | All tasks done or unrecoverable error |
+
+## Performance Considerations
+
+### Memory Management
+
+The observer stores all task results in memory:
+
+```python
+self.task_results: Dict[str, Dict[str, Any]] = {}
+```
+
+**Best Practices:**
+
+- **Clear results** after constellation completion to free memory
+- **Limit result size** by storing only essential data
+- **Use weak references** for large result objects if needed
+
+### Queue Management
+
+Events are queued to the agent's asyncio queue:
+
+```python
+await self.agent.add_task_completion_event(event)
+```
+
+**Considerations:**
+
+- **Queue size** is unbounded by default
+- **Backlog** builds up if the agent processes events more slowly than they arrive (an unbounded queue never blocks the producer)
+- **Memory growth** possible with many rapid completions
+
+!!! warning "Memory Usage"
+ For long-running sessions with many tasks, consider periodically clearing the `task_results` dictionary to prevent memory growth.
+
+## Best Practices
+
+### 1. Clean Up After Completion
+
+Clear task results after constellation execution:
+
+```python
+async def execute_with_cleanup(orchestrator, constellation, progress_observer):
+ """Execute constellation and clean up observer."""
+
+ try:
+ await orchestrator.execute_constellation(constellation)
+ finally:
+ # Clear stored results
+ progress_observer.task_results.clear()
+```
+
+### 2. Handle Errors Gracefully
+
+The observer includes comprehensive error handling:
+
+```python
+try:
+ # Process event
+ await self._handle_task_event(event)
+except AttributeError as e:
+ self.logger.error(f"Attribute error: {e}", exc_info=True)
+except KeyError as e:
+ self.logger.error(f"Missing key: {e}", exc_info=True)
+except Exception as e:
+ self.logger.error(f"Unexpected error: {e}", exc_info=True)
+```
+
+### 3. Monitor Queue Size
+
+Check agent queue size periodically:
+
+```python
+# Access agent's internal queue
+queue_size = self.agent.task_completion_queue.qsize()
+if queue_size > 100:
+ logger.warning(f"Task completion queue growing large: {queue_size}")
+```
+
+## Related Documentation
+
+- **[Observer System Overview](overview.md)** — Architecture and design principles
+- **[Agent Output Observer](agent_output_observer.md)** — Agent response and action display
+- **[Constellation Agent](../constellation_agent/overview.md)** — Agent state machine and event processing
+- **[Constellation Modification Synchronizer](synchronizer.md)** — Coordination between agent and orchestrator
+
+## Summary
+
+The Constellation Progress Observer:
+
+- **Tracks** task execution progress
+- **Stores** task results for historical reference
+- **Queues** completion events for agent processing
+- **Coordinates** between orchestrator and agent
+- **Enables** event-driven constellation modification
+
+This observer is essential for the agent-orchestrator coordination pattern in Galaxy, replacing complex callback mechanisms with a clean event-driven interface.
diff --git a/documents/docs/galaxy/observer/synchronizer.md b/documents/docs/galaxy/observer/synchronizer.md
new file mode 100644
index 000000000..ae3b8ef39
--- /dev/null
+++ b/documents/docs/galaxy/observer/synchronizer.md
@@ -0,0 +1,553 @@
+# Constellation Modification Synchronizer
+
+The **ConstellationModificationSynchronizer** prevents race conditions between constellation modifications by the agent and task execution by the orchestrator. It ensures proper synchronization so the orchestrator doesn't execute newly ready tasks before the agent finishes updating the constellation structure.
+
+**Location:** `galaxy/session/observers/constellation_sync_observer.py`
+
+## Problem Statement
+
+Without synchronization, the following race condition can occur:
+
+```mermaid
+sequenceDiagram
+ participant O as Orchestrator
+ participant T as Task A
+ participant A as Agent
+ participant C as Constellation
+
+ T->>O: Task A completes
+ O->>A: Publish TASK_COMPLETED
+ O->>C: Get ready tasks
+ Note over O: Task B appears ready!
+ O->>T: Execute Task B
+
+ Note over A: Slow: Processing Task A<br/>completion...
+ A->>C: Modify Task B<br/>(changes dependencies!)
+
+ Note over T: ERROR: Task B executing<br/>with outdated state!
+```
+
+**The Race Condition:**
+
+- **Task A completes** → triggers constellation update
+- **Orchestrator immediately** gets ready tasks → might execute Task B
+- **Agent is still** modifying Task B or its dependencies
+- **Result**: Task B executes with outdated/incorrect configuration
+
+!!! danger "Critical Issue"
+ Executing tasks with outdated constellation state can lead to incorrect task parameters, wrong dependency chains, data inconsistencies, and unpredictable workflow behavior.
+
+## Solution: Synchronization Pattern
+
+The Synchronizer implements a **wait-before-execute** pattern:
+
+```mermaid
+sequenceDiagram
+ participant O as Orchestrator
+ participant T as Task A
+ participant S as Synchronizer
+ participant A as Agent
+ participant C as Constellation
+
+ T->>O: Task A completes
+ O->>S: Publish TASK_COMPLETED
+ S->>S: Register pending<br/>modification
+ O->>A: Forward to Agent
+
+ Note over O: Before getting ready tasks
+ O->>S: wait_for_pending_modifications()
+ Note over S: Block until agent done
+
+ A->>C: Modify constellation
+ A->>S: Publish CONSTELLATION_MODIFIED
+ S->>S: Mark modification<br/>complete
+
+ Note over S: Unblock orchestrator
+ O->>C: Get ready tasks
+ Note over C: Now safe to execute!
+ O->>T: Execute Task B
+```
+
+## Architecture
+
+```mermaid
+graph TB
+ subgraph "Orchestrator Loop"
+ OL[Execute Task Loop]
+ WF[Wait for Modifications]
+ GT[Get Ready Tasks]
+ ET[Execute Tasks]
+ end
+
+ subgraph "Synchronizer"
+ PM[Pending Modifications Dict]
+ TC[Task Completion Handler]
+ MC[Modification Complete Handler]
+ WP[Wait Point]
+ end
+
+ subgraph "Agent"
+ A[Agent Process Results]
+ M[Modify Constellation]
+ end
+
+ OL --> WF
+ WF --> WP
+ WP -->|all modifications complete| GT
+ GT --> ET
+ ET --> OL
+
+ TC --> PM
+ MC --> PM
+ PM --> WP
+
+ A --> M
+ M -->|CONSTELLATION_MODIFIED| MC
+
+ style WP fill:#ffa726,stroke:#333,stroke-width:3px
+ style PM fill:#fff4e1,stroke:#333,stroke-width:2px
+ style WF fill:#4a90e2,stroke:#333,stroke-width:2px,color:#fff
+```
+
+## Synchronization Flow
+
+### Step-by-Step Process
+
+1. **Task Completes** → `TASK_COMPLETED` event published
+2. **Synchronizer Registers** → Creates pending modification Future
+3. **Orchestrator Waits** → Calls `wait_for_pending_modifications()`
+4. **Agent Processes** → Modifies constellation structure
+5. **Agent Publishes** → `CONSTELLATION_MODIFIED` event published
+6. **Synchronizer Completes** → Sets Future result, unblocks orchestrator
+7. **Orchestrator Continues** → Gets ready tasks with updated constellation
+
+### Event Flow
+
+```mermaid
+stateDiagram-v2
+ [*] --> WaitingForCompletion: Task executing
+ WaitingForCompletion --> PendingModification: TASK_COMPLETED event
+ PendingModification --> AgentProcessing: Registered in synchronizer
+ AgentProcessing --> ModificationComplete: CONSTELLATION_MODIFIED event
+ ModificationComplete --> Ready: Future completed
+ Ready --> WaitingForCompletion: Next task
+
+ note right of PendingModification
+ Orchestrator blocks here
+ until modification completes
+ end note
+```
+
+## Implementation
+
+### Initialization
+
+```python
+from galaxy.session.observers import ConstellationModificationSynchronizer
+from galaxy.constellation import TaskConstellationOrchestrator
+
+# Create synchronizer with orchestrator reference
+synchronizer = ConstellationModificationSynchronizer(
+ orchestrator=orchestrator,
+ logger=logger
+)
+
+# Subscribe to event bus
+from galaxy.core.events import get_event_bus
+event_bus = get_event_bus()
+event_bus.subscribe(synchronizer)
+
+# Attach to orchestrator (for easy access)
+orchestrator.set_modification_synchronizer(synchronizer)
+```
+
+### Constructor Parameters
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `orchestrator` | `TaskConstellationOrchestrator` | Orchestrator to synchronize with |
+| `logger` | `logging.Logger` | Optional logger instance |
+
+### Internal State
+
+The synchronizer maintains:
+
+```python
+class ConstellationModificationSynchronizer(IEventObserver):
+ def __init__(self, orchestrator, logger=None):
+ self.orchestrator = orchestrator
+
+ # Pending modifications: task_id -> asyncio.Future
+ self._pending_modifications: Dict[str, asyncio.Future] = {}
+
+ # Current constellation being modified
+ self._current_constellation_id: Optional[str] = None
+ self._current_constellation: Optional[TaskConstellation] = None
+
+ # Timeout for modifications (safety measure)
+ self._modification_timeout = 600.0 # 10 minutes
+
+ # Statistics
+ self._stats = {
+ "total_modifications": 0,
+ "completed_modifications": 0,
+ "timeout_modifications": 0,
+ }
+```
+
+## API Reference
+
+### Main Wait Point
+
+#### wait_for_pending_modifications()
+
+Wait for all pending modifications to complete before proceeding.
+
+```python
+async def wait_for_pending_modifications(
+ self,
+ timeout: Optional[float] = None
+) -> bool
+```
+
+**Parameters:**
+
+- `timeout` — Optional timeout in seconds (uses default 600s if None)
+
+**Returns:**
+
+- `True` if all modifications completed successfully
+- `False` if timeout occurred
+
+**Usage in Orchestrator:**
+
+```python
+async def execute_constellation(self, constellation):
+ """Execute constellation with synchronized modifications."""
+
+ while True:
+ # Wait for any pending modifications
+ await self.synchronizer.wait_for_pending_modifications()
+
+ # Now safe to get ready tasks
+ ready_tasks = constellation.get_ready_tasks()
+
+ if not ready_tasks:
+ break # All tasks complete
+
+ # Execute ready tasks
+ await self._execute_tasks(ready_tasks)
+```
+
+### State Management Methods
+
+#### get_current_constellation()
+
+Get the most recent constellation state after modifications.
+
+```python
+def get_current_constellation(self) -> Optional[TaskConstellation]
+```
+
+**Returns:** Latest constellation instance or None
+
+#### has_pending_modifications()
+
+Check if any modifications are pending.
+
+```python
+def has_pending_modifications(self) -> bool
+```
+
+**Returns:** `True` if modifications pending, `False` otherwise
+
+#### get_pending_count()
+
+Get number of pending modifications.
+
+```python
+def get_pending_count(self) -> int
+```
+
+**Returns:** Count of pending modifications
+
+### Constellation State Merging
+
+#### merge_and_sync_constellation_states()
+
+Merge constellation states to preserve both structural changes and execution state.
+
+```python
+def merge_and_sync_constellation_states(
+ self,
+ orchestrator_constellation: TaskConstellation
+) -> TaskConstellation
+```
+
+**Purpose:** Prevents loss of execution state when agent modifies constellation structure.
+
+**Merge Strategy:**
+
+1. **Use agent's constellation as base** (has structural modifications)
+2. **Preserve orchestrator's execution state** for existing tasks
+3. **Priority rule**: More advanced state wins (COMPLETED > RUNNING > PENDING)
+4. **Update constellation state** after merging
+
+**Example Scenario:**
+
+```
+Before Merge:
+- Orchestrator's Task A: COMPLETED (execution state)
+- Agent's Task A: RUNNING (structural changes applied)
+
+After Merge:
+- Task A: COMPLETED (preserved from orchestrator)
+ + structural changes from agent
+```
+
+## Usage Examples
+
+### Example 1: Basic Integration
+
+```python
+from galaxy.core.events import get_event_bus
+from galaxy.session.observers import ConstellationModificationSynchronizer
+
+async def setup_synchronized_execution():
+ """Set up synchronized constellation execution."""
+
+ # Create orchestrator
+ orchestrator = TaskConstellationOrchestrator()
+
+ # Create and attach synchronizer
+ synchronizer = ConstellationModificationSynchronizer(
+ orchestrator=orchestrator,
+ logger=logger
+ )
+
+ # Subscribe to events
+ event_bus = get_event_bus()
+ event_bus.subscribe(synchronizer)
+
+ # Attach to orchestrator
+ orchestrator.set_modification_synchronizer(synchronizer)
+
+ # Execute constellation (now synchronized)
+ await orchestrator.execute_constellation(constellation)
+```
+
+### Example 2: Monitor Synchronization
+
+```python
+async def monitor_synchronization(synchronizer):
+ """Monitor synchronization status during execution."""
+
+ while True:
+ await asyncio.sleep(1)
+
+ if synchronizer.has_pending_modifications():
+ count = synchronizer.get_pending_count()
+ pending = synchronizer.get_pending_task_ids()
+ print(f"⏳ Waiting for {count} modifications: {pending}")
+ else:
+ print("✅ No pending modifications")
+
+ # Check statistics
+ stats = synchronizer.get_statistics()
+ print(f"Stats: {stats['completed_modifications']} completed, "
+ f"{stats['timeout_modifications']} timeouts")
+```
+
+### Example 3: Custom Timeout Handling
+
+```python
+# Set custom timeout (default is 600 seconds)
+synchronizer.set_modification_timeout(300.0) # 5 minutes
+
+# Wait with custom timeout
+success = await synchronizer.wait_for_pending_modifications(timeout=120.0)
+
+if not success:
+ print("⚠️ Modifications timed out, proceeding anyway")
+ # Handle timeout scenario
+ synchronizer.clear_pending_modifications() # Emergency cleanup
+```
+
+## Advanced Features
+
+### Automatic Timeout Handling
+
+The synchronizer automatically times out stuck modifications:
+
+```python
+async def _auto_complete_on_timeout(
+ self,
+ task_id: str,
+ future: asyncio.Future
+) -> None:
+ """Auto-complete a pending modification if it times out."""
+
+ await asyncio.sleep(self._modification_timeout)
+
+ if not future.done():
+ self._stats["timeout_modifications"] += 1
+ self.logger.warning(
+ f"⚠️ Modification for task '{task_id}' timed out after "
+ f"{self._modification_timeout}s. Auto-completing to prevent deadlock."
+ )
+ future.set_result(False)
+ del self._pending_modifications[task_id]
+```
+
+**Timeout Benefits:**
+
+- Prevents deadlocks if agent fails
+- Allows execution to continue
+- Logs timeout for debugging
+- Tracks timeout statistics
+
+### Dynamic Modification Tracking
+
+Handles new modifications registered during wait:
+
+```python
+async def wait_for_pending_modifications(self, timeout=None) -> bool:
+ """Wait for all pending modifications, including those added during wait."""
+
+ while self._pending_modifications:
+ # Get snapshot of current pending modifications
+ pending_tasks = list(self._pending_modifications.keys())
+ pending_futures = list(self._pending_modifications.values())
+
+ # Wait for current batch; remaining_timeout (computation omitted here) is the time left before the overall timeout expires
+ await asyncio.wait_for(
+ asyncio.gather(*pending_futures, return_exceptions=True),
+ timeout=remaining_timeout
+ )
+
+ # Check if new modifications were added during wait
+ # If yes, loop again; if no, we're done
+ if not self._pending_modifications:
+ break
+
+ return True
+```
+
+## Statistics and Monitoring
+
+### Available Statistics
+
+```python
+stats = synchronizer.get_statistics()
+
+{
+ "total_modifications": 10, # Total registered
+ "completed_modifications": 9, # Successfully completed
+ "timeout_modifications": 1 # Timed out
+}
+```
+
+### Monitoring Points
+
+| Metric | Method | Description |
+|--------|--------|-------------|
+| Pending count | `get_pending_count()` | Number of pending modifications |
+| Pending tasks | `get_pending_task_ids()` | List of task IDs with pending modifications |
+| Has pending | `has_pending_modifications()` | Boolean check |
+| Statistics | `get_statistics()` | Complete stats dictionary |
+
+## Performance Considerations
+
+### Memory Usage
+
+The synchronizer stores futures for each pending modification:
+
+```python
+self._pending_modifications: Dict[str, asyncio.Future] = {}
+```
+
+**Memory Impact:**
+
+- **Low overhead**: Only stores Future objects (small)
+- **Temporary**: Cleared after completion
+- **Bounded**: Limited by concurrent task completions
+
+### Timeout Configuration
+
+Choose appropriate timeout based on constellation complexity:
+
+```python
+# Simple constellations
+synchronizer.set_modification_timeout(60.0) # 1 minute
+
+# Complex constellations with slow LLM
+synchronizer.set_modification_timeout(600.0) # 10 minutes
+
+# Very complex multi-device scenarios
+synchronizer.set_modification_timeout(1800.0) # 30 minutes
+```
+
+## Best Practices
+
+### 1. Always Attach to Orchestrator
+
+The orchestrator needs to call `wait_for_pending_modifications()`:
+
+```python
+# ✅ Good: Orchestrator can access synchronizer
+orchestrator.set_modification_synchronizer(synchronizer)
+
+# ❌ Bad: No way for orchestrator to wait
+# synchronizer exists but orchestrator doesn't use it
+```
+
+### 2. Handle Timeouts Gracefully
+
+```python
+success = await synchronizer.wait_for_pending_modifications()
+
+if not success:
+ # Log timeout
+ logger.warning("Modifications timed out")
+
+ # Get current state anyway (may be partially updated)
+ constellation = synchronizer.get_current_constellation()
+
+ # Continue execution (with caution)
+```
+
+### 3. Monitor Statistics
+
+Track synchronization health:
+
+```python
+stats = synchronizer.get_statistics()
+
+timeout_rate = (
+ stats["timeout_modifications"] / stats["total_modifications"]
+ if stats["total_modifications"] > 0
+ else 0
+)
+
+if timeout_rate > 0.1: # More than 10% timing out
+ logger.warning(f"High timeout rate: {timeout_rate:.1%}")
+ # Consider increasing timeout or investigating agent performance
+```
+
+## Related Documentation
+
+- **[Observer System Overview](overview.md)** — Architecture and design
+- **[Constellation Progress Observer](progress_observer.md)** — Task completion events
+- **[Constellation Agent](../constellation_agent/overview.md)** — Agent modification process
+
+## Summary
+
+The Constellation Modification Synchronizer:
+
+- **Prevents** race conditions between agent and orchestrator
+- **Synchronizes** constellation modifications with task execution
+- **Blocks** orchestrator until modifications complete
+- **Handles** timeouts to prevent deadlocks
+- **Merges** constellation states to preserve execution data
+
+This observer is critical for ensuring correct constellation execution when the agent dynamically modifies workflow structure during execution.
diff --git a/documents/docs/galaxy/observer/visualization_observer.md b/documents/docs/galaxy/observer/visualization_observer.md
new file mode 100644
index 000000000..cd4f8c176
--- /dev/null
+++ b/documents/docs/galaxy/observer/visualization_observer.md
@@ -0,0 +1,534 @@
+# DAG Visualization Observer
+
+The **DAGVisualizationObserver** provides real-time visual feedback during constellation execution. It displays DAG topology, task progress, and constellation modifications using rich terminal graphics.
+
+**Location:** `galaxy/session/observers/dag_visualization_observer.py`
+
+## Purpose
+
+The Visualization Observer enables developers and users to:
+
+- **See DAG Structure** — View constellation topology and task dependencies
+- **Monitor Progress** — Track task execution in real-time
+- **Observe Modifications** — Visualize how the constellation changes
+- **Debug Issues** — Identify bottlenecks and failed tasks visually
+
+## Architecture
+
+The observer uses a **delegation pattern** with specialized handlers:
+
+```mermaid
+graph TB
+ subgraph "Main Observer"
+ DVO[DAGVisualizationObserver]
+ CE[Constellation Events]
+ TE[Task Events]
+ end
+
+ subgraph "Specialized Handlers"
+ CVH[ConstellationVisualizationHandler]
+ TVH[TaskVisualizationHandler]
+ end
+
+ subgraph "Display Components"
+ CD[ConstellationDisplay]
+ TD[TaskDisplay]
+ DV[DAGVisualizer]
+ end
+
+ DVO --> CE
+ DVO --> TE
+ CE --> CVH
+ TE --> TVH
+
+ CVH --> CD
+ CVH --> DV
+ TVH --> TD
+ TVH --> DV
+
+ style DVO fill:#66bb6a,stroke:#333,stroke-width:3px
+ style CVH fill:#ffa726,stroke:#333,stroke-width:2px
+ style TVH fill:#ffa726,stroke:#333,stroke-width:2px
+```
+
+**Component Responsibilities:**
+
+| Component | Role | Handled Events |
+|-----------|------|----------------|
+| **DAGVisualizationObserver** | Main coordinator, routes events | All constellation and task events |
+| **ConstellationVisualizationHandler** | Handles constellation-level displays | CONSTELLATION_STARTED, CONSTELLATION_COMPLETED, CONSTELLATION_MODIFIED |
+| **TaskVisualizationHandler** | Handles task-level displays | TASK_STARTED, TASK_COMPLETED, TASK_FAILED |
+| **DAGVisualizer** | Renders complex DAG visualizations | Used by handlers for topology |
+| **ConstellationDisplay** | Renders constellation information | Used by handler for constellation events |
+| **TaskDisplay** | Renders task information | Used by handler for task events |
+
+## Implementation
+
+### Initialization
+
+```python
+from galaxy.session.observers import DAGVisualizationObserver
+from rich.console import Console
+
+# Create visualization observer
+viz_observer = DAGVisualizationObserver(
+ enable_visualization=True,
+ console=Console() # Optional: provide custom console
+)
+
+# Subscribe to event bus
+from galaxy.core.events import get_event_bus
+event_bus = get_event_bus()
+event_bus.subscribe(viz_observer)
+```
+
+**Constructor Parameters:**
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `enable_visualization` | `bool` | `True` | Whether to enable visualization |
+| `console` | `rich.console.Console` | `None` | Optional rich console for output |
+
+### Disabling Visualization
+
+Visualization can be toggled at runtime:
+
+```python
+# Disable visualization temporarily
+viz_observer.set_visualization_enabled(False)
+
+# Re-enable
+viz_observer.set_visualization_enabled(True)
+```
+
+## Visualization Types
+
+The observer produces several types of visualizations:
+
+### 1. Constellation Started
+
+Displays when a constellation begins execution:
+
+```
+╭──────────────────────────────────────────────────────────────╮
+│ 🌟 Constellation Started: email_batch_constellation │
+├──────────────────────────────────────────────────────────────┤
+│ ID: const_abc123 │
+│ Total Tasks: 8 │
+│ Status: ACTIVE │
+│ Parallel Capacity: 3 │
+╰──────────────────────────────────────────────────────────────╯
+```
+
+Followed by DAG topology:
+
+```mermaid
+graph TD
+ fetch_emails[Fetch Emails]
+ parse_1[Parse Email 1]
+ parse_2[Parse Email 2]
+ parse_3[Parse Email 3]
+ reply_1[Reply Email 1]
+ reply_2[Reply Email 2]
+ reply_3[Reply Email 3]
+ summarize[Summarize Results]
+
+ fetch_emails --> parse_1
+ fetch_emails --> parse_2
+ fetch_emails --> parse_3
+ parse_1 --> reply_1
+ parse_2 --> reply_2
+ parse_3 --> reply_3
+ reply_1 --> summarize
+ reply_2 --> summarize
+ reply_3 --> summarize
+```
+
+### 2. Task Progress
+
+Displays task execution events:
+
+**Task Started:**
+```
+▶ Task Started: parse_email_1
+ └─ Type: parse_email
+ └─ Device: windows_pc_001
+ └─ Priority: MEDIUM
+```
+
+**Task Completed:**
+```
+✅ Task Completed: parse_email_1
+ Duration: 2.3s
+ Result: Parsed 1 email with 2 attachments
+ Newly Ready: [reply_email_1]
+```
+
+**Task Failed:**
+```
+❌ Task Failed: parse_email_2
+ Duration: 1.8s
+ Error: NetworkTimeout: Failed to connect to email server
+ Retry: 1/3
+ Newly Ready: []
+```
+
+### 3. Constellation Modified
+
+Shows structural changes to the constellation:
+
+```
+🔄 Constellation Modified: email_batch_constellation
+ Modification Type: add_tasks
+ On Task: parse_email_1
+
+ Changes:
+ ├─ Tasks Added: 2
+ │ └─ extract_attachment_1
+ │ └─ extract_attachment_2
+ ├─ Dependencies Added: 2
+ │ └─ parse_email_1 → extract_attachment_1
+ │ └─ parse_email_1 → extract_attachment_2
+ └─ Tasks Modified: 1
+ └─ reply_email_1 (dependencies updated)
+```
+
+Followed by updated DAG topology showing new tasks.
+
+### 4. Execution Flow
+
+Shows current execution state (for smaller constellations):
+
+```
+Execution Flow:
+┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┓
+┃ Task ID ┃ Status ┃ Device ┃ Duration ┃
+┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━┩
+│ fetch_emails │ COMPLETED │ win_001 │ 1.2s │
+│ parse_email_1 │ RUNNING │ win_001 │ 0.8s... │
+│ parse_email_2 │ RUNNING │ mac_002 │ 0.5s... │
+│ parse_email_3 │ PENDING │ - │ - │
+│ reply_email_1 │ PENDING │ - │ - │
+└─────────────────┴───────────┴─────────┴──────────┘
+```
+
+## Event Handling Flow
+
+```mermaid
+sequenceDiagram
+ participant O as Orchestrator
+ participant EB as EventBus
+ participant DVO as DAGVisualizationObserver
+ participant CVH as ConstellationHandler
+ participant TVH as TaskHandler
+ participant D as Display Components
+
+ O->>EB: CONSTELLATION_STARTED
+ EB->>DVO: on_event(event)
+ DVO->>CVH: handle_constellation_event()
+ CVH->>D: Display constellation start
+ CVH->>D: Display DAG topology
+
+ O->>EB: TASK_STARTED
+ EB->>DVO: on_event(event)
+ DVO->>TVH: handle_task_event()
+ TVH->>D: Display task start
+
+ O->>EB: TASK_COMPLETED
+ EB->>DVO: on_event(event)
+ DVO->>TVH: handle_task_event()
+ TVH->>D: Display task completion
+ TVH->>D: Display execution flow
+
+ Note over O: Agent modifies constellation
+
+ O->>EB: CONSTELLATION_MODIFIED
+ EB->>DVO: on_event(event)
+ DVO->>CVH: handle_constellation_event()
+ CVH->>D: Display modifications
+ CVH->>D: Display updated topology
+```
+
+## API Reference
+
+### Main Observer Methods
+
+#### Constructor
+
+```python
+def __init__(
+ self,
+ enable_visualization: bool = True,
+ console=None
+)
+```
+
+**Parameters:**
+
+- `enable_visualization` — Enable/disable visualization output
+- `console` — Optional `rich.console.Console` for output control
+
+#### set_visualization_enabled()
+
+Toggle visualization at runtime:
+
+```python
+def set_visualization_enabled(self, enabled: bool) -> None
+```
+
+**Example:**
+
+```python
+# Disable during bulk operations
+viz_observer.set_visualization_enabled(False)
+await orchestrator.execute_constellation(constellation)
+
+# Re-enable for interactive use
+viz_observer.set_visualization_enabled(True)
+```
+
+### Constellation Management
+
+#### register_constellation()
+
+Manually register a constellation for visualization:
+
+```python
+def register_constellation(
+ self,
+ constellation_id: str,
+ constellation: TaskConstellation
+) -> None
+```
+
+**Use Case:** Pre-register constellations before execution starts.
+
+#### get_constellation()
+
+Retrieve stored constellation reference:
+
+```python
+def get_constellation(self, constellation_id: str) -> Optional[TaskConstellation]
+```
+
+#### clear_constellations()
+
+Clear all stored constellation references:
+
+```python
+def clear_constellations(self) -> None
+```
+
+## Customization
+
+### Custom Console
+
+Provide custom Rich console for output control:
+
+```python
+from rich.console import Console
+
+# Console with custom width and theme
+custom_console = Console(
+ width=120,
+ theme=my_custom_theme,
+ record=True # Enable recording for export
+)
+
+viz_observer = DAGVisualizationObserver(
+ enable_visualization=True,
+ console=custom_console
+)
+```
+
+### Selective Visualization
+
+Visualize only specific event types:
+
+```python
+from galaxy.core.events import EventType
+
+# Subscribe to specific events only
+event_bus.subscribe(viz_observer, {
+ EventType.CONSTELLATION_STARTED,
+ EventType.CONSTELLATION_MODIFIED,
+ EventType.TASK_FAILED # Only show failures
+})
+```
+
+## Usage Examples
+
+### Example 1: Basic Visualization
+
+```python
+from galaxy.session.observers import DAGVisualizationObserver
+from galaxy.core.events import get_event_bus
+
+async def visualize_execution():
+ """Execute constellation with visualization."""
+
+ # Create and subscribe visualization observer
+ viz_observer = DAGVisualizationObserver(enable_visualization=True)
+ event_bus = get_event_bus()
+ event_bus.subscribe(viz_observer)
+
+ # Execute constellation (visualization happens automatically)
+ await orchestrator.execute_constellation(constellation)
+
+ # Clean up
+ event_bus.unsubscribe(viz_observer)
+```
+
+### Example 2: Conditional Visualization
+
+```python
+async def execute_with_conditional_viz(constellation, verbose: bool = False):
+ """Execute with visualization only if verbose mode enabled."""
+
+ viz_observer = DAGVisualizationObserver(enable_visualization=verbose)
+ event_bus = get_event_bus()
+
+ if verbose:
+ event_bus.subscribe(viz_observer)
+
+ try:
+ await orchestrator.execute_constellation(constellation)
+ finally:
+ if verbose:
+ event_bus.unsubscribe(viz_observer)
+```
+
+### Example 3: Export Visualization
+
+```python
+from rich.console import Console
+
+async def execute_and_export_visualization():
+ """Execute constellation and export visualization to HTML."""
+
+ # Create console with recording enabled
+ console = Console(record=True, width=120)
+ viz_observer = DAGVisualizationObserver(
+ enable_visualization=True,
+ console=console
+ )
+
+ event_bus = get_event_bus()
+ event_bus.subscribe(viz_observer)
+
+ try:
+ await orchestrator.execute_constellation(constellation)
+ finally:
+ event_bus.unsubscribe(viz_observer)
+
+ # Export recorded output to HTML
+ console.save_html("execution_visualization.html")
+ print("Visualization saved to execution_visualization.html")
+```
+
+### Example 4: Multiple Constellations
+
+```python
+async def visualize_multiple_constellations():
+ """Visualize multiple constellation executions."""
+
+ viz_observer = DAGVisualizationObserver(enable_visualization=True)
+ event_bus = get_event_bus()
+ event_bus.subscribe(viz_observer)
+
+ try:
+ for constellation in constellations:
+ print(f"\n{'='*60}")
+ print(f"Executing: {constellation.name}")
+ print(f"{'='*60}\n")
+
+ await orchestrator.execute_constellation(constellation)
+
+ # Clear constellation references between executions
+ viz_observer.clear_constellations()
+ finally:
+ event_bus.unsubscribe(viz_observer)
+```
+
+## Performance Considerations
+
+### Visualization Overhead
+
+Visualization adds minimal overhead:
+
+- **Small DAGs** (< 10 tasks): Negligible impact
+- **Medium DAGs** (10-50 tasks): < 1% overhead
+- **Large DAGs** (> 50 tasks): Topology rendering may be slow
+
+### Optimization Strategies
+
+```python
+# Strategy 1: Disable for large constellations
+if constellation.task_count > 50:
+ viz_observer.set_visualization_enabled(False)
+
+# Strategy 2: Subscribe to fewer events
+event_bus.subscribe(viz_observer, {
+ EventType.CONSTELLATION_STARTED,
+ EventType.CONSTELLATION_COMPLETED,
+ EventType.TASK_FAILED # Only show problems
+})
+
+# Strategy 3: Conditional topology display
+# (Handler automatically skips topology for constellations > 10 tasks)
+```
+
+## Best Practices
+
+### 1. Enable for Interactive Sessions
+
+```python
+# ✅ Good: Interactive development/debugging
+if __name__ == "__main__":
+ viz_observer = DAGVisualizationObserver(enable_visualization=True)
+ # ...
+
+# ✅ Good: Disable visualization for batch processing
+if running_in_batch_mode:
+ viz_observer = DAGVisualizationObserver(enable_visualization=False)
+```
+
+### 2. Clean Up Constellation References
+
+```python
+# After processing many constellations
+for constellation in constellation_list:
+ await orchestrator.execute_constellation(constellation)
+ viz_observer.clear_constellations() # Free memory
+```
+
+### 3. Export for Documentation
+
+```python
+# Record visualization for documentation/reports
+console = Console(record=True)
+viz_observer = DAGVisualizationObserver(console=console)
+
+# ... execute constellation ...
+
+# Export
+console.save_html("docs/execution_example.html", clear=False)  # keep the recording so save_text below still has content
+console.save_text("logs/execution.txt")
+```
+
+## Related Documentation
+
+- **[Observer System Overview](overview.md)** — Architecture and design
+- **[Progress Observer](progress_observer.md)** — Task completion tracking
+
+## Summary
+
+The DAG Visualization Observer:
+
+- **Displays** constellation structure and execution progress
+- **Delegates** to specialized handlers for clean separation
+- **Uses** Rich terminal graphics for beautiful output
+- **Supports** conditional enabling/disabling
+- **Exports** visualization for documentation
+
+This observer is essential for understanding and debugging constellation execution, providing intuitive visual feedback for complex DAG workflows.
diff --git a/documents/mkdocs.yml b/documents/mkdocs.yml
index 2649a510a..db2e3f310 100644
--- a/documents/mkdocs.yml
+++ b/documents/mkdocs.yml
@@ -72,6 +72,14 @@ nav:
- Batched Editing: galaxy/constellation_orchestrator/batched_editing.md
- Constellation Manager: galaxy/constellation_orchestrator/constellation_manager.md
- API Reference: galaxy/constellation_orchestrator/api_reference.md
+ - Observer System:
+ - Overview: galaxy/observer/overview.md
+ - Event System: galaxy/observer/event_system.md
+ - Progress Observer: galaxy/observer/progress_observer.md
+ - Agent Output Observer: galaxy/observer/agent_output_observer.md
+ - Synchronizer: galaxy/observer/synchronizer.md
+ - Metrics Observer: galaxy/observer/metrics_observer.md
+ - Visualization Observer: galaxy/observer/visualization_observer.md
- Evaluation & Logging:
- Trajectory Report: galaxy/evaluation/trajectory_report.md
- Performance Metrics: galaxy/evaluation/performance_metrics.md
diff --git a/galaxy/client/DEVICE_EVENTS.md b/galaxy/client/DEVICE_EVENTS.md
deleted file mode 100644
index 5142383b9..000000000
--- a/galaxy/client/DEVICE_EVENTS.md
+++ /dev/null
@@ -1,311 +0,0 @@
-# 设备事件系统使用指南
-
-## 概述
-
-设备事件系统允许你监听和响应设备管理器中的设备连接、断连和状态变化事件。所有事件都包含完整的设备注册表快照,让你可以随时了解所有设备的当前状态。
-
-## 事件类型
-
-系统提供三种设备相关事件:
-
-1. **DEVICE_CONNECTED** - 设备成功连接时触发
-2. **DEVICE_DISCONNECTED** - 设备断开连接时触发
-3. **DEVICE_STATUS_CHANGED** - 设备状态改变时触发(例如:IDLE ↔ BUSY)
-
-## DeviceEvent 结构
-
-所有设备事件都使用 `DeviceEvent` 类,包含以下字段:
-
-```python
-@dataclass
-class DeviceEvent(Event):
- device_id: str # 触发事件的设备ID
- device_status: str # 当前设备状态
- device_info: Dict[str, Any] # 当前设备的详细信息
- all_devices: Dict[str, Dict[str, Any]] # 所有设备的状态快照
-```
-
-### device_info 字段内容
-
-```python
-{
- "device_id": "device_001",
- "status": "idle",
- "os": "Windows",
- "server_url": "ws://localhost:8000",
- "capabilities": ["ui_control", "file_access"],
- "metadata": {...},
- "last_heartbeat": "2025-11-08T10:30:00",
- "connection_attempts": 0,
- "max_retries": 5,
- "current_task_id": None
-}
-```
-
-### all_devices 字段内容
-
-包含 device_registry 中所有设备的状态信息:
-
-```python
-{
- "device_001": {
- "device_id": "device_001",
- "status": "idle",
- "os": "Windows",
- ...
- },
- "device_002": {
- "device_id": "device_002",
- "status": "busy",
- "os": "macOS",
- ...
- }
-}
-```
-
-## 使用示例
-
-### 1. 创建设备事件观察者
-
-```python
-from galaxy.core.events import IEventObserver, EventType, DeviceEvent
-
-class DeviceMonitor(IEventObserver):
- """监控设备连接状态的观察者"""
-
- async def on_event(self, event):
- if isinstance(event, DeviceEvent):
- if event.event_type == EventType.DEVICE_CONNECTED:
- await self._handle_device_connected(event)
- elif event.event_type == EventType.DEVICE_DISCONNECTED:
- await self._handle_device_disconnected(event)
- elif event.event_type == EventType.DEVICE_STATUS_CHANGED:
- await self._handle_device_status_changed(event)
-
- async def _handle_device_connected(self, event: DeviceEvent):
- print(f"✅ Device {event.device_id} connected")
- print(f" OS: {event.device_info['os']}")
- print(f" Total devices: {len(event.all_devices)}")
-
- async def _handle_device_disconnected(self, event: DeviceEvent):
- print(f"❌ Device {event.device_id} disconnected")
- print(f" Remaining devices: {len(event.all_devices) - 1}")
-
- async def _handle_device_status_changed(self, event: DeviceEvent):
- print(f"🔄 Device {event.device_id} status: {event.device_status}")
- if event.device_status == "busy":
- task_id = event.device_info.get("current_task_id")
- print(f" Executing task: {task_id}")
-```
-
-### 2. 订阅设备事件
-
-```python
-from galaxy.core.events import get_event_bus, EventType
-from galaxy.client.device_manager import ConstellationDeviceManager
-
-# 创建设备管理器
-manager = ConstellationDeviceManager()
-
-# 创建观察者
-monitor = DeviceMonitor()
-
-# 获取事件总线并订阅设备事件
-event_bus = get_event_bus()
-event_bus.subscribe(
- monitor,
- event_types={
- EventType.DEVICE_CONNECTED,
- EventType.DEVICE_DISCONNECTED,
- EventType.DEVICE_STATUS_CHANGED,
- }
-)
-
-# 或者订阅所有事件
-# event_bus.subscribe(monitor) # 订阅所有事件
-```
-
-### 3. 监控所有设备状态
-
-```python
-class DeviceRegistryMonitor(IEventObserver):
- """实时监控设备注册表的完整状态"""
-
- def __init__(self):
- self.device_history = []
-
- async def on_event(self, event):
- if isinstance(event, DeviceEvent):
- # 记录设备注册表快照
- snapshot = {
- "timestamp": event.timestamp,
- "event_type": event.event_type.value,
- "triggered_by": event.device_id,
- "all_devices": event.all_devices.copy()
- }
- self.device_history.append(snapshot)
-
- # 分析设备状态分布
- status_counts = {}
- for device_id, device_info in event.all_devices.items():
- status = device_info["status"]
- status_counts[status] = status_counts.get(status, 0) + 1
-
- print(f"📊 Device Status Distribution:")
- for status, count in status_counts.items():
- print(f" {status}: {count}")
-```
-
-### 4. WebSocket 实时推送(示例)
-
-```python
-from galaxy.webui.websocket_observer import WebSocketObserver
-
-class DeviceWebSocketObserver(WebSocketObserver):
- """将设备事件推送到 Web UI"""
-
- async def on_event(self, event):
- if isinstance(event, DeviceEvent):
- # 准备发送给前端的数据
- message = {
- "type": "device_event",
- "event_type": event.event_type.value,
- "device_id": event.device_id,
- "device_status": event.device_status,
- "device_info": event.device_info,
- "all_devices": event.all_devices,
- "timestamp": event.timestamp
- }
-
- # 广播给所有连接的 WebSocket 客户端
- await self.broadcast(message)
-```
-
-### 5. 设备负载均衡器
-
-```python
-class DeviceLoadBalancer(IEventObserver):
- """根据设备状态进行负载均衡"""
-
- def __init__(self):
- self.idle_devices = []
-
- async def on_event(self, event):
- if isinstance(event, DeviceEvent):
- # 更新空闲设备列表
- self.idle_devices = [
- device_id
- for device_id, device_info in event.all_devices.items()
- if device_info["status"] == "idle"
- ]
-
- print(f"💡 Available devices: {len(self.idle_devices)}")
-
- def get_next_available_device(self):
- """获取下一个可用设备(简单轮询)"""
- if self.idle_devices:
- return self.idle_devices[0]
- return None
-```
-
-## 事件触发时机
-
-### DEVICE_CONNECTED
-
-- 设备成功连接并完成初始化
-- 重连成功后
-- 设备状态已设置为 IDLE
-
-### DEVICE_DISCONNECTED
-
-- 主动断开连接(调用 `disconnect_device()`)
-- 检测到设备异常断开
-- 连接丢失或超时
-
-### DEVICE_STATUS_CHANGED
-
-- 设备开始执行任务(IDLE → BUSY)
-- 设备完成任务(BUSY → IDLE)
-- 任务失败或超时(BUSY → IDLE)
-
-## 注意事项
-
-1. **事件是异步的** - 所有事件处理函数必须是 async 函数
-2. **包含完整快照** - 每个事件都包含所有设备的状态,无需额外查询
-3. **事件顺序** - 事件按发生顺序发布,但处理可能并发执行
-4. **错误处理** - 观察者中的异常不会影响其他观察者或事件发布者
-
-## 完整示例
-
-```python
-import asyncio
-from galaxy.client.device_manager import ConstellationDeviceManager
-from galaxy.core.events import get_event_bus, EventType, IEventObserver, DeviceEvent
-
-class DeviceLogger(IEventObserver):
- async def on_event(self, event):
- if isinstance(event, DeviceEvent):
- print(f"\n{'='*60}")
- print(f"Event: {event.event_type.value}")
- print(f"Device: {event.device_id}")
- print(f"Status: {event.device_status}")
- print(f"Total Devices: {len(event.all_devices)}")
- print(f"{'='*60}\n")
-
-async def main():
- # 创建设备管理器
- manager = ConstellationDeviceManager()
-
- # 创建并订阅观察者
- logger = DeviceLogger()
- event_bus = get_event_bus()
- event_bus.subscribe(
- logger,
- event_types={
- EventType.DEVICE_CONNECTED,
- EventType.DEVICE_DISCONNECTED,
- EventType.DEVICE_STATUS_CHANGED,
- }
- )
-
- # 注册设备
- await manager.register_device(
- device_id="device_001",
- server_url="ws://localhost:8000",
- os="Windows",
- capabilities=["ui_control"]
- )
-
- # 执行任务
- await manager.assign_task_to_device(
- task_id="task_001",
- device_id="device_001",
- task_description="Test task",
- task_data={}
- )
-
- # 断开连接
- await manager.disconnect_device("device_001")
-
-if __name__ == "__main__":
- asyncio.run(main())
-```
-
-## 调试技巧
-
-启用详细日志查看事件发布过程:
-
-```python
-import logging
-
-logging.basicConfig(level=logging.DEBUG)
-logger = logging.getLogger("galaxy.client.device_manager")
-logger.setLevel(logging.DEBUG)
-```
-
-查看事件发布日志:
-```
-📢 Published device_connected event for device device_001
-📢 Published device_status_changed event for device device_001
-📢 Published device_disconnected event for device device_001
-```
diff --git a/galaxy/client/DEVICE_EVENTS_SUMMARY.md b/galaxy/client/DEVICE_EVENTS_SUMMARY.md
deleted file mode 100644
index 84a1f6d60..000000000
--- a/galaxy/client/DEVICE_EVENTS_SUMMARY.md
+++ /dev/null
@@ -1,175 +0,0 @@
-# 设备事件系统实现总结
-
-## ✅ 已完成的工作
-
-### 1. 事件系统扩展 (`galaxy/core/events.py`)
-
-#### 新增事件类型
-在 `EventType` 枚举中添加了三种设备相关事件:
-- `DEVICE_CONNECTED` - 设备连接事件
-- `DEVICE_DISCONNECTED` - 设备断连事件
-- `DEVICE_STATUS_CHANGED` - 设备状态改变事件
-
-#### 新增事件类
-创建了 `DeviceEvent` 数据类,包含:
-- `device_id`: 触发事件的设备ID
-- `device_status`: 当前设备状态
-- `device_info`: 当前设备的完整信息
-- `all_devices`: **device_registry 中所有设备的状态快照**
-
-### 2. 设备管理器集成 (`galaxy/client/device_manager.py`)
-
-#### 新增方法
-
-**`_get_device_registry_snapshot()`**
-- 创建所有设备的完整状态快照
-- 包含每个设备的状态、配置、心跳、任务等信息
-
-**`_publish_device_event()`**
-- 发布设备事件到事件总线
-- 自动附加设备注册表快照
-- 包含完整的设备上下文信息
-
-#### 事件发布时机
-
-**DEVICE_CONNECTED 事件**
-- 位置:`connect_device()` 方法
-- 时机:设备成功连接并设置为 IDLE 状态后
-- 包含:新连接设备信息 + 所有设备快照
-
-**DEVICE_DISCONNECTED 事件**
-- 位置:`disconnect_device()` 和 `_handle_device_disconnection()` 方法
-- 时机:设备断开连接并更新状态后
-- 包含:断开设备信息 + 所有设备快照
-
-**DEVICE_STATUS_CHANGED 事件**
-- 位置:`_execute_task_on_device()` 方法(finally 块)
-- 时机:设备状态改变时(IDLE ↔ BUSY)
- - 任务开始执行时(IDLE → BUSY)
- - 任务完成/失败/超时后(BUSY → IDLE)
-- 包含:状态变化的设备信息 + 所有设备快照
-
-### 3. 测试套件 (`tests/galaxy/client/test_device_events.py`)
-
-创建了完整的测试套件,包含 4 个测试:
-
-✅ `test_device_connected_event` - 验证设备连接事件
-✅ `test_device_disconnected_event` - 验证设备断连事件
-✅ `test_device_status_changed_event` - 验证设备状态改变事件
-✅ `test_device_registry_snapshot_in_events` - 验证设备快照功能
-
-**测试结果:4 passed in 14.02s** 🎉
-
-### 4. 文档和示例
-
-#### 使用指南 (`galaxy/client/DEVICE_EVENTS.md`)
-- 事件类型详细说明
-- DeviceEvent 结构文档
-- 多个实际使用示例
-- 事件触发时机说明
-- 调试技巧
-
-#### 演示脚本 (`galaxy/client/demo_device_events.py`)
-- 可执行的演示代码
-- 展示如何订阅和处理设备事件
-- 包含详细的日志输出
-
-## 📊 事件数据结构
-
-每个 `DeviceEvent` 包含的 `all_devices` 字段结构:
-
-```python
-{
- "device_001": {
- "device_id": "device_001",
- "status": "idle", # 设备状态
- "os": "Windows", # 操作系统
- "server_url": "ws://...", # WebSocket URL
- "capabilities": [...], # 能力列表
- "metadata": {...}, # 元数据
- "last_heartbeat": "2025-...", # 最后心跳时间
- "connection_attempts": 0, # 连接尝试次数
- "max_retries": 5, # 最大重试次数
- "current_task_id": None # 当前任务ID
- },
- "device_002": { ... },
- ...
-}
-```
-
-## 🎯 核心特性
-
-1. **完整的设备状态快照** - 每个事件都包含所有设备的当前状态
-2. **异步事件发布** - 不阻塞设备管理器的主要操作
-3. **解耦设计** - 观察者可以独立订阅和处理事件
-4. **错误隔离** - 观察者异常不影响事件发布者
-5. **丰富的上下文** - 事件包含完整的设备信息和快照
-
-## 📝 使用示例
-
-```python
-from galaxy.core.events import get_event_bus, EventType, DeviceEvent, IEventObserver
-from galaxy.client.device_manager import ConstellationDeviceManager
-
-class MyDeviceMonitor(IEventObserver):
- async def on_event(self, event):
- if isinstance(event, DeviceEvent):
- print(f"Event: {event.event_type.value}")
- print(f"Device: {event.device_id}")
- print(f"Total devices: {len(event.all_devices)}")
-
-# 创建管理器和观察者
-manager = ConstellationDeviceManager()
-monitor = MyDeviceMonitor()
-
-# 订阅事件
-get_event_bus().subscribe(
- monitor,
- event_types={
- EventType.DEVICE_CONNECTED,
- EventType.DEVICE_DISCONNECTED,
- EventType.DEVICE_STATUS_CHANGED,
- }
-)
-
-# 现在所有设备事件都会自动通知 monitor
-```
-
-## 🔍 验证方法
-
-运行测试套件:
-```bash
-python -m pytest tests/galaxy/client/test_device_events.py -v
-```
-
-运行演示脚本:
-```bash
-python -m galaxy.client.demo_device_events
-```
-
-## 📁 修改的文件
-
-1. `galaxy/core/events.py` - 添加设备事件类型和 DeviceEvent 类
-2. `galaxy/client/device_manager.py` - 集成事件发布功能
-3. `tests/galaxy/client/test_device_events.py` - 完整测试套件(新建)
-4. `galaxy/client/DEVICE_EVENTS.md` - 使用指南(新建)
-5. `galaxy/client/demo_device_events.py` - 演示脚本(新建)
-
-## ✨ 下一步可以做的
-
-1. 在 WebUI 中集成设备事件显示
-2. 添加设备事件持久化/日志记录
-3. 实现基于事件的设备负载均衡
-4. 添加设备健康监控和告警
-5. 创建设备状态变化的时间线可视化
-
-## 🎉 总结
-
-成功在事件系统中添加了完整的设备事件支持,包括:
-- ✅ 3种设备事件类型
-- ✅ 设备注册表完整快照
-- ✅ 自动事件发布机制
-- ✅ 完整测试覆盖
-- ✅ 详细文档和示例
-
-所有功能都已测试通过,可以立即使用!🚀