From 169175a9bce40316100cc1bb36b45b872d4fb646 Mon Sep 17 00:00:00 2001 From: Ali Syed Date: Fri, 19 Dec 2025 14:20:39 +0000 Subject: [PATCH] feat: Use builtin claude-code agent for net-edge evals Summary: - Add Claude Code agent configuration for running NetEdge eval scenarios - Include 5 eval definitions matching the Gemini agent structure. 6th eval needs more investigation - Update README Assisted with Claude Code --- examples/net-edge/README.md | 22 ++++++++++++++++--- .../eval_1_selector-mismatch.yaml | 14 ++++++++++++ .../claude-code-agent/eval_2_nxdomain.yaml | 14 ++++++++++++ .../eval_3_networkpolicy.yaml | 14 ++++++++++++ .../eval_4_reencrypt-tls.yaml | 14 ++++++++++++ .../eval_5_loadbalancer.yaml | 14 ++++++++++++ examples/net-edge/mcp-config.yaml | 2 ++ pkg/agent/claude_code.go | 13 ++++++++++- 8 files changed, 103 insertions(+), 4 deletions(-) create mode 100644 examples/net-edge/claude-code-agent/eval_1_selector-mismatch.yaml create mode 100644 examples/net-edge/claude-code-agent/eval_2_nxdomain.yaml create mode 100644 examples/net-edge/claude-code-agent/eval_3_networkpolicy.yaml create mode 100644 examples/net-edge/claude-code-agent/eval_4_reencrypt-tls.yaml create mode 100644 examples/net-edge/claude-code-agent/eval_5_loadbalancer.yaml diff --git a/examples/net-edge/README.md b/examples/net-edge/README.md index 9e06a3d..d7b4ef2 100644 --- a/examples/net-edge/README.md +++ b/examples/net-edge/README.md @@ -11,7 +11,9 @@ net-edge/ ├── mcp-config.yaml # Launches the gen-mcp NetEdge server for gevals ├── codex-agent/ │ ├── agent.yaml # Codex CLI wiring -│ └── eval.yaml # Eval definition (scenario 1) +│ └── eval_*.yaml # Eval definitions +├── claude-code-agent/ +│ └── eval_*.yaml # Eval definitions (uses builtin claude-code agent) └── tasks/ └── selector-mismatch/ # Task definition and helper scripts ├── selector-mismatch.yaml @@ -57,7 +59,7 @@ Provide the key at runtime, for example: export OPENAI_API_KEY=sk-... 
``` -## Running the eval +## Running with Codex 1. Build the project (from repo root): `make build` 2. Ensure your current shell can reach the OpenShift cluster (`oc whoami` should succeed). @@ -65,7 +67,21 @@ export OPENAI_API_KEY=sk-... 4. Run the evaluation: ```bash - ./gevals run examples/net-edge/codex-agent/eval.yaml + ./gevals eval examples/net-edge/codex-agent/eval_1_selector-mismatch.yaml + ``` + +## Running with Claude Code + +1. Build the project (from repo root): `make build` +2. Ensure the `claude` CLI is on your `PATH`. +3. Ensure your current shell can reach the OpenShift cluster (`oc whoami` should succeed). +4. **Authentication:** If using Vertex AI, ensure you have GCP credentials configured: + - Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable, OR + - Run `gcloud auth application-default login` +5. Run the evaluation: + + ```bash + ./gevals eval examples/net-edge/claude-code-agent/eval_1_selector-mismatch.yaml ``` `setup.sh` deploys the hello workload, then intentionally breaks the Service selector so the Route loses its diff --git a/examples/net-edge/claude-code-agent/eval_1_selector-mismatch.yaml b/examples/net-edge/claude-code-agent/eval_1_selector-mismatch.yaml new file mode 100644 index 0000000..e5a1db9 --- /dev/null +++ b/examples/net-edge/claude-code-agent/eval_1_selector-mismatch.yaml @@ -0,0 +1,14 @@ +kind: Eval +metadata: + name: "claude-code-netedge-selector-mismatch" +config: + agent: + type: "builtin.claude-code" + mcpConfigFile: ../mcp-config.yaml + taskSets: + - path: ../tasks/selector-mismatch/selector-mismatch.yaml + assertions: + toolsUsed: + - server: netedge + minToolCalls: 1 + maxToolCalls: 20 diff --git a/examples/net-edge/claude-code-agent/eval_2_nxdomain.yaml b/examples/net-edge/claude-code-agent/eval_2_nxdomain.yaml new file mode 100644 index 0000000..2af9291 --- /dev/null +++ b/examples/net-edge/claude-code-agent/eval_2_nxdomain.yaml @@ -0,0 +1,14 @@ +kind: Eval +metadata: + name: "claude-code-netedge-nxdomain-host" 
+config: + agent: + type: "builtin.claude-code" + mcpConfigFile: ../mcp-config.yaml + taskSets: + - path: ../tasks/nxdomain-host/nxdomain-host.yaml + assertions: + toolsUsed: + - server: netedge + minToolCalls: 1 + maxToolCalls: 20 diff --git a/examples/net-edge/claude-code-agent/eval_3_networkpolicy.yaml b/examples/net-edge/claude-code-agent/eval_3_networkpolicy.yaml new file mode 100644 index 0000000..73080b2 --- /dev/null +++ b/examples/net-edge/claude-code-agent/eval_3_networkpolicy.yaml @@ -0,0 +1,14 @@ +kind: Eval +metadata: + name: "claude-code-netedge-networkpolicy-block" +config: + agent: + type: "builtin.claude-code" + mcpConfigFile: ../mcp-config.yaml + taskSets: + - path: ../tasks/networkpolicy-block/networkpolicy-block.yaml + assertions: + toolsUsed: + - server: netedge + minToolCalls: 1 + maxToolCalls: 20 diff --git a/examples/net-edge/claude-code-agent/eval_4_reencrypt-tls.yaml b/examples/net-edge/claude-code-agent/eval_4_reencrypt-tls.yaml new file mode 100644 index 0000000..d710033 --- /dev/null +++ b/examples/net-edge/claude-code-agent/eval_4_reencrypt-tls.yaml @@ -0,0 +1,14 @@ +kind: Eval +metadata: + name: "claude-code-netedge-reencrypt-no-backend-tls" +config: + agent: + type: "builtin.claude-code" + mcpConfigFile: ../mcp-config.yaml + taskSets: + - path: ../tasks/reencrypt-no-backend-tls/reencrypt-no-backend-tls.yaml + assertions: + toolsUsed: + - server: netedge + minToolCalls: 1 + maxToolCalls: 20 diff --git a/examples/net-edge/claude-code-agent/eval_5_loadbalancer.yaml b/examples/net-edge/claude-code-agent/eval_5_loadbalancer.yaml new file mode 100644 index 0000000..473bbcb --- /dev/null +++ b/examples/net-edge/claude-code-agent/eval_5_loadbalancer.yaml @@ -0,0 +1,14 @@ +kind: Eval +metadata: + name: "claude-code-netedge-loadbalancer-missing" +config: + agent: + type: "builtin.claude-code" + mcpConfigFile: ../mcp-config.yaml + taskSets: + - path: ../tasks/loadbalancer-missing/loadbalancer-missing.yaml + assertions: + toolsUsed: + - server: 
netedge + minToolCalls: 1 + maxToolCalls: 20 diff --git a/examples/net-edge/mcp-config.yaml b/examples/net-edge/mcp-config.yaml index d007209..ff4a016 100644 --- a/examples/net-edge/mcp-config.yaml +++ b/examples/net-edge/mcp-config.yaml @@ -5,4 +5,6 @@ mcpServers: - run - -f - ../gen-mcp/examples/netedge-tools/mcpfile.yaml + - -s + - ../gen-mcp/examples/netedge-tools/mcpserver.yaml enableAllTools: true diff --git a/pkg/agent/claude_code.go b/pkg/agent/claude_code.go index 6c4fa3e..737f469 100644 --- a/pkg/agent/claude_code.go +++ b/pkg/agent/claude_code.go @@ -2,6 +2,7 @@ package agent import ( "fmt" + "os" "os/exec" ) @@ -23,6 +24,16 @@ func (a *ClaudeCodeAgent) ValidateEnvironment() error { if _, err := exec.LookPath("claude"); err != nil { return fmt.Errorf("'claude' binary not found in PATH") } + // Check for GCP credentials (for Vertex AI users) + if os.Getenv("GOOGLE_APPLICATION_CREDENTIALS") == "" { + if _, err := exec.LookPath("gcloud"); err == nil { + // gcloud exists, check if ADC is configured + cmd := exec.Command("gcloud", "auth", "application-default", "print-access-token") + if err := cmd.Run(); err != nil { + fmt.Fprintf(os.Stderr, "Warning: No GCP credentials found. If using Vertex AI, run 'gcloud auth application-default login'\n") + } + } + } return nil } @@ -38,7 +49,7 @@ func (a *ClaudeCodeAgent) GetDefaults(model string) (*AgentSpec, error) { ArgTemplateMcpServer: "--mcp-config {{ .File }}", ArgTemplateAllowedTools: "mcp__{{ .ServerName }}__{{ .ToolName }}", AllowedToolsJoinSeparator: &separator, - RunPrompt: `claude {{ .McpServerFileArgs }} --strict-mcp-config --allowedTools "{{ .AllowedToolArgs }}" --print "{{ .Prompt }}"`, + RunPrompt: `claude {{ .McpServerFileArgs }} --strict-mcp-config --allowedTools "{{ .AllowedToolArgs }}" -p "{{ .Prompt }}" --dangerously-skip-permissions --output-format stream-json --verbose`, }, }, nil }