Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions examples/net-edge/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ net-edge/
├── mcp-config.yaml # Launches the gen-mcp NetEdge server for gevals
├── codex-agent/
│ ├── agent.yaml # Codex CLI wiring
│ └── eval.yaml # Eval definition (scenario 1)
│ └── eval_*.yaml # Eval definitions
├── claude-code-agent/
│ └── eval_*.yaml # Eval definitions (uses builtin claude-code agent)
└── tasks/
└── selector-mismatch/ # Task definition and helper scripts
├── selector-mismatch.yaml
Expand Down Expand Up @@ -57,15 +59,29 @@ Provide the key at runtime, for example:
export OPENAI_API_KEY=sk-...
```

## Running the eval
## Running with Codex

1. Build the project (from repo root): `make build`
2. Ensure your current shell can reach the OpenShift cluster (`oc whoami` should succeed).
3. Ensure `OPENAI_API_KEY` is exported in the shell that will launch `gevals`.
4. Run the evaluation:

```bash
./gevals run examples/net-edge/codex-agent/eval.yaml
./gevals eval examples/net-edge/codex-agent/eval_1_selector-mismatch.yaml
```

## Running with Claude Code

1. Build the project (from repo root): `make build`
2. Ensure the `claude` CLI is in your `PATH`.
3. Ensure your current shell can reach the OpenShift cluster (`oc whoami` should succeed).
4. **Authentication:** If you are using Vertex AI, ensure that GCP credentials are configured in one of these ways:
- Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable, OR
- Run `gcloud auth application-default login`
5. Run the evaluation:

```bash
./gevals eval examples/net-edge/claude-code-agent/eval_1_selector-mismatch.yaml
```

`setup.sh` deploys the hello workload, then intentionally breaks the Service selector so the Route loses its
Expand Down
14 changes: 14 additions & 0 deletions examples/net-edge/claude-code-agent/eval_1_selector-mismatch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
kind: Eval
metadata:
name: "claude-code-netedge-selector-mismatch"
config:
agent:
type: "builtin.claude-code"
mcpConfigFile: ../mcp-config.yaml
taskSets:
- path: ../tasks/selector-mismatch/selector-mismatch.yaml
assertions:
toolsUsed:
- server: netedge
minToolCalls: 1
maxToolCalls: 20
14 changes: 14 additions & 0 deletions examples/net-edge/claude-code-agent/eval_2_nxdomain.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
kind: Eval
metadata:
name: "claude-code-netedge-nxdomain-host"
config:
agent:
type: "builtin.claude-code"
mcpConfigFile: ../mcp-config.yaml
taskSets:
- path: ../tasks/nxdomain-host/nxdomain-host.yaml
assertions:
toolsUsed:
- server: netedge
minToolCalls: 1
maxToolCalls: 20
14 changes: 14 additions & 0 deletions examples/net-edge/claude-code-agent/eval_3_networkpolicy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
kind: Eval
metadata:
name: "claude-code-netedge-networkpolicy-block"
config:
agent:
type: "builtin.claude-code"
mcpConfigFile: ../mcp-config.yaml
taskSets:
- path: ../tasks/networkpolicy-block/networkpolicy-block.yaml
assertions:
toolsUsed:
- server: netedge
minToolCalls: 1
maxToolCalls: 20
14 changes: 14 additions & 0 deletions examples/net-edge/claude-code-agent/eval_4_reencrypt-tls.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
kind: Eval
metadata:
name: "claude-code-netedge-reencrypt-no-backend-tls"
config:
agent:
type: "builtin.claude-code"
mcpConfigFile: ../mcp-config.yaml
taskSets:
- path: ../tasks/reencrypt-no-backend-tls/reencrypt-no-backend-tls.yaml
assertions:
toolsUsed:
- server: netedge
minToolCalls: 1
maxToolCalls: 20
14 changes: 14 additions & 0 deletions examples/net-edge/claude-code-agent/eval_5_loadbalancer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
kind: Eval
metadata:
name: "claude-code-netedge-loadbalancer-missing"
config:
agent:
type: "builtin.claude-code"
mcpConfigFile: ../mcp-config.yaml
taskSets:
- path: ../tasks/loadbalancer-missing/loadbalancer-missing.yaml
assertions:
toolsUsed:
- server: netedge
minToolCalls: 1
maxToolCalls: 20
2 changes: 2 additions & 0 deletions examples/net-edge/mcp-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,6 @@ mcpServers:
- run
- -f
- ../gen-mcp/examples/netedge-tools/mcpfile.yaml
- -s
- ../gen-mcp/examples/netedge-tools/mcpserver.yaml
enableAllTools: true
13 changes: 12 additions & 1 deletion pkg/agent/claude_code.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package agent

import (
"fmt"
"os"
"os/exec"
)

Expand All @@ -23,6 +24,16 @@ func (a *ClaudeCodeAgent) ValidateEnvironment() error {
if _, err := exec.LookPath("claude"); err != nil {
return fmt.Errorf("'claude' binary not found in PATH")
}
// Check for GCP credentials (for Vertex AI users)
if os.Getenv("GOOGLE_APPLICATION_CREDENTIALS") == "" {
if _, err := exec.LookPath("gcloud"); err == nil {
// gcloud exists, check if ADC is configured
cmd := exec.Command("gcloud", "auth", "application-default", "print-access-token")
if err := cmd.Run(); err != nil {
fmt.Fprintf(os.Stderr, "Warning: No GCP credentials found. If using Vertex AI, run 'gcloud auth application-default login'\n")
}
}
}
return nil
}

Expand All @@ -38,7 +49,7 @@ func (a *ClaudeCodeAgent) GetDefaults(model string) (*AgentSpec, error) {
ArgTemplateMcpServer: "--mcp-config {{ .File }}",
ArgTemplateAllowedTools: "mcp__{{ .ServerName }}__{{ .ToolName }}",
AllowedToolsJoinSeparator: &separator,
RunPrompt: `claude {{ .McpServerFileArgs }} --strict-mcp-config --allowedTools "{{ .AllowedToolArgs }}" --print "{{ .Prompt }}"`,
RunPrompt: `claude {{ .McpServerFileArgs }} --strict-mcp-config --allowedTools "{{ .AllowedToolArgs }}" -p "{{ .Prompt }}" --dangerously-skip-permissions --output-format stream-json --verbose`,
},
}, nil
}