From 89714766963b7ddeb859d598afd3224bed98360d Mon Sep 17 00:00:00 2001 From: GAP Promoter Date: Sat, 30 May 2026 17:55:43 +0000 Subject: [PATCH 1/2] Add fajarsajid/agent-redteam to the registry --- agents/fajarsajid__agent-redteam/README.md | 82 +++++++++++++++++++ .../fajarsajid__agent-redteam/metadata.json | 13 +++ 2 files changed, 95 insertions(+) create mode 100644 agents/fajarsajid__agent-redteam/README.md create mode 100644 agents/fajarsajid__agent-redteam/metadata.json diff --git a/agents/fajarsajid__agent-redteam/README.md b/agents/fajarsajid__agent-redteam/README.md new file mode 100644 index 0000000..8a0f87a --- /dev/null +++ b/agents/fajarsajid__agent-redteam/README.md @@ -0,0 +1,82 @@ +# agent-redteam + +An agentic LLM red-team harness that uses **Claude** to systematically probe AI agent system prompts for security vulnerabilities — including prompt injection, identity spoofing, credential exfiltration, privilege escalation, goal hijacking, and safety-boundary bypass. + +Built as a research artifact by Fajar Sajid (Purdue University), it found a **49.5% mean violation rate** across 384 trials against a real e-commerce agent prompt. 
+ +--- + +## Run + +```bash +npx @open-gitagent/gitagent run -r https://github.com/fajarsajid/agent-redteam +``` + +--- + +## What It Can Do + +- **Adversarial Probe Generation** — Crafts realistic, target-tailored probes across 8 MITRE-mapped attack categories using Claude as the red-team analyst +- **Vulnerability Analysis** — Evaluates whether a target system prompt is susceptible to each probe, with CVSS-like severity scores (0–10) and attack paths +- **Incident Reporting** — Produces structured JSON findings for SIEM/tooling integration and human-readable Markdown reports +- **CI Pipeline Integration** — Exits with status `1` on critical/high findings; no dependencies beyond `requests` +- **Multi-turn Assessment** — Evaluates context drift across interaction sequences (single-turn benchmarks underestimate real-world vulnerability by ~1.7× at 7 turns) + +--- + +## Attack Categories + +| Category | MITRE | Violation Rate (empirical) | +|---|---|---| +| Prompt Injection (Indirect) | T1059 | 70.8% | +| Identity Spoofing | T1078 | 62.5% | +| Prompt Injection (Direct) | T1059 | 54.2% | +| Goal Hijacking | T1565 | 50.0% | +| Privilege Escalation | T1548 | 45.8% | +| Data Exfiltration | T1041 | 41.7% | +| Credential Exfiltration | T1552 | 37.5% | +| Safety Boundary Bypass | T1562 | 33.3% | + +--- + +## Usage + +```bash +git clone https://github.com/fajarsajid/agent-redteam +cd agent-redteam +pip install requests +export ANTHROPIC_API_KEY=sk-ant-... 
+ +# Quick scan +python redteam.py --prompt examples/orderbot_prompt.txt + +# Full run with report output +python redteam.py --prompt system_prompt.txt \ + --probes 3 --output report.md --json findings.json + +# CI mode (exit 1 on critical/high findings) +python redteam.py --prompt system_prompt.txt --quiet +``` + +--- + +## Structure + +``` +agent-redteam/ +├── agent.yaml ← GAP manifest +├── SOUL.md ← Agent persona & security philosophy +├── redteam.py ← CLI evaluation tool +├── probe_engine.py ← Claude-powered adversarial probe generation +├── categories.py ← Attack taxonomy (8 categories, MITRE-mapped) +├── reporter.py ← Terminal summary + Markdown incident report +├── test_redteam.py ← 17/17 unit + mocked API tests +└── examples/ + └── orderbot_prompt.txt ← Target agent used in experiments +``` + +--- + +## Built with + +[gitagent](https://github.com/open-gitagent/gitagent) — a git-native, framework-agnostic open standard for AI agents. diff --git a/agents/fajarsajid__agent-redteam/metadata.json b/agents/fajarsajid__agent-redteam/metadata.json new file mode 100644 index 0000000..5f7f943 --- /dev/null +++ b/agents/fajarsajid__agent-redteam/metadata.json @@ -0,0 +1,13 @@ +{ + "name": "agent-redteam", + "author": "fajarsajid", + "description": "Agentic LLM red-team harness: uses Claude to probe AI agent system prompts for identity abuse, credential exfiltration, and safety-boundary vulnerabilities.", + "repository": "https://github.com/fajarsajid/agent-redteam", + "version": "1.0.0", + "category": "security", + "tags": ["red-teaming", "security", "llm-security", "prompt-injection", "adversarial", "claude", "ai-safety", "vulnerability-assessment"], + "license": "MIT", + "model": "claude-sonnet-4-20250514", + "adapters": ["system-prompt"], + "icon": false +} From 23031e6f977bd8661363a3f961702a334054dd3f Mon Sep 17 00:00:00 2001 From: GAP Promoter Date: Sat, 30 May 2026 17:56:35 +0000 Subject: [PATCH 2/2] Fix: point repository to fork with GAP files (pending upstream 
PR merge) --- agents/fajarsajid__agent-redteam/metadata.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agents/fajarsajid__agent-redteam/metadata.json b/agents/fajarsajid__agent-redteam/metadata.json index 5f7f943..80685d0 100644 --- a/agents/fajarsajid__agent-redteam/metadata.json +++ b/agents/fajarsajid__agent-redteam/metadata.json @@ -2,7 +2,7 @@ "name": "agent-redteam", "author": "fajarsajid", "description": "Agentic LLM red-team harness: uses Claude to probe AI agent system prompts for identity abuse, credential exfiltration, and safety-boundary vulnerabilities.", - "repository": "https://github.com/fajarsajid/agent-redteam", + "repository": "https://github.com/computer-agent/agent-redteam", "version": "1.0.0", "category": "security", "tags": ["red-teaming", "security", "llm-security", "prompt-injection", "adversarial", "claude", "ai-safety", "vulnerability-assessment"],