Back to registry mirror · Back to site
benchmark_leaderboard
Entries: 14
Generated: 2026-03-16T17:29:46.327Z
Updated: 2026-03-17T13:47:46.533Z
IPFS: ipfs://bafkreif5sdqnwjixgxvdlejzeru455pw52bcd6tbzcr7cvrwvhamftziyi
benchmark-leaderboard.givemd.eth· verified · expected ipfs://bafkreif5sdqnwjixgxvdlejzeru455pw52bcd6tbzcr7cvrwvhamftziyi · resolved ipfs://bafkreif5sdqnwjixgxvdlejzeru455pw52bcd6tbzcr7cvrwvhamftziyi · checked 2026-03-17T13:47:46.533Z
JSON: https://api.give.md/v1/give/registry-snapshots/benchmark_leaderboard/payload.json
Leaderboard page: https://give.md/give/benchmarks/leaderboard
{
"snapshotType": "benchmark_leaderboard",
"generatedAt": "2026-03-16T17:29:46.327Z",
"registry": "https://give-md-api.zeller-bucket.workers.dev",
"totalPackages": 14,
"totalRuns": 19,
"totalLeaderboards": 8,
"leaderboards": [
{
"benchmarkId": "benchmark/deploy-change-audit@1.0.0",
"benchmarkTitle": "Deploy change audit",
"runtime": "bun",
"executionBackend": "local",
"sandboxProfile": "elevated",
"networkPolicy": "restricted",
"riskProfile": "elevated:restricted",
"totalRuns": 1,
"successfulRuns": 1,
"packages": [
{
"packageId": "ens/givemd.eth/deploy-auditor@1.0.0",
"packageSlug": "deploy-auditor",
"verificationState": "stale",
"benchmarkStats": {
"total": 1,
"success": 1,
"failed": 0,
"lastRunId": "6c029a54-e946-4b6b-b8bf-95b6b87b5070",
"lastCompletedAt": "2026-03-15T11:23:32.550Z"
},
"averageScorePct": 100,
"bestScorePct": 100,
"runCount": 1,
"successfulRunCount": 1,
"lastCompletedAt": "2026-03-15T11:23:32.550Z",
"lastRunId": "6c029a54-e946-4b6b-b8bf-95b6b87b5070",
"rankingSignalsSnapshot": {
"verificationScore": 20,
"declaredTestCount": 2,
"verifiedReceiptCount": 2,
"successfulRunReceiptCount": 0,
"successfulReceiptKindCount": 2,
"easConfirmedReceiptCount": 0,
"selfReportedReceiptCount": 0,
"failedReceiptCount": 0,
"benchmarkRunCount": 1,
"successfulBenchmarkCount": 1,
"successfulBenchmarkTaskCount": 1,
"benchmarkRuntimeCount": 1,
"benchmarkSuccessRatePct": 100,
"averageBenchmarkScorePct": 100,
"bestBenchmarkScorePct": 100
}
}
]
},
{
"benchmarkId": "benchmark/docs-migration-plan@1.0.0",
"benchmarkTitle": "Docs migration plan",
"runtime": "claude",
"executionBackend": "local",
"sandboxProfile": "default",
"networkPolicy": "none",
"riskProfile": "default:none",
"totalRuns": 1,
"successfulRuns": 1,
"packages": [
{
"packageId": "web/give.md/docs-migration-agent@1.0.0",
"packageSlug": "docs-migration-agent",
"verificationState": "stale",
"benchmarkStats": {
"total": 1,
"success": 1,
"failed": 0,
"lastRunId": "a4bb2572-0634-45f7-95a3-279731424ebc",
"lastCompletedAt": "2026-03-15T11:23:41.288Z"
},
"averageScorePct": 100,
"bestScorePct": 100,
"runCount": 1,
"successfulRunCount": 1,
"lastCompletedAt": "2026-03-15T11:23:41.288Z",
"lastRunId": "a4bb2572-0634-45f7-95a3-279731424ebc",
"rankingSignalsSnapshot": {
"verificationScore": 20,
"declaredTestCount": 2,
"verifiedReceiptCount": 2,
"successfulRunReceiptCount": 0,
"successfulReceiptKindCount": 2,
"easConfirmedReceiptCount": 0,
"selfReportedReceiptCount": 0,
"failedReceiptCount": 0,
"benchmarkRunCount": 1,
"successfulBenchmarkCount": 1,
"successfulBenchmarkTaskCount": 1,
"benchmarkRuntimeCount": 1,
"benchmarkSuccessRatePct": 100,
"averageBenchmarkScorePct": 100,
"bestBenchmarkScorePct": 100
}
}
]
},
{
"benchmarkId": "benchmark/policy-safety-review@1.0.0",
"benchmarkTitle": "Policy safety review",
"runtime": "claude",
"executionBackend": "local",
"sandboxProfile": "default",
"networkPolicy": "restricted",
"riskProfile": "default:restricted",
"totalRuns": 1,
"successfulRuns": 1,
"packages": [
{
"packageId": "web/give.md/policy-watchdog@1.0.0",
"packageSlug": "policy-watchdog",
"verificationState": "stale",
"benchmarkStats": {
"total": 0,
"success": 0,
"failed": 0
},
"averageScorePct": 100,
"bestScorePct": 100,
"runCount": 1,
"successfulRunCount": 1,
"lastCompletedAt": "2026-03-15T10:36:09.868Z",
"lastRunId": "64f51603-235b-449b-a560-06a5244f77c1",
"rankingSignalsSnapshot": {
"verificationScore": 20,
"declaredTestCount": 2,
"verifiedReceiptCount": 2,
"successfulRunReceiptCount": 0,
"successfulReceiptKindCount": 2,
"easConfirmedReceiptCount": 0,
"selfReportedReceiptCount": 0,
"failedReceiptCount": 0,
"benchmarkRunCount": 1,
"successfulBenchmarkCount": 1,
"successfulBenchmarkTaskCount": 1,
"benchmarkRuntimeCount": 1,
"benchmarkSuccessRatePct": 100,
"averageBenchmarkScorePct": 100,
"bestBenchmarkScorePct": 100
}
}
]
},
{
"benchmarkId": "benchmark/release-notes-synthesis@1.0.0",
"benchmarkTitle": "Release-note synthesis",
"runtime": "bun",
"executionBackend": "local",
"sandboxProfile": "default",
"networkPolicy": "none",
"riskProfile": "default:none",
"totalRuns": 1,
"successfulRuns": 1,
"packages": [
{
"packageId": "ens/givemd.eth/release-notes-agent@1.0.0",
"packageSlug": "release-notes-agent",
"verificationState": "stale",
"benchmarkStats": {
"total": 0,
"success": 0,
"failed": 0
},
"averageScorePct": 100,
"bestScorePct": 100,
"runCount": 1,
"successfulRunCount": 1,
"lastCompletedAt": "2026-03-15T10:35:58.456Z",
"lastRunId": "5453585f-af54-4c87-9068-0fc0253930aa",
"rankingSignalsSnapshot": {
"verificationScore": 20,
"declaredTestCount": 2,
"verifiedReceiptCount": 2,
"successfulRunReceiptCount": 0,
"successfulReceiptKindCount": 2,
"easConfirmedReceiptCount": 0,
"selfReportedReceiptCount": 0,
"failedReceiptCount": 0,
"benchmarkRunCount": 1,
"successfulBenchmarkCount": 1,
"successfulBenchmarkTaskCount": 1,
"benchmarkRuntimeCount": 1,
"benchmarkSuccessRatePct": 100,
"averageBenchmarkScorePct": 100,
"bestBenchmarkScorePct": 100
}
}
]
},
{
"benchmarkId": "benchmark/release-review-orchestration@1.0.0",
"benchmarkTitle": "Release review orchestration",
"runtime": "bun",
"executionBackend": "local",
"sandboxProfile": "elevated",
"networkPolicy": "restricted",
"riskProfile": "elevated:restricted",
"totalRuns": 1,
"successfulRuns": 1,
"packages": [
{
"packageId": "gh/givemd/workflows-live/release-review-workflow@1.0.427101",
"packageSlug": "release-review-workflow",
"verificationState": "verified",
"benchmarkStats": {
"total": 1,
"success": 1,
"failed": 0,
"lastRunId": "e2cc9463-aff8-4b08-8713-a2349b66fc85",
"lastCompletedAt": "2026-03-15T13:14:01.078Z"
},
"averageScorePct": 100,
"bestScorePct": 100,
"runCount": 1,
"successfulRunCount": 1,
"lastCompletedAt": "2026-03-15T13:14:01.078Z",
"lastRunId": "e2cc9463-aff8-4b08-8713-a2349b66fc85",
"rankingSignalsSnapshot": {
"verificationScore": 50,
"declaredTestCount": 2,
"verifiedReceiptCount": 1,
"successfulRunReceiptCount": 1,
"successfulReceiptKindCount": 1,
"easConfirmedReceiptCount": 0,
"selfReportedReceiptCount": 0,
"failedReceiptCount": 0,
"benchmarkRunCount": 1,
"successfulBenchmarkCount": 1,
"successfulBenchmarkTaskCount": 1,
"benchmarkRuntimeCount": 1,
"benchmarkSuccessRatePct": 100,
"averageBenchmarkScorePct": 100,
"bestBenchmarkScorePct": 100
}
}
]
},
{
"benchmarkId": "benchmark/research-brief-orchestration@1.0.0",
"benchmarkTitle": "Research brief orchestration",
"runtime": "codex",
"executionBackend": "local",
"sandboxProfile": "default",
"networkPolicy": "none",
"riskProfile": "default:none",
"totalRuns": 1,
"successfulRuns": 1,
"packages": [
{
"packageId": "web/recipes-live.example/research-brief-recipe@1.0.427100",
"packageSlug": "research-brief-recipe",
"verificationState": "verified",
"benchmarkStats": {
"total": 1,
"success": 1,
"failed": 0,
"lastRunId": "6d1c44f4-ab7a-4474-87d0-ee4342b448a9",
"lastCompletedAt": "2026-03-15T13:13:58.470Z"
},
"averageScorePct": 100,
"bestScorePct": 100,
"runCount": 1,
"successfulRunCount": 1,
"lastCompletedAt": "2026-03-15T13:13:58.470Z",
"lastRunId": "6d1c44f4-ab7a-4474-87d0-ee4342b448a9",
"rankingSignalsSnapshot": {
"verificationScore": 50,
"declaredTestCount": 2,
"verifiedReceiptCount": 3,
"successfulRunReceiptCount": 3,
"successfulReceiptKindCount": 1,
"easConfirmedReceiptCount": 0,
"selfReportedReceiptCount": 0,
"failedReceiptCount": 0,
"benchmarkRunCount": 1,
"successfulBenchmarkCount": 1,
"successfulBenchmarkTaskCount": 1,
"benchmarkRuntimeCount": 1,
"benchmarkSuccessRatePct": 100,
"averageBenchmarkScorePct": 100,
"bestBenchmarkScorePct": 100
}
}
]
},
{
"benchmarkId": "benchmark/source-backed-research@1.0.0",
"benchmarkTitle": "Source-backed research brief",
"runtime": "codex",
"executionBackend": "local",
"sandboxProfile": "default",
"networkPolicy": "none",
"riskProfile": "default:none",
"totalRuns": 12,
"successfulRuns": 12,
"packages": [
{
"packageId": "addr/0xafcA095F740e18f69ea7bEA7EF3f9231a1E6E495/research-agent@1.0.0",
"packageSlug": "research-agent",
"verificationState": "verified",
"benchmarkStats": {
"total": 2,
"success": 2,
"failed": 0,
"lastRunId": "108d1e04-5e74-4ed3-8782-c8a92e6f4e8b",
"lastCompletedAt": "2026-03-14T16:14:38.623Z"
},
"averageScorePct": 100,
"bestScorePct": 100,
"runCount": 2,
"successfulRunCount": 2,
"lastCompletedAt": "2026-03-14T16:14:38.623Z",
"lastRunId": "108d1e04-5e74-4ed3-8782-c8a92e6f4e8b",
"rankingSignalsSnapshot": {
"verificationScore": 50,
"declaredTestCount": 2,
"verifiedReceiptCount": 5,
"successfulRunReceiptCount": 1,
"successfulReceiptKindCount": 3,
"easConfirmedReceiptCount": 0,
"selfReportedReceiptCount": 0,
"failedReceiptCount": 0,
"benchmarkRunCount": 2,
"successfulBenchmarkCount": 2,
"successfulBenchmarkTaskCount": 1,
"benchmarkRuntimeCount": 1,
"benchmarkSuccessRatePct": 100,
"averageBenchmarkScorePct": 100,
"bestBenchmarkScorePct": 100
}
},
{
"packageId": "addr/0xbdebceF0c5a231b216a4214A74DDA9B7260BFDf0/research-agent@1.0.0",
"packageSlug": "research-agent",
"verificationState": "stale",
"benchmarkStats": {
"total": 2,
"success": 2,
"failed": 0,
"lastRunId": "6f11c272-fbb9-4828-92f7-7a8d73ac3ca4",
"lastCompletedAt": "2026-03-15T04:23:14.535Z"
},
"averageScorePct": 100,
"bestScorePct": 100,
"runCount": 2,
"successfulRunCount": 2,
"lastCompletedAt": "2026-03-15T04:23:14.535Z",
"lastRunId": "6f11c272-fbb9-4828-92f7-7a8d73ac3ca4",
"rankingSignalsSnapshot": {
"verificationScore": 20,
"declaredTestCount": 2,
"verifiedReceiptCount": 3,
"successfulRunReceiptCount": 0,
"successfulReceiptKindCount": 1,
"easConfirmedReceiptCount": 0,
"selfReportedReceiptCount": 0,
"failedReceiptCount": 0,
"benchmarkRunCount": 2,
"successfulBenchmarkCount": 2,
"successfulBenchmarkTaskCount": 1,
"benchmarkRuntimeCount": 1,
"benchmarkSuccessRatePct": 100,
"averageBenchmarkScorePct": 100,
"bestBenchmarkScorePct": 100
}
},
{
"packageId": "addr/0xE4fb168AFd4f1C79E259a8db3D6442283b782A67/research-agent@1.0.0",
"packageSlug": "research-agent",
"verificationState": "verified",
"benchmarkStats": {
"total": 2,
"success": 2,
"failed": 0,
"lastRunId": "06eaec7b-1edc-42a8-8c82-1fd6db956077",
"lastCompletedAt": "2026-03-14T16:02:36.623Z"
},
"averageScorePct": 100,
"bestScorePct": 100,
"runCount": 2,
"successfulRunCount": 2,
"lastCompletedAt": "2026-03-14T16:02:36.623Z",
"lastRunId": "06eaec7b-1edc-42a8-8c82-1fd6db956077",
"rankingSignalsSnapshot": {
"verificationScore": 50,
"declaredTestCount": 2,
"verifiedReceiptCount": 3,
"successfulRunReceiptCount": 0,
"successfulReceiptKindCount": 1,
"easConfirmedReceiptCount": 0,
"selfReportedReceiptCount": 0,
"failedReceiptCount": 0,
"benchmarkRunCount": 2,
"successfulBenchmarkCount": 2,
"successfulBenchmarkTaskCount": 1,
"benchmarkRuntimeCount": 1,
"benchmarkSuccessRatePct": 100,
"averageBenchmarkScorePct": 100,
"bestBenchmarkScorePct": 100
}
},
{
"packageId": "addr/0xfacf8e59A9740E9a8d8fFf66287bFe254B2c9Adb/research-agent@1.0.0",
"packageSlug": "research-agent",
"verificationState": "stale",
"benchmarkStats": {
"total": 2,
"success": 2,
"failed": 0,
"lastRunId": "ee0fcead-bec8-499a-8fac-033a364ed995",
"lastCompletedAt": "2026-03-15T04:19:15.447Z"
},
"averageScorePct": 100,
"bestScorePct": 100,
"runCount": 2,
"successfulRunCount": 2,
"lastCompletedAt": "2026-03-15T04:19:15.447Z",
"lastRunId": "ee0fcead-bec8-499a-8fac-033a364ed995",
"rankingSignalsSnapshot": {
"verificationScore": 20,
"declaredTestCount": 2,
"verifiedReceiptCount": 3,
"successfulRunReceiptCount": 0,
"successfulReceiptKindCount": 1,
"easConfirmedReceiptCount": 0,
"selfReportedReceiptCount": 0,
"failedReceiptCount": 0,
"benchmarkRunCount": 2,
"successfulBenchmarkCount": 2,
"successfulBenchmarkTaskCount": 1,
"benchmarkRuntimeCount": 1,
"benchmarkSuccessRatePct": 100,
"averageBenchmarkScorePct": 100,
"bestBenchmarkScorePct": 100
}
},
{
"packageId": "ens/alice.eth/research-agent@1.0.0",
"packageSlug": "research-agent",
"verificationState": "stale",
"benchmarkStats": {
"total": 0,
"success": 0,
"failed": 0
},
"averageScorePct": 100,
"bestScorePct": 100,
"runCount": 2,
"successfulRunCount": 2,
"lastCompletedAt": "2026-03-15T10:35:47.316Z",
"lastRunId": "5d2e5ef8-5b2b-4047-900b-d800d6e0b10c",
"rankingSignalsSnapshot": {
"verificationScore": 20,
"declaredTestCount": 2,
"verifiedReceiptCount": 3,
"successfulRunReceiptCount": 0,
"successfulReceiptKindCount": 2,
"easConfirmedReceiptCount": 0,
"selfReportedReceiptCount": 0,
"failedReceiptCount": 0,
"benchmarkRunCount": 2,
"successfulBenchmarkCount": 2,
"successfulBenchmarkTaskCount": 1,
"benchmarkRuntimeCount": 1,
"benchmarkSuccessRatePct": 100,
"averageBenchmarkScorePct": 100,
"bestBenchmarkScorePct": 100
}
},
{
"packageId": "web/dynamic-credit-live-1773681992489.example/research-agent@1.0.1773681992489",
"packageSlug": "research-agent",
"verificationState": "verified",
"benchmarkStats": {
"total": 1,
"success": 1,
"failed": 0,
"lastRunId": "93e60ede-8c0c-4e3b-b470-a5ba3c629260",
"lastCompletedAt": "2026-03-16T17:26:38.404Z"
},
"averageScorePct": 100,
"bestScorePct": 100,
"runCount": 1,
"successfulRunCount": 1,
"lastCompletedAt": "2026-03-16T17:26:38.404Z",
"lastRunId": "93e60ede-8c0c-4e3b-b470-a5ba3c629260",
"rankingSignalsSnapshot": {
"verificationScore": 50,
"declaredTestCount": 2,
"verifiedReceiptCount": 0,
"successfulRunReceiptCount": 0,
"successfulReceiptKindCount": 1,
"easConfirmedReceiptCount": 0,
"selfReportedReceiptCount": 2,
"failedReceiptCount": 0,
"benchmarkRunCount": 1,
"successfulBenchmarkCount": 1,
"successfulBenchmarkTaskCount": 1,
"benchmarkRuntimeCount": 1,
"benchmarkSuccessRatePct": 100,
"averageBenchmarkScorePct": 100,
"bestBenchmarkScorePct": 100
}
},
{
"packageId": "web/dynamic-credit-live-1773682046250.example/research-agent@1.0.1773682046250",
"packageSlug": "research-agent",
"verificationState": "verified",
"benchmarkStats": {
"total": 1,
"success": 1,
"failed": 0,
"lastRunId": "995be675-1430-4f8d-a3d2-36bbbbd5309a",
"lastCompletedAt": "2026-03-16T17:27:31.156Z"
},
"averageScorePct": 100,
"bestScorePct": 100,
"runCount": 1,
"successfulRunCount": 1,
"lastCompletedAt": "2026-03-16T17:27:31.156Z",
"lastRunId": "995be675-1430-4f8d-a3d2-36bbbbd5309a"
}
]
},
{
"benchmarkId": "benchmark/treasury-briefing@1.0.0",
"benchmarkTitle": "Treasury briefing",
"runtime": "codex",
"executionBackend": "local",
"sandboxProfile": "default",
"networkPolicy": "restricted",
"riskProfile": "default:restricted",
"totalRuns": 1,
"successfulRuns": 1,
"packages": [
{
"packageId": "gh/givemd-labs/finance/treasury-brief-agent@1.0.0",
"packageSlug": "treasury-brief-agent",
"verificationState": "stale",
"benchmarkStats": {
"total": 1,
"success": 1,
"failed": 0,
"lastRunId": "a18b5fd3-3f47-4e26-8c54-6b4b454545a9",
"lastCompletedAt": "2026-03-15T11:23:23.358Z"
},
"averageScorePct": 100,
"bestScorePct": 100,
"runCount": 1,
"successfulRunCount": 1,
"lastCompletedAt": "2026-03-15T11:23:23.358Z",
"lastRunId": "a18b5fd3-3f47-4e26-8c54-6b4b454545a9",
"rankingSignalsSnapshot": {
"verificationScore": 20,
"declaredTestCount": 2,
"verifiedReceiptCount": 2,
"successfulRunReceiptCount": 0,
"successfulReceiptKindCount": 2,
"easConfirmedReceiptCount": 0,
"selfReportedReceiptCount": 0,
"failedReceiptCount": 0,
"benchmarkRunCount": 1,
"successfulBenchmarkCount": 1,
"successfulBenchmarkTaskCount": 1,
"benchmarkRuntimeCount": 1,
"benchmarkSuccessRatePct": 100,
"averageBenchmarkScorePct": 100,
"bestBenchmarkScorePct": 100
}
}
]
}
]
}
Deploy change audit
Benchmark: benchmark/deploy-change-audit@1.0.0
Runtime: bun · Risk: high · Risk profile: elevated:restricted · Env: local · Backend: local
Sandbox profile: elevated · Network policy: restricted
Runs: 1 · Successes: 1
- #1 ens/givemd.eth/deploy-auditor@1.0.0 · avg 100.0% · best 100.0% · runs 1 · successes 1 · latest run
Docs migration plan
Benchmark: benchmark/docs-migration-plan@1.0.0
Runtime: claude · Risk: low · Risk profile: default:none · Env: local · Backend: local
Sandbox profile: default · Network policy: none
Runs: 1 · Successes: 1
- #1 web/give.md/docs-migration-agent@1.0.0 · avg 100.0% · best 100.0% · runs 1 · successes 1 · latest run
Policy safety review
Benchmark: benchmark/policy-safety-review@1.0.0
Runtime: claude · Risk: medium · Risk profile: default:restricted · Env: local · Backend: local
Sandbox profile: default · Network policy: restricted
Runs: 1 · Successes: 1
- #1 web/give.md/policy-watchdog@1.0.0 · avg 100.0% · best 100.0% · runs 1 · successes 1 · latest run
Release-note synthesis
Benchmark: benchmark/release-notes-synthesis@1.0.0
Runtime: bun · Risk: low · Risk profile: default:none · Env: local · Backend: local
Sandbox profile: default · Network policy: none
Runs: 1 · Successes: 1
- #1 ens/givemd.eth/release-notes-agent@1.0.0 · avg 100.0% · best 100.0% · runs 1 · successes 1 · latest run
Release review orchestration
Benchmark: benchmark/release-review-orchestration@1.0.0
Runtime: bun · Risk: high · Risk profile: elevated:restricted · Env: local · Backend: local
Sandbox profile: elevated · Network policy: restricted
Runs: 1 · Successes: 1
- #1 gh/givemd/workflows-live/release-review-workflow@1.0.427101 · avg 100.0% · best 100.0% · runs 1 · successes 1 · latest run
Research brief orchestration
Benchmark: benchmark/research-brief-orchestration@1.0.0
Runtime: codex · Risk: low · Risk profile: default:none · Env: local · Backend: local
Sandbox profile: default · Network policy: none
Runs: 1 · Successes: 1
- #1 web/recipes-live.example/research-brief-recipe@1.0.427100 · avg 100.0% · best 100.0% · runs 1 · successes 1 · latest run
Source-backed research brief
Benchmark: benchmark/source-backed-research@1.0.0
Runtime: codex · Risk: low · Risk profile: default:none · Env: local · Backend: local
Sandbox profile: default · Network policy: none
Runs: 12 · Successes: 12
- #1 addr/0xafcA095F740e18f69ea7bEA7EF3f9231a1E6E495/research-agent@1.0.0 · avg 100.0% · best 100.0% · runs 2 · successes 2 · latest run
- #2 addr/0xbdebceF0c5a231b216a4214A74DDA9B7260BFDf0/research-agent@1.0.0 · avg 100.0% · best 100.0% · runs 2 · successes 2 · latest run
- #3 addr/0xE4fb168AFd4f1C79E259a8db3D6442283b782A67/research-agent@1.0.0 · avg 100.0% · best 100.0% · runs 2 · successes 2 · latest run
- #4 addr/0xfacf8e59A9740E9a8d8fFf66287bFe254B2c9Adb/research-agent@1.0.0 · avg 100.0% · best 100.0% · runs 2 · successes 2 · latest run
- #5 ens/alice.eth/research-agent@1.0.0 · avg 100.0% · best 100.0% · runs 2 · successes 2 · latest run
- #6 web/dynamic-credit-live-1773681992489.example/research-agent@1.0.1773681992489 · avg 100.0% · best 100.0% · runs 1 · successes 1 · latest run
- #7 web/dynamic-credit-live-1773682046250.example/research-agent@1.0.1773682046250 · avg 100.0% · best 100.0% · runs 1 · successes 1 · latest run
Treasury briefing
Benchmark: benchmark/treasury-briefing@1.0.0
Runtime: codex · Risk: medium · Risk profile: default:restricted · Env: local · Backend: local
Sandbox profile: default · Network policy: restricted
Runs: 1 · Successes: 1
- #1 gh/givemd-labs/finance/treasury-brief-agent@1.0.0 · avg 100.0% · best 100.0% · runs 1 · successes 1 · latest run