Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
533c93c
Install python in openai-bridge
SplotyCode Apr 10, 2026
1dadbd8
Fix openrouter schema name and allow to set providers
SplotyCode Apr 10, 2026
3e2fb16
Ensure api key is not empty
SplotyCode Apr 10, 2026
5d07da2
Fix no common prefix FileSystemToDiskWriter
SplotyCode May 17, 2026
06fefa8
Add FileSystemToDiskWriterTests
SplotyCode May 17, 2026
c3c4d60
Add LlmUpstreamException
SplotyCode May 17, 2026
6af2003
Temporary fix for python in codeql
SplotyCode May 17, 2026
c43d9a9
Add python-security-extended codeql pack for python
SplotyCode May 18, 2026
57a27e9
Refactor edit format handling into dedicated wrappers
SplotyCode May 18, 2026
f95986a
Add multi-language syntax guardians
SplotyCode May 19, 2026
e5c5f65
Normalize malformed source outputs before validation
SplotyCode May 19, 2026
86a1bb8
Add self-test loop to the workflow engine
SplotyCode May 20, 2026
802bc48
Introduce soft and hard guardian retry policies
SplotyCode May 21, 2026
70cdf33
Add workflow trace logging support
SplotyCode May 21, 2026
875d8ad
Harden OpenRouter client response handling
SplotyCode May 22, 2026
662234d
Return bridge failures as API errors instead of source text
SplotyCode May 22, 2026
f3fd787
Add heuristic prompt enrichment for code generation
SplotyCode May 23, 2026
fb4280b
Tighten generic LLM guardian findings with sink-aware filters
SplotyCode May 23, 2026
31993c1
Add LLM triage for analyzer findings
SplotyCode May 24, 2026
09ba3cc
Stabilize CodeQL runner process environment
SplotyCode May 24, 2026
6e70e6d
Teach CodeQL guardian to recover from tool failures
SplotyCode May 25, 2026
f7164f2
Ship default sensitive CodeQL packs and triage prompts
SplotyCode May 25, 2026
bd9811b
Wire advanced guardian configuration into the bridge factory
SplotyCode May 26, 2026
a922809
Document and containerize the OpenAI bridge toolchain
SplotyCode May 26, 2026
c8f6b11
Add change-extraction benchmarks and usage tracking
SplotyCode May 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 198 additions & 0 deletions SecBenchSuite/Benchmarks/EditRepair/dataset.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
{
"tasks": [
{
"id": "textkit_csv_separator",
"task_family": "single-line replacement",
"project": "textkit",
"source_files": ["formatter.py"],
"prompt": "Update format_csv_row so it joins values with ';' instead of ','. Keep the rest of the module unchanged.",
"repair_prompt": "The behavior is correct. Now introduce a module-level constant named CSV_SEPARATOR with the value ';' and use that constant from format_csv_row. Do not modify slugify, build_preview, or build_profile_url.",
"verify_command": ["python3", "verify.py", "textkit_csv_separator"],
"allowed_changed_files": ["formatter.py"],
"max_changed_lines": 10,
"review_assertions": {
"must_contain": {
"formatter.py": ["CSV_SEPARATOR = \";\"", "return CSV_SEPARATOR.join(values)"]
}
}
},
{
"id": "textkit_preview_helper",
"task_family": "add a helper function and update call site",
"project": "textkit",
"source_files": ["formatter.py"],
"prompt": "Make build_preview collapse repeated internal whitespace before trimming and truncating. Add a helper function if needed.",
"repair_prompt": "Please add a dedicated helper named collapse_spaces and call it from build_preview instead of inlining the whitespace-normalization logic.",
"verify_command": ["python3", "verify.py", "textkit_preview_helper"],
"allowed_changed_files": ["formatter.py"],
"max_changed_lines": 18,
"review_assertions": {
"must_contain": {
"formatter.py": ["def collapse_spaces(", "cleaned = collapse_spaces("]
}
}
},
{
"id": "textkit_profile_url_quote",
"task_family": "insert imports / dependencies",
"project": "textkit",
"source_files": ["formatter.py"],
"prompt": "Make build_profile_url URL-encode the username using the Python standard library so spaces and slashes are escaped in the path segment.",
"repair_prompt": "Preserve the public function signature, but move the quoting into a helper named safe_path_segment and call that helper from build_profile_url.",
"verify_command": ["python3", "verify.py", "textkit_profile_url_quote"],
"allowed_changed_files": ["formatter.py"],
"max_changed_lines": 18,
"review_assertions": {
"must_contain": {
"formatter.py": ["from urllib.parse import quote", "def safe_path_segment(", "return f\"/users/{safe_path_segment(username)}\""]
}
}
},
{
"id": "orders_ignore_negative_quantity",
"task_family": "small local patch",
"project": "orders",
"source_files": ["pricing.py", "service.py"],
"prompt": "Update subtotal so items with a negative quantity are ignored instead of reducing the total.",
"repair_prompt": "Keep the public signatures unchanged, but extract the per-item calculation into a helper named _line_total and call that helper from subtotal.",
"verify_command": ["python3", "verify.py", "orders_ignore_negative_quantity"],
"allowed_changed_files": ["pricing.py"],
"max_changed_lines": 18,
"review_assertions": {
"must_contain": {
"pricing.py": ["def _line_total(", "total += _line_total(item)"]
}
}
},
{
"id": "orders_lowercase_discount_code",
"task_family": "multi-line patch with surrounding context",
"project": "orders",
"source_files": ["pricing.py", "service.py"],
"prompt": "Make apply_discount accept discount codes case-insensitively and ignore surrounding whitespace.",
"repair_prompt": "Keep the call sites untouched, but move the normalization into a helper named normalize_discount_code and call that helper from apply_discount.",
"verify_command": ["python3", "verify.py", "orders_lowercase_discount_code"],
"allowed_changed_files": ["pricing.py"],
"max_changed_lines": 18,
"review_assertions": {
"must_contain": {
"pricing.py": ["def normalize_discount_code(", "normalized = normalize_discount_code(code)"]
}
}
},
{
"id": "orders_format_total_helper",
"task_family": "edit across two files",
"project": "orders",
"source_files": ["pricing.py", "service.py"],
"prompt": "Add a helper named format_total to pricing.py that formats cents as a Euro string like '€12.34', and update quote_order to use it for the display field.",
"repair_prompt": "Keep the formatting logic in the new helper and only wire quote_order to that helper. Do not duplicate the formatting code in service.py.",
"verify_command": ["python3", "verify.py", "orders_format_total_helper"],
"allowed_changed_files": ["pricing.py", "service.py"],
"max_changed_lines": 18,
"review_assertions": {
"must_contain": {
"pricing.py": ["def format_total("],
"service.py": ["format_total(total)"]
}
}
},
{
"id": "catalog_inventory_badge",
"task_family": "edit across two files",
"project": "catalog",
"source_files": ["models.py", "view.py"],
"prompt": "Add a helper named inventory_badge in models.py that returns '[IN]' for in-stock items and '[OUT]' for sold-out items. Update render_card to include that badge before the product name.",
"repair_prompt": "Keep stock_label available for other callers and introduce the new helper only for the card rendering path.",
"verify_command": ["python3", "verify.py", "catalog_inventory_badge"],
"allowed_changed_files": ["models.py", "view.py"],
"max_changed_lines": 18,
"review_assertions": {
"must_contain": {
"models.py": ["def inventory_badge("],
"view.py": ["inventory_badge(count)"]
}
}
},
{
"id": "catalog_price_label_compact",
"task_family": "whole-function rewrite",
"project": "catalog",
"source_files": ["models.py", "view.py"],
"prompt": "Rewrite price_label so whole-euro values render without a decimal fraction, e.g. 1200 -> '€12', while other values still keep two decimals.",
"repair_prompt": "Preserve the function signature, keep the change inside price_label, and make the whole-euro branch use a local euros variable.",
"verify_command": ["python3", "verify.py", "catalog_price_label_compact"],
"allowed_changed_files": ["models.py"],
"max_changed_lines": 12,
"review_assertions": {
"must_contain": {
"models.py": ["euros = cents // 100"]
}
}
},
{
"id": "catalog_render_card_multiline",
"task_family": "fix a failing behavior described in text",
"project": "catalog",
"source_files": ["models.py", "view.py"],
"prompt": "Change render_card so the stock status appears on a second line instead of inside parentheses, while keeping the price on the first line.",
"repair_prompt": "Do not touch models.py. This is a rendering-only change in view.py, and the second line should come from a helper named render_status_line.",
"verify_command": ["python3", "verify.py", "catalog_render_card_multiline"],
"allowed_changed_files": ["view.py"],
"max_changed_lines": 16,
"review_assertions": {
"must_contain": {
"view.py": ["def render_status_line(", "render_status_line(count)"]
}
}
},
{
"id": "notifications_title_case_subject",
"task_family": "single-line replacement",
"project": "notifications",
"source_files": ["emailer.py"],
"prompt": "Update build_subject so it starts with 'Report: ' instead of 'report: '.",
"repair_prompt": "Keep the change local to build_subject, but introduce a SUBJECT_PREFIX constant and use it from the function.",
"verify_command": ["python3", "verify.py", "notifications_title_case_subject"],
"allowed_changed_files": ["emailer.py"],
"max_changed_lines": 10,
"review_assertions": {
"must_contain": {
"emailer.py": ["SUBJECT_PREFIX = \"Report: \"", "return f\"{SUBJECT_PREFIX}{report_name}\""]
}
}
},
{
"id": "notifications_trim_body_name",
"task_family": "small local patch",
"project": "notifications",
"source_files": ["emailer.py"],
"prompt": "Make build_body trim surrounding whitespace from user_name and title-case it before building the greeting.",
"repair_prompt": "Preserve the function signature, keep the change local to build_body, and store the cleaned display name in a local variable named display_name.",
"verify_command": ["python3", "verify.py", "notifications_trim_body_name"],
"allowed_changed_files": ["emailer.py"],
"max_changed_lines": 10,
"review_assertions": {
"must_contain": {
"emailer.py": ["display_name = user_name.strip().title()", "greeting = f\"hello {display_name}\""]
}
}
},
{
"id": "notifications_digest_bullets",
"task_family": "whole-function rewrite",
"project": "notifications",
"source_files": ["emailer.py"],
"prompt": "Rewrite render_digest so it returns one bullet per item using '- ' prefixes and returns 'No updates.' when the list is empty.",
"repair_prompt": "Please keep the render_digest function name and signature exactly as they are, and use an explicit early return for the empty-list case.",
"verify_command": ["python3", "verify.py", "notifications_digest_bullets"],
"allowed_changed_files": ["emailer.py"],
"max_changed_lines": 14,
"review_assertions": {
"must_contain": {
"emailer.py": ["if not items:", "return \"No updates.\""]
}
}
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
def price_label(cents):
    """Format an amount given in cents as a euro label with two decimals."""
    amount = cents / 100
    return f"€{amount:.2f}"


def stock_label(count):
    """Return "in stock" for a positive count, otherwise "sold out"."""
    if count > 0:
        return "in stock"
    return "sold out"
27 changes: 27 additions & 0 deletions SecBenchSuite/Benchmarks/EditRepair/projects/catalog/verify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import importlib.util
import sys
from pathlib import Path


def load_module(name: str, file_name: str):
    """Load a Python source file and return it as an executed module object.

    The module is executed but is not added to ``sys.modules`` here.

    Args:
        name: Module name assigned to the loaded module.
        file_name: Path to the source file; a relative path is resolved
            against the current working directory.

    Returns:
        The module object after its code has been executed.

    Raises:
        ImportError: If no import spec or loader can be created for the
            file (for example, an unrecognized file suffix).
    """
    spec = importlib.util.spec_from_file_location(name, Path(file_name))
    # Validate before module_from_spec(): that call dereferences spec.loader,
    # so a missing spec would otherwise surface as an opaque AttributeError.
    # An explicit raise also survives ``python -O``, unlike an assert.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load module {name!r} from {file_name!r}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


# Load the catalog modules under test; the relative file names are resolved
# against the current working directory.
models = load_module("models", "models.py")
view = load_module("view", "view.py")
# The first command-line argument selects which task's checks to run.
task_id = sys.argv[1]

if task_id == "catalog_inventory_badge":
    # The card must begin with the stock badge followed by the product name.
    assert view.render_card("Mug", 1200, 3).startswith("[IN] Mug:")
    assert view.render_card("Mug", 1200, 0).startswith("[OUT] Mug:")
elif task_id == "catalog_price_label_compact":
    # Whole-euro amounts drop the fraction; other amounts keep two decimals.
    assert models.price_label(1200) == "€12"
    assert models.price_label(1250) == "€12.50"
elif task_id == "catalog_render_card_multiline":
    # Price stays on the first line; the stock status moves to a second line.
    assert view.render_card("Lamp", 2500, 2) == "Lamp: €25.00\nin stock"
else:
    # Fail loudly instead of passing silently on an unrecognized task id.
    raise AssertionError(f"Unknown task id: {task_id}")
5 changes: 5 additions & 0 deletions SecBenchSuite/Benchmarks/EditRepair/projects/catalog/view.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from models import price_label, stock_label


def render_card(name, cents, count):
    """Render a one-line product card: name, price label, and stock label.

    The stock label is placed in parentheses after the price label.
    """
    price = price_label(cents)
    status = stock_label(count)
    return f"{name}: {price} ({status})"
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
def build_subject(report_name):
    """Return the subject line: the "report: " prefix followed by report_name."""
    return f"report: {report_name}"


def build_body(user_name, lines):
    """Build a message body: a greeting, a blank line, then the given lines.

    The greeting is "hello " followed by user_name exactly as passed in;
    the lines are joined with single newlines.
    """
    greeting = f"hello {user_name}"
    content = "\n".join(lines)
    return "\n\n".join((greeting, content))


def render_digest(items):
    """Return the items joined into one string, separated by ", "."""
    separator = ", "
    return separator.join(items)
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import importlib.util
import sys
from pathlib import Path


def load_module(name: str, file_name: str):
    """Load a Python source file and return it as an executed module object.

    The module is executed but is not added to ``sys.modules`` here.

    Args:
        name: Module name assigned to the loaded module.
        file_name: Path to the source file; a relative path is resolved
            against the current working directory.

    Returns:
        The module object after its code has been executed.

    Raises:
        ImportError: If no import spec or loader can be created for the
            file (for example, an unrecognized file suffix).
    """
    spec = importlib.util.spec_from_file_location(name, Path(file_name))
    # Validate before module_from_spec(): that call dereferences spec.loader,
    # so a missing spec would otherwise surface as an opaque AttributeError.
    # An explicit raise also survives ``python -O``, unlike an assert.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load module {name!r} from {file_name!r}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


# Load the module under test; the relative file name is resolved against the
# current working directory.
emailer = load_module("emailer", "emailer.py")
# The first command-line argument selects which task's checks to run.
task_id = sys.argv[1]

if task_id == "notifications_title_case_subject":
    # The subject prefix must be capitalized.
    assert emailer.build_subject("weekly") == "Report: weekly"
elif task_id == "notifications_trim_body_name":
    # The greeting must use the trimmed, title-cased name.
    body = emailer.build_body(" ada lovelace ", ["line 1", "line 2"])
    assert body.startswith("hello Ada Lovelace")
elif task_id == "notifications_digest_bullets":
    # One "- " bullet per item, and a fixed message for an empty list.
    assert emailer.render_digest(["A", "B"]) == "- A\n- B"
    assert emailer.render_digest([]) == "No updates."
else:
    # Fail loudly instead of passing silently on an unrecognized task id.
    raise AssertionError(f"Unknown task id: {task_id}")
17 changes: 17 additions & 0 deletions SecBenchSuite/Benchmarks/EditRepair/projects/orders/pricing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Discount codes mapped to the fraction that apply_discount deducts from a
# total (0.10 means 10% off).
DISCOUNTS = {
    "SAVE10": 0.10,
    "SAVE20": 0.20,
}


def subtotal(items):
    """Return the sum of price_cents * quantity over all items.

    Each item is indexed by the "price_cents" and "quantity" keys. An empty
    iterable yields 0.
    """
    total = 0
    for item in items:
        # No sign check here: a negative quantity lowers the running total.
        total += item["price_cents"] * item["quantity"]
    return total


def apply_discount(total_cents, code):
    """Return total_cents reduced by the discount registered for code.

    A code that is not a key of DISCOUNTS leaves the total unchanged. The
    lookup is exact: no case or whitespace normalization is performed. A
    discounted total is converted with int().
    """
    if code not in DISCOUNTS:
        return total_cents
    rate = DISCOUNTS[code]
    return int(total_cents * (1 - rate))
10 changes: 10 additions & 0 deletions SecBenchSuite/Benchmarks/EditRepair/projects/orders/service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from pricing import subtotal, apply_discount


def quote_order(items, code=None):
    """Price an order and return its total with a display string.

    The subtotal of items is passed through apply_discount with code.

    Returns:
        A dict with "total_cents" (the discounted total) and "display"
        (the total divided by 100, two decimals, followed by " EUR").
    """
    total = apply_discount(subtotal(items), code)
    display = f"{total / 100:.2f} EUR"
    return {"total_cents": total, "display": display}
30 changes: 30 additions & 0 deletions SecBenchSuite/Benchmarks/EditRepair/projects/orders/verify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import importlib.util
import sys
from pathlib import Path


def load_module(name: str, file_name: str):
    """Load a Python source file and return it as an executed module object.

    The module is executed but is not added to ``sys.modules`` here.

    Args:
        name: Module name assigned to the loaded module.
        file_name: Path to the source file; a relative path is resolved
            against the current working directory.

    Returns:
        The module object after its code has been executed.

    Raises:
        ImportError: If no import spec or loader can be created for the
            file (for example, an unrecognized file suffix).
    """
    spec = importlib.util.spec_from_file_location(name, Path(file_name))
    # Validate before module_from_spec(): that call dereferences spec.loader,
    # so a missing spec would otherwise surface as an opaque AttributeError.
    # An explicit raise also survives ``python -O``, unlike an assert.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load module {name!r} from {file_name!r}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


# Load the order modules under test; the relative file names are resolved
# against the current working directory.
pricing = load_module("pricing", "pricing.py")
service = load_module("service", "service.py")
# The first command-line argument selects which task's checks to run.
task_id = sys.argv[1]

if task_id == "orders_ignore_negative_quantity":
    # Only the positive-quantity line (200 * 2) may count toward the total.
    items = [
        {"price_cents": 200, "quantity": 2},
        {"price_cents": 500, "quantity": -3},
    ]
    assert pricing.subtotal(items) == 400
elif task_id == "orders_lowercase_discount_code":
    # A lowercase, space-padded code must still apply the 10% discount.
    assert pricing.apply_discount(1000, " save10 ") == 900
elif task_id == "orders_format_total_helper":
    # The display field must use the euro-sign format.
    result = service.quote_order([{"price_cents": 1234, "quantity": 1}], None)
    assert result["display"] == "€12.34"
else:
    # Fail loudly instead of passing silently on an unrecognized task id.
    raise AssertionError(f"Unknown task id: {task_id}")
21 changes: 21 additions & 0 deletions SecBenchSuite/Benchmarks/EditRepair/projects/textkit/formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import re


def format_csv_row(values):
    """Join values into one string separated by commas.

    No quoting or escaping is applied to the values.
    """
    return ",".join(values)


def slugify(name):
    """Turn name into a lowercase, hyphen-separated slug.

    Surrounding whitespace is stripped, each run of characters outside
    a-z and 0-9 becomes a single "-", and leading/trailing hyphens are
    removed.
    """
    lowered = name.strip().lower()
    hyphenated = re.sub(r"[^a-z0-9]+", "-", lowered)
    return hyphenated.strip("-")


def build_preview(text, limit=20):
    """Return a trimmed preview of text, truncated to limit characters.

    Surrounding whitespace is stripped first. Text longer than limit is cut
    to its first limit characters and suffixed with "..."; shorter text is
    returned as-is.
    """
    cleaned = text.strip()
    if len(cleaned) > limit:
        return f"{cleaned[:limit]}..."
    return cleaned


def build_profile_url(username):
    """Return the profile path "/users/<username>".

    The username is interpolated as-is; no URL encoding is applied.
    """
    return f"/users/{username}"
24 changes: 24 additions & 0 deletions SecBenchSuite/Benchmarks/EditRepair/projects/textkit/verify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import importlib.util
import sys
from pathlib import Path


def load_module(name: str, file_name: str):
    """Load a Python source file and return it as an executed module object.

    The module is executed but is not added to ``sys.modules`` here.

    Args:
        name: Module name assigned to the loaded module.
        file_name: Path to the source file; a relative path is resolved
            against the current working directory.

    Returns:
        The module object after its code has been executed.

    Raises:
        ImportError: If no import spec or loader can be created for the
            file (for example, an unrecognized file suffix).
    """
    spec = importlib.util.spec_from_file_location(name, Path(file_name))
    # Validate before module_from_spec(): that call dereferences spec.loader,
    # so a missing spec would otherwise surface as an opaque AttributeError.
    # An explicit raise also survives ``python -O``, unlike an assert.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load module {name!r} from {file_name!r}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


# Load the module under test; the relative file name is resolved against the
# current working directory.
formatter = load_module("formatter", "formatter.py")
# The first command-line argument selects which task's checks to run.
task_id = sys.argv[1]

if task_id == "textkit_csv_separator":
    # Values must be joined with ';'.
    assert formatter.format_csv_row(["a", "b", "c"]) == "a;b;c"
elif task_id == "textkit_preview_helper":
    # NOTE(review): as written, the input has only single internal spaces, so
    # this check also passes without any whitespace collapsing — confirm the
    # intended spacing of the fixture string.
    assert formatter.build_preview(" alpha beta gamma ", limit=12) == "alpha beta g..."
elif task_id == "textkit_profile_url_quote":
    # Both the space and the slash in the username must be percent-encoded.
    assert formatter.build_profile_url("Ada Lovelace/notes") == "/users/Ada%20Lovelace%2Fnotes"
else:
    # Fail loudly instead of passing silently on an unrecognized task id.
    raise AssertionError(f"Unknown task id: {task_id}")
Loading
Loading