diff --git a/.github/workflows/agentic-token-audit.md b/.github/workflows/agentic-token-audit.md index 5a4aef9..89c97d0 100644 --- a/.github/workflows/agentic-token-audit.md +++ b/.github/workflows/agentic-token-audit.md @@ -51,27 +51,46 @@ steps: set -euo pipefail mkdir -p /tmp/gh-aw/token-audit - # Download last 24 hours of agentic workflow logs as JSON - # Allow partial results — gh aw logs streams incrementally, so even if - # it hits an API rate limit partway through, the JSON written so far is - # still valid and should be processed by the agent. - LOGS_EXIT=0 - gh aw logs \ - --start-date -1d \ - --json \ - -c 100 \ - > /tmp/gh-aw/token-audit/workflow-logs.json || LOGS_EXIT=$? - - if [ -s /tmp/gh-aw/token-audit/workflow-logs.json ]; then - TOTAL=$(jq '.runs | length' /tmp/gh-aw/token-audit/workflow-logs.json) - echo "✅ Downloaded $TOTAL agentic workflow runs (last 24 hours)" - if [ "$LOGS_EXIT" -ne 0 ]; then - echo "⚠️ gh aw logs exited with code $LOGS_EXIT (partial results — likely API rate limit)" + # Download last 24 hours of agentic workflow logs as JSON, one workflow + # at a time. `gh aw logs` without a workflow filter scans repo-wide + # `gh run list` batches (newest-first, 250 runs each) and stops + # paginating as soon as one batch contains no processable agentic runs + # (skipped/cancelled runs are dropped before the empty-batch check — + # see github/gh-aw#38782). In a high-CI-volume repo a batch spans only + # a couple of hours, so the unfiltered call usually saw nothing but + # this run itself and reported an empty day. Workflow-scoped listing is + # unaffected by repo CI volume. Partial results are fine — each + # per-workflow file that parses as valid JSON still gets merged. 
+ PARTS_DIR=/tmp/gh-aw/token-audit/log-parts + mkdir -p "$PARTS_DIR" + + for lock in .github/workflows/*.lock.yml; do + id=$(basename "$lock" .lock.yml) + PART_EXIT=0 + gh aw logs "$id" \ + --start-date -1d \ + --json \ + -c 100 \ + > "$PARTS_DIR/$id.json" || PART_EXIT=$? + if jq -e '(.runs // []) | type == "array"' "$PARTS_DIR/$id.json" >/dev/null 2>&1; then + COUNT=$(jq '.runs | length' "$PARTS_DIR/$id.json" 2>/dev/null || echo 0) + echo "✅ $id: $COUNT runs (exit code $PART_EXIT)" + else + echo "⚠️ $id: no usable log data (exit code $PART_EXIT)" + rm -f "$PARTS_DIR/$id.json" fi + done + + if ls "$PARTS_DIR"/*.json >/dev/null 2>&1; then + jq -s '{summary: {}, runs: (map(.runs // []) | add | unique_by(.run_id))}' \ + "$PARTS_DIR"/*.json > /tmp/gh-aw/token-audit/workflow-logs.json else - echo "❌ No log data downloaded (exit code $LOGS_EXIT)" + echo "❌ No log data downloaded for any workflow" echo '{"runs":[],"summary":{}}' > /tmp/gh-aw/token-audit/workflow-logs.json fi + + TOTAL=$(jq '.runs | length' /tmp/gh-aw/token-audit/workflow-logs.json) + echo "✅ Merged $TOTAL agentic workflow runs (last 24 hours)" timeout-minutes: 25 --- diff --git a/.github/workflows/agentic-token-optimizer.md b/.github/workflows/agentic-token-optimizer.md index 6117f5e..df8d00d 100644 --- a/.github/workflows/agentic-token-optimizer.md +++ b/.github/workflows/agentic-token-optimizer.md @@ -40,24 +40,58 @@ steps: echo "📥 Downloading agentic workflow logs (last 7 days)..." - LOGS_EXIT=0 - gh aw logs \ - --start-date -7d \ - --json \ - -c 50 \ - > /tmp/gh-aw/token-audit/all-runs.json || LOGS_EXIT=$? - - if [ -s /tmp/gh-aw/token-audit/all-runs.json ]; then - TOTAL=$(jq '.runs | length' /tmp/gh-aw/token-audit/all-runs.json) - echo "✅ Downloaded $TOTAL agentic workflow runs (last 7 days)" - if [ "$LOGS_EXIT" -ne 0 ]; then - echo "⚠️ gh aw logs exited with code $LOGS_EXIT (partial results — likely API rate limit)" + # Fetch logs one workflow at a time. 
`gh aw logs` without a workflow + # filter scans repo-wide `gh run list` batches (newest-first, 250 runs + # each) and stops paginating as soon as one batch contains no + # processable agentic runs (skipped/cancelled runs are dropped before + # the empty-batch check — see github/gh-aw#38782). In a high-CI-volume + # repo a batch spans only a couple of hours, so the unfiltered call + # truncates the candidate pool to whatever ran most recently. + # Workflow-scoped listing is unaffected by repo CI volume. Partial + # results are fine — each per-workflow file that parses as + # valid JSON still gets merged. + PARTS_DIR=/tmp/gh-aw/token-audit/log-parts + mkdir -p "$PARTS_DIR" + + for lock in .github/workflows/*.lock.yml; do + id=$(basename "$lock" .lock.yml) + PART_EXIT=0 + gh aw logs "$id" \ + --start-date -7d \ + --json \ + -c 50 \ + > "$PARTS_DIR/$id.json" || PART_EXIT=$? + if jq -e '(.runs // []) | type == "array"' "$PARTS_DIR/$id.json" >/dev/null 2>&1; then + COUNT=$(jq '.runs | length' "$PARTS_DIR/$id.json" 2>/dev/null || echo 0) + echo "✅ $id: $COUNT runs (exit code $PART_EXIT)" + else + echo "⚠️ $id: no usable log data (exit code $PART_EXIT)" + rm -f "$PARTS_DIR/$id.json" fi + done + + if ls "$PARTS_DIR"/*.json >/dev/null 2>&1; then + jq -s '{summary: {}, runs: (map(.runs // []) | add | unique_by(.run_id))}' \ + "$PARTS_DIR"/*.json > /tmp/gh-aw/token-audit/all-runs.json else - echo "❌ No log data downloaded (exit code $LOGS_EXIT)" + echo "❌ No log data downloaded for any workflow" echo '{"runs":[],"summary":{}}' > /tmp/gh-aw/token-audit/all-runs.json fi + TOTAL=$(jq '.runs | length' /tmp/gh-aw/token-audit/all-runs.json) + echo "✅ Merged $TOTAL agentic workflow runs (last 7 days)" + + # Exclude the AIC monitoring family (this optimizer + its sibling audit) from the + # candidate pool so the optimizer never selects its own meta-monitoring workflows. + # The in-prompt "Token in the name" guard misses these: their display names are + # "...AIC Usage Optimizer/Audit" (no "Token"), so match on workflow id/name here. 
+ jq '.runs |= map(select( + (((.workflow_path // "") | test("agentic-token-(optimizer|audit)")) + or ((.workflow_name // "") | test("AIC Usage (Optimizer|Audit)"))) | not + ))' /tmp/gh-aw/token-audit/all-runs.json > /tmp/gh-aw/token-audit/all-runs.filtered.json + mv /tmp/gh-aw/token-audit/all-runs.filtered.json /tmp/gh-aw/token-audit/all-runs.json # not chained with &&: a failed jq must abort the step under set -e + echo "🚫 Excluded AIC monitoring family — $(jq '.runs | length' /tmp/gh-aw/token-audit/all-runs.json) runs remain in candidate pool" + - name: Aggregate top workflows by AIC usage run: | set -euo pipefail @@ -161,7 +195,7 @@ Treat missing numeric fields (`aic`, `token_usage`, `turns`, `action_minutes`) a - Start from `top-workflows.json`. - Exclude workflows optimized in the last 14 days (use `optimization-log.json`). -- Exclude workflows with "Token" in the name to avoid self-targeting. +- Exclude the AIC monitoring family — the `agentic-token-optimizer` and `agentic-token-audit` workflows (display names "Agentic Workflow AIC Usage Optimizer" / "Daily Agentic Workflow AIC Usage Audit") — to avoid self-targeting. These are also pre-filtered from `all-runs.json`/`top-workflows.json`, but never select them even if a stale snapshot still lists them. - Choose the highest AI-credit-spend workflow that remains. - If no snapshot/history exists, derive candidates directly from `all-runs.json`. diff --git a/workflows/agentic-token-audit.md b/workflows/agentic-token-audit.md index 5a4aef9..89c97d0 100644 --- a/workflows/agentic-token-audit.md +++ b/workflows/agentic-token-audit.md @@ -51,27 +51,46 @@ steps: set -euo pipefail mkdir -p /tmp/gh-aw/token-audit - # Download last 24 hours of agentic workflow logs as JSON - # Allow partial results — gh aw logs streams incrementally, so even if - # it hits an API rate limit partway through, the JSON written so far is - # still valid and should be processed by the agent. 
- LOGS_EXIT=0 - gh aw logs \ - --start-date -1d \ - --json \ - -c 100 \ - > /tmp/gh-aw/token-audit/workflow-logs.json || LOGS_EXIT=$? - - if [ -s /tmp/gh-aw/token-audit/workflow-logs.json ]; then - TOTAL=$(jq '.runs | length' /tmp/gh-aw/token-audit/workflow-logs.json) - echo "✅ Downloaded $TOTAL agentic workflow runs (last 24 hours)" - if [ "$LOGS_EXIT" -ne 0 ]; then - echo "⚠️ gh aw logs exited with code $LOGS_EXIT (partial results — likely API rate limit)" + # Download last 24 hours of agentic workflow logs as JSON, one workflow + # at a time. `gh aw logs` without a workflow filter scans repo-wide + # `gh run list` batches (newest-first, 250 runs each) and stops + # paginating as soon as one batch contains no processable agentic runs + # (skipped/cancelled runs are dropped before the empty-batch check — + # see github/gh-aw#38782). In a high-CI-volume repo a batch spans only + # a couple of hours, so the unfiltered call usually saw nothing but + # this run itself and reported an empty day. Workflow-scoped listing is + # unaffected by repo CI volume. Partial results are fine — each + # per-workflow file that parses as valid JSON still gets merged. + PARTS_DIR=/tmp/gh-aw/token-audit/log-parts + mkdir -p "$PARTS_DIR" + + for lock in .github/workflows/*.lock.yml; do + id=$(basename "$lock" .lock.yml) + PART_EXIT=0 + gh aw logs "$id" \ + --start-date -1d \ + --json \ + -c 100 \ + > "$PARTS_DIR/$id.json" || PART_EXIT=$? 
+ if jq -e '(.runs // []) | type == "array"' "$PARTS_DIR/$id.json" >/dev/null 2>&1; then + COUNT=$(jq '.runs | length' "$PARTS_DIR/$id.json" 2>/dev/null || echo 0) + echo "✅ $id: $COUNT runs (exit code $PART_EXIT)" + else + echo "⚠️ $id: no usable log data (exit code $PART_EXIT)" + rm -f "$PARTS_DIR/$id.json" fi + done + + if ls "$PARTS_DIR"/*.json >/dev/null 2>&1; then + jq -s '{summary: {}, runs: (map(.runs // []) | add | unique_by(.run_id))}' \ + "$PARTS_DIR"/*.json > /tmp/gh-aw/token-audit/workflow-logs.json else - echo "❌ No log data downloaded (exit code $LOGS_EXIT)" + echo "❌ No log data downloaded for any workflow" echo '{"runs":[],"summary":{}}' > /tmp/gh-aw/token-audit/workflow-logs.json fi + + TOTAL=$(jq '.runs | length' /tmp/gh-aw/token-audit/workflow-logs.json) + echo "✅ Merged $TOTAL agentic workflow runs (last 24 hours)" timeout-minutes: 25 --- diff --git a/workflows/agentic-token-optimizer.md b/workflows/agentic-token-optimizer.md index 6117f5e..df8d00d 100644 --- a/workflows/agentic-token-optimizer.md +++ b/workflows/agentic-token-optimizer.md @@ -40,24 +40,58 @@ steps: echo "📥 Downloading agentic workflow logs (last 7 days)..." - LOGS_EXIT=0 - gh aw logs \ - --start-date -7d \ - --json \ - -c 50 \ - > /tmp/gh-aw/token-audit/all-runs.json || LOGS_EXIT=$? - - if [ -s /tmp/gh-aw/token-audit/all-runs.json ]; then - TOTAL=$(jq '.runs | length' /tmp/gh-aw/token-audit/all-runs.json) - echo "✅ Downloaded $TOTAL agentic workflow runs (last 7 days)" - if [ "$LOGS_EXIT" -ne 0 ]; then - echo "⚠️ gh aw logs exited with code $LOGS_EXIT (partial results — likely API rate limit)" + # Fetch logs one workflow at a time. `gh aw logs` without a workflow + # filter scans repo-wide `gh run list` batches (newest-first, 250 runs + # each) and stops paginating as soon as one batch contains no + # processable agentic runs (skipped/cancelled runs are dropped before + # the empty-batch check — see github/gh-aw#38782). 
In a high-CI-volume + # repo a batch spans only a couple of hours, so the unfiltered call + # truncates the candidate pool to whatever ran most recently. + # Workflow-scoped listing is unaffected by repo CI volume. Partial + # results are fine — each per-workflow file that parses as + # valid JSON still gets merged. + PARTS_DIR=/tmp/gh-aw/token-audit/log-parts + mkdir -p "$PARTS_DIR" + + for lock in .github/workflows/*.lock.yml; do + id=$(basename "$lock" .lock.yml) + PART_EXIT=0 + gh aw logs "$id" \ + --start-date -7d \ + --json \ + -c 50 \ + > "$PARTS_DIR/$id.json" || PART_EXIT=$? + if jq -e '(.runs // []) | type == "array"' "$PARTS_DIR/$id.json" >/dev/null 2>&1; then + COUNT=$(jq '.runs | length' "$PARTS_DIR/$id.json" 2>/dev/null || echo 0) + echo "✅ $id: $COUNT runs (exit code $PART_EXIT)" + else + echo "⚠️ $id: no usable log data (exit code $PART_EXIT)" + rm -f "$PARTS_DIR/$id.json" fi + done + + if ls "$PARTS_DIR"/*.json >/dev/null 2>&1; then + jq -s '{summary: {}, runs: (map(.runs // []) | add | unique_by(.run_id))}' \ + "$PARTS_DIR"/*.json > /tmp/gh-aw/token-audit/all-runs.json else - echo "❌ No log data downloaded (exit code $LOGS_EXIT)" + echo "❌ No log data downloaded for any workflow" echo '{"runs":[],"summary":{}}' > /tmp/gh-aw/token-audit/all-runs.json fi + TOTAL=$(jq '.runs | length' /tmp/gh-aw/token-audit/all-runs.json) + echo "✅ Merged $TOTAL agentic workflow runs (last 7 days)" + + # Exclude the AIC monitoring family (this optimizer + its sibling audit) from the + # candidate pool so the optimizer never selects its own meta-monitoring workflows. + # The in-prompt "Token in the name" guard misses these: their display names are + # "...AIC Usage Optimizer/Audit" (no "Token"), so match on workflow id/name here. 
+ jq '.runs |= map(select( + (((.workflow_path // "") | test("agentic-token-(optimizer|audit)")) + or ((.workflow_name // "") | test("AIC Usage (Optimizer|Audit)"))) | not + ))' /tmp/gh-aw/token-audit/all-runs.json > /tmp/gh-aw/token-audit/all-runs.filtered.json + mv /tmp/gh-aw/token-audit/all-runs.filtered.json /tmp/gh-aw/token-audit/all-runs.json # not chained with &&: a failed jq must abort the step under set -e + echo "🚫 Excluded AIC monitoring family — $(jq '.runs | length' /tmp/gh-aw/token-audit/all-runs.json) runs remain in candidate pool" + - name: Aggregate top workflows by AIC usage run: | set -euo pipefail @@ -161,7 +195,7 @@ Treat missing numeric fields (`aic`, `token_usage`, `turns`, `action_minutes`) a - Start from `top-workflows.json`. - Exclude workflows optimized in the last 14 days (use `optimization-log.json`). -- Exclude workflows with "Token" in the name to avoid self-targeting. +- Exclude the AIC monitoring family — the `agentic-token-optimizer` and `agentic-token-audit` workflows (display names "Agentic Workflow AIC Usage Optimizer" / "Daily Agentic Workflow AIC Usage Audit") — to avoid self-targeting. These are also pre-filtered from `all-runs.json`/`top-workflows.json`, but never select them even if a stale snapshot still lists them. - Choose the highest AI-credit-spend workflow that remains. - If no snapshot/history exists, derive candidates directly from `all-runs.json`.