From c9f4ff29156944deed43b8cf4952be96252934c6 Mon Sep 17 00:00:00 2001
From: Scott Brumley <sbrumley@paloaltonetworks.com>
Date: Tue, 16 Jun 2026 15:04:32 -0400
Subject: [PATCH] =?UTF-8?q?fix(auto-triage):=20close=20eligible=20cases=20?=
 =?UTF-8?q?reliably=20=E2=80=94=20offset=20paging,=20in-loop=20close,=20fa?=
 =?UTF-8?q?il-closed=20guards?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

JOB - Auto Triage was selecting cases but draining almost nothing (~70/hr).
Root causes were stacked: it scanned a slice that never reached the low-score
backlog, and the close path couldn't keep up with selection.

SOCAutoTriageScoreFilter.py
- Replace keyset-on-creation_time paging with offset pagination. get_incidents
  has no sort parameter (sort_by_creation_time is silently ignored), so keyset
  walking skipped the eligible backlog entirely (201 scanned / 0 passed).
- Close in-loop: per-case update_incident in a tight Python loop. The API has
  no bulk close (incident_id_list is rejected 400), and the old task 8 forEach
  spun a separatecontext per case — that was the throughput bottleneck.
- Write one execution-dataset row per actual close result instead of per
  selection, ending the re-selection inflation (~40k rows for ~563 closes).
- Record every skip with a reason; score skips were silently dropped and hid
  the real cause for a full session.
- Wall-clock runtime guard (540s, under the 600s automation timeout); partial
  runs resume next schedule. Replaces max_batches=200 that timed out under
  offset paging. Default max_batches 200 -> 20.
- Add fail-closed status backstop (skip unless status is confirmed 'new'),
  mirroring the starred backstop, so in-progress cases can never be closed.

SOCAutoTriageScoreFilter.yml
- Declare dry_run and batch_size args (were being stripped at invocation).
- Update outputs/contract to the close-result fields.

JOB_-_Auto_Triage_V3.yml
- Remove task 5 (condition) and task 8 (per-case forEach close playbook);
  task 12 now closes in-process and flows straight to close-job.
---
 .../Playbooks/JOB_-_Auto_Triage_V3.yml        | 108 +------
 .../SOCAutoTriageScoreFilter.py               | 303 +++++++++++-------
 .../SOCAutoTriageScoreFilter.yml              |  59 +++-
 docs/soc-optimization-unified/overview.md     |   4 +-
 4 files changed, 253 insertions(+), 221 deletions(-)

diff --git a/Packs/soc-optimization-unified/Playbooks/JOB_-_Auto_Triage_V3.yml b/Packs/soc-optimization-unified/Playbooks/JOB_-_Auto_Triage_V3.yml
index 6276dde0..4d0205ca 100644
--- a/Packs/soc-optimization-unified/Playbooks/JOB_-_Auto_Triage_V3.yml
+++ b/Packs/soc-optimization-unified/Playbooks/JOB_-_Auto_Triage_V3.yml
@@ -51,50 +51,6 @@ tasks:
     isoversize: false
     isautoswitchedtoquietmode: false
 
-  "5":
-    id: "5"
-    taskid: 11786de0-5a13-412d-8a8a-1ab69922d510
-    type: condition
-    task:
-      id: 11786de0-5a13-412d-8a8a-1ab69922d510
-      version: -1
-      name: Cases to Triage?
-      type: condition
-      iscommand: false
-      brand: ""
-      playbooktaskmissingcomponent: null
-      istaskmissingcomponenterrordismissed: false
-    nexttasks:
-      '#default#':
-        - "9"
-      "yes":
-        - "8"
-    separatecontext: false
-    conditions:
-      - label: "yes"
-        condition:
-          - - operator: isExists
-              left:
-                value:
-                  simple: AutoTriage.filtered_incidents.incident_id
-                iscontext: true
-              right:
-                value: {}
-    continueonerrortype: ""
-    view: |-
-      {
-        "position": {
-          "x": 50,
-          "y": 420
-        }
-      }
-    note: false
-    timertriggers: []
-    ignoreworker: false
-    skipunavailable: false
-    quietmode: 0
-    isoversize: false
-    isautoswitchedtoquietmode: false
   "7":
     id: "7"
     taskid: 6ae6dff9-3719-4b2e-8758-d1cf54a24c13
@@ -124,52 +80,6 @@ tasks:
     quietmode: 0
     isoversize: false
     isautoswitchedtoquietmode: false
-  "8":
-    id: "8"
-    taskid: 481cd8d5-7bca-4923-8cd0-77d48d883e8f
-    type: playbook
-    task:
-      id: 481cd8d5-7bca-4923-8cd0-77d48d883e8f
-      version: -1
-      name: SOC Close Cases_V3
-      playbookName: SOC Close Cases_V3
-      type: playbook
-      iscommand: false
-      brand: ""
-      playbooktaskmissingcomponent: null
-      istaskmissingcomponenterrordismissed: false
-    nexttasks:
-      '#none#':
-        - "9"
-    scriptarguments:
-      incident_id:
-        simple: ${AutoTriage.filtered_incidents.incident_id}
-      resolve_comment:
-        simple: 'SOC Framework Auto Triage: case exceeded age threshold, aggregated_score
-          below threshold, no analyst activity detected. Auto-closed by JOB.'
-    separatecontext: true
-    continueonerror: true
-    continueonerrortype: ""
-    loop:
-      iscommand: false
-      exitCondition: ""
-      wait: 1
-      max: 500
-      forEach: true
-    view: |-
-      {
-        "position": {
-          "x": 162.5,
-          "y": 610
-        }
-      }
-    note: false
-    timertriggers: []
-    ignoreworker: false
-    skipunavailable: false
-    quietmode: 0
-    isoversize: false
-    isautoswitchedtoquietmode: false
   "9":
     id: "9"
     taskid: ecad854e-0b30-4494-8c61-183d70ee7952
@@ -211,12 +121,12 @@ tasks:
     task:
       id: ba55380b-2e2c-44ef-8fcf-d828bfbfa09a
       version: -1
-      name: Fetch and Filter Cases by Score Threshold
-      description: Fetches unstarred new cases via core-api-post in paginated batches
-        of 100, sorted by creation_time asc. Skips cases with aggregated_score above
-        TriageScoreThreshold. Applies age window filter (TriageWindowHours) and skips
-        analyst-touched cases. Stops when eligible cases are found or max_batches
-        reached. Passes only eligible cases to the close loop.
+      name: Fetch, Filter, and Close Cases by Score Threshold
+      description: Fetches unstarred new cases via core-api-post using offset
+        pagination, gated by TriageScoreThreshold / TriageWindowHours / no analyst
+        activity, then closes each eligible case in-process via update_incident
+        (one ID per call) and writes one execution-dataset row per close result.
+        A wall-clock budget caps the run; partial runs resume next schedule.
       script: SOCAutoTriageScoreFilter
       type: regular
       iscommand: false
@@ -225,7 +135,7 @@ tasks:
       istaskmissingcomponenterrordismissed: false
     nexttasks:
       '#none#':
-        - "5"
+        - "9"
     scriptarguments:
       score_threshold:
         complex:
@@ -327,9 +237,7 @@ tasks:
 system: true
 view: |-
   {
-    "linkLabelsPosition": {
-      "5_9_#default#": 0.82
-    },
+    "linkLabelsPosition": {},
     "paper": {
       "dimensions": {
         "height": 1170,
diff --git a/Packs/soc-optimization-unified/Scripts/SOCAutoTriageScoreFilter/SOCAutoTriageScoreFilter.py b/Packs/soc-optimization-unified/Scripts/SOCAutoTriageScoreFilter/SOCAutoTriageScoreFilter.py
index bdab1d37..c7e88687 100644
--- a/Packs/soc-optimization-unified/Scripts/SOCAutoTriageScoreFilter/SOCAutoTriageScoreFilter.py
+++ b/Packs/soc-optimization-unified/Scripts/SOCAutoTriageScoreFilter/SOCAutoTriageScoreFilter.py
@@ -5,18 +5,55 @@
 
 
 # 100 is the get_incidents per-request ceiling. War-room arrays may show a
-# cosmetic truncation note at this size, but the forEach close loop is unaffected.
+# cosmetic truncation note at this size; it does not affect processing.
 BATCH_SIZE = 100
-# How many incident IDs to bundle into a single bulk update_incident call.
-# Each emitted batch is a ready-to-post JSON array string for incident_id_list.
-CLOSE_CHUNK_SIZE = 100
-API_URI = '/public_api/v1/incidents/get_incidents'
+GET_INCIDENTS_URI = '/public_api/v1/incidents/get_incidents'
+UPDATE_INCIDENT_URI = '/public_api/v1/incidents/update_incident'
+# update_incident has NO bulk mode: posting incident_id_list returns
+# 400 "incident_id field is missing or incorrect" (verified against tenant,
+# Jun 2026). We close one ID per call, in a tight Python loop here rather than a
+# playbook forEach, to avoid per-iteration context spin-up.
+RESOLVE_STATUS = 'resolved_other'
+RESOLVE_COMMENT = ('SOC Framework Auto Triage: case exceeded age threshold, '
+                   'aggregated_score below threshold, no analyst activity '
+                   'detected. Auto-closed by JOB.')
+API_URI = GET_INCIDENTS_URI  # back-compat alias
+# Per-run wall-clock budget. Two O(n) costs share it: offset pagination during
+# fetch (deep search_from makes get_incidents progressively slower) and the
+# per-case close loop (one update_incident call each). When the budget is hit
+# the run stops cleanly and returns what it has — partial progress is safe
+# because closed cases no longer match status=new, so the next run resumes.
+#
+# MUST stay safely under the script's automation timeout (Settings > the
+# automation's timeout). Leave ~10% headroom for the dataset write + return.
+# Current automation timeout: 600s.
+MAX_RUNTIME_SECONDS = 540
 FIELDS = [
     'incident_id', 'aggregated_score', 'creation_time',
     'status', 'starred', 'manual_score'
 ]
 
 
+def close_case(incident_id):
+    """Close one case via update_incident (single incident_id — the API rejects
+    incident_id_list). Returns (success: bool, error_message: str). Never raises;
+    one failed close must not abort the rest of the batch."""
+    body = json.dumps({
+        'request_data': {
+            'incident_id': str(incident_id),
+            'update_data': {
+                'status': RESOLVE_STATUS,
+                'resolve_comment': RESOLVE_COMMENT
+            }
+        }
+    })
+    try:
+        execute_command('core-api-post', {'uri': UPDATE_INCIDENT_URI, 'body': body})
+        return True, ''
+    except Exception as e:
+        return False, str(e)
+
+
 def _to_float(value):
     """Best-effort float; returns None if not parseable."""
     try:
@@ -42,35 +79,38 @@ def _is_unstarred(value):
     return False
 
 
-def fetch_batch(upper_ms: int, batch_size: int) -> dict:
-    """Fetch the newest unstarred New incidents created at or before upper_ms.
-
-    KEYSET pagination: callers walk the age-eligible set downward by
-    creation_time, always querying from offset 0 with a shrinking creation_time
-    upper bound. This keeps every call O(batch_size) instead of O(offset) — deep
-    offset pagination (search_from=30000) makes the API skip every preceding row
-    and gets progressively slower, which is what times the job out on large
-    backlogs. There is no growing search_from here.
-
-    Contract notes (confirmed):
-      - status value is the lowercase public-API enum: "new" (the Cases UI
-        capitalizes "New" for display, but the get_incidents filter enum is
-        lowercase). Sending "New" matches zero rows.
+def fetch_batch(cutoff_ms: int, search_from: int, batch_size: int) -> dict:
+    """Fetch one page of unstarred New incidents created at or before cutoff_ms.
+
+    OFFSET pagination. get_incidents has NO sort capability — any sort_by_*
+    key in request_data is silently ignored (verified against the tenant API,
+    Jun 2026), so results come back in arbitrary order. Keyset pagination on
+    creation_time is therefore unsafe: on an unordered page, advancing a cursor
+    below the page's minimum creation_time skips every un-returned row above it,
+    which is exactly how the age-eligible low-score backlog went unscanned.
+    Plain offset pagination (fixed filter, growing search_from) is the only way
+    to cover the whole age-eligible set when the API will not order results.
+    Deep offsets do get progressively slower, but max_batches bounds the cost
+    and closed cases drop out of status=new between runs, so the offset window
+    keeps advancing through fresh cases run over run.
+
+    Contract notes (confirmed against tenant, Jun 2026):
+      - status enum is case-insensitive here: both "new" and "New" match (the
+        earlier "New matches zero rows" claim was wrong).
       - `in` operator -> array value; `lte` -> scalar value.
+      - creation_time is epoch milliseconds; filterable server-side.
       - aggregated_score is NOT filterable server-side -> gated client-side.
-      - `sort_by_creation_time` is the documented working sort key.
     """
     body = json.dumps({
         'request_data': {
             'filters': [
                 {'field': 'status', 'operator': 'in', 'value': ['new']},
                 {'field': 'starred', 'operator': 'in', 'value': [False]},
-                {'field': 'creation_time', 'operator': 'lte', 'value': int(upper_ms)},
+                {'field': 'creation_time', 'operator': 'lte', 'value': int(cutoff_ms)},
             ],
             'fields': FIELDS,
-            'sort_by_creation_time': 'desc',
-            'search_from': 0,
-            'search_to': batch_size
+            'search_from': int(search_from),
+            'search_to': int(search_from) + int(batch_size)
         }
     })
     result = execute_command('core-api-post', {'uri': API_URI, 'body': body})
@@ -86,11 +126,12 @@ def main():
     # pass score_threshold explicitly so the policy lives in one obvious place.
     threshold = args.get('score_threshold', '40')
     window_hours = args.get('window_hours', '6')
-    # Default high enough to page the whole age-eligible backlog to exhaustion
-    # on any tenant (the loop stops on its own when a short page comes back).
-    # This is the safety ceiling, not the normal cost. For this to apply, the
-    # JOB task must NOT pass a literal max_batches (a typed value overrides it).
-    max_batches = args.get('max_batches', '200')
+    # max_batches caps offset depth per run. With offset pagination each batch
+    # is O(offset), so this is a COST ceiling, not a "drain the whole backlog in
+    # one run" knob — large backlogs drain over repeated scheduled runs as closed
+    # cases leave status=new. Keep it modest (deepest offset = max_batches*100);
+    # the wall-clock guard (MAX_RUNTIME_SECONDS) is the real timeout backstop.
+    max_batches = args.get('max_batches', '20')
 
     threshold = _to_float(threshold)
     if threshold is None:
@@ -103,9 +144,9 @@ def main():
     try:
         max_batches = int(max_batches)
     except (ValueError, TypeError):
-        max_batches = 200
+        max_batches = 20
     if max_batches < 1:
-        max_batches = 200
+        max_batches = 20
 
     # batch_size is tunable but hard-capped at the get_incidents per-request
     # ceiling of 100, and floored at 1. Junk/blank falls back to BATCH_SIZE.
@@ -126,12 +167,21 @@ def main():
     skipped = []
     total_scanned = 0
     batches_run = 0
-    seen_ids = set()
-    cursor_ms = cutoff_ms  # creation_time upper bound; walks downward each batch
+    search_from = 0  # offset into the age-eligible result set; grows each batch
+    run_start = time.time()
+    budget_hit = False
 
     for batch_num in range(max_batches):
+        # Wall-clock guard: stop before the Docker automation timeout. The close
+        # phase below shares this same budget, so a fetch that exhausts it leaves
+        # nothing closed this run; cases stay status=new for the next run.
+        if time.time() - run_start > MAX_RUNTIME_SECONDS:
+            budget_hit = True
+            demisto.debug(f'Runtime budget {MAX_RUNTIME_SECONDS}s hit after '
+                          f'{batches_run} batches; stopping with partial progress.')
+            break
         try:
-            result = fetch_batch(cursor_ms, batch_size)
+            result = fetch_batch(cutoff_ms, search_from, batch_size)
         except Exception as e:
             # If the very first fetch fails we have scanned nothing — that is an
             # auth/API failure (e.g. 401 unauthorized), NOT an empty backlog.
@@ -154,23 +204,14 @@ def main():
             demisto.debug(f'Batch {batch_num}: no incidents returned, stopping.')
             break
 
-        # Dedup against the boundary record(s) carried over by the <= cursor.
-        new_incidents = [i for i in incidents
-                         if str(i.get('incident_id', '')) not in seen_ids]
-        if not new_incidents:
-            # No forward progress (entire page already seen) -> stop.
-            break
-
         batches_run += 1
-        total_scanned += len(new_incidents)
-        page_min_ct = None
+        total_scanned += len(incidents)
 
-        for inc in new_incidents:
+        for inc in incidents:
             # One malformed incident must never abort the run and leave the rest
             # of the backlog unprocessed.
             try:
                 incident_id = inc.get('incident_id', 'unknown')
-                seen_ids.add(str(incident_id))
                 aggregated_score = _to_float(inc.get('aggregated_score'))
                 manual_score = inc.get('manual_score')
                 creation_time = inc.get('creation_time', 0)
@@ -178,8 +219,6 @@ def main():
                     creation_time = int(creation_time)
                 except (ValueError, TypeError):
                     creation_time = 0
-                if creation_time and (page_min_ct is None or creation_time < page_min_ct):
-                    page_min_ct = creation_time
 
                 # HARD SAFETY BACKSTOP — never auto-close a starred case.
                 # This does not trust the server-side starred filter; it
@@ -194,6 +233,21 @@ def main():
                     })
                     continue
 
+                # HARD SAFETY BACKSTOP — never close a case that isn't still 'new'.
+                # Mirrors the starred backstop: does not trust the server-side
+                # status filter, re-confirms from the returned record, and fails
+                # closed. An in-progress / under-investigation / resolved case can
+                # never reach the close call even if the server filter ever returns
+                # one. Confirmed: get_incidents returns status as lowercase 'new'.
+                status_val = str(inc.get('status', '')).strip().lower()
+                if status_val != 'new':
+                    skipped.append({
+                        'incident_id': incident_id,
+                        'aggregated_score': aggregated_score,
+                        'reason': f"status guard: status={inc.get('status')!r} not confirmed 'new'"
+                    })
+                    continue
+
                 # Skip if an analyst manually scored it (null unless set).
                 if manual_score is not None:
                     skipped.append({
@@ -203,8 +257,25 @@ def main():
                     })
                     continue
 
-                # Skip if score is missing or above threshold.
-                if aggregated_score is None or aggregated_score > threshold:
+                # Skip if the score is missing. RECORDED (was a silent continue):
+                # the silent drop produced 201 scanned / 0 passed / 0 skipped with
+                # no way to tell "missing score field" from "above threshold",
+                # which hid the real cause for a whole session.
+                if aggregated_score is None:
+                    skipped.append({
+                        'incident_id': incident_id,
+                        'aggregated_score': None,
+                        'reason': 'aggregated_score missing/None on returned case'
+                    })
+                    continue
+
+                # Skip if the score is above threshold. RECORDED for the same reason.
+                if aggregated_score > threshold:
+                    skipped.append({
+                        'incident_id': incident_id,
+                        'aggregated_score': aggregated_score,
+                        'reason': f'aggregated_score {aggregated_score} > threshold {threshold}'
+                    })
                     continue
 
                 # Defensive client-side age guard (the server-side filter already
@@ -222,38 +293,68 @@ def main():
                 demisto.debug(f"Skipping incident {inc.get('incident_id', 'unknown')}: {e}")
                 continue
 
-        # Fewer than a full page means the eligible set is exhausted.
+        # A short page means the age-eligible set is exhausted.
         if len(incidents) < batch_size:
             break
-        # Advance the cursor strictly below the oldest creation_time seen so the
-        # next page is the next-older slice (keyset, offset stays 0).
-        if page_min_ct is None:
-            break
-        cursor_ms = page_min_ct - 1
-
-    # Write one row per passed incident to the active execution dataset. In a
-    # dry run these are tagged as shadow so dashboards/audits can see what the
-    # job WOULD have closed without it actually closing anything.
-    if passed:
-        rows = []
-        for inc in passed:
-            rows.append({
-                'timestamp': str(int(time.time())),
-                'event_type': 'auto_triage',
-                'universal_command': 'auto_close_incident',
-                'action_taken': 'auto_triage_would_close' if dry_run else 'auto_triage_closed',
-                'action_status': 'dry_run' if dry_run else 'success',
-                'execution_mode': 'shadow' if dry_run else 'production',
-                'shadow_mode_state': 'shadow' if dry_run else 'not_applicable',
-                'lifecycle': 'AUTO_TRIAGE',
-                'phase': 'triage',
-                'incident_id': str(inc.get('incident_id', '')),
-                'aggregated_score': str(inc.get('aggregated_score', '')),
-                'tags': ['auto_triage_would_close' if dry_run else 'auto_triage_closed'],
-                'has_error': False,
-                'error_type': '',
-                'error_message': ''
-            })
+        # Advance the offset to the next page. This script closes nothing until
+        # the fetch loop has finished, so its own closes cannot shift the pages.
+        search_from += batch_size
+
+    # --- Close phase ---------------------------------------------------------
+    # Close each passing case here, in-process, one update_incident call per ID
+    # (the API has no bulk close). Doing it in this loop instead of a playbook
+    # forEach avoids per-iteration context spin-up — the old task 8 bottleneck
+    # that selected ~1,300/run but only closed dozens. Each dataset row is now
+    # keyed to the ACTUAL close result, so the dataset stops over-counting
+    # un-closed cases that get re-selected every run.
+    rows = []
+    closed_ok = []
+    closed_fail = []
+
+    for inc in passed:
+        incident_id = str(inc.get('incident_id', ''))
+        if not incident_id:
+            continue
+
+        if dry_run:
+            # Select only — close nothing. Row tagged shadow so the shadow
+            # value-metrics path can show what WOULD have closed.
+            success, err = True, ''
+        else:
+            # Same wall-clock budget guards the (slower) close loop. Unclosed
+            # passers stay status=new; the next scheduled run resumes them.
+            if time.time() - run_start > MAX_RUNTIME_SECONDS:
+                budget_hit = True
+                demisto.debug(f'Runtime budget hit during close phase after '
+                              f'{len(closed_ok)} closes; stopping with partial progress.')
+                break
+            success, err = close_case(incident_id)
+
+        if success:
+            closed_ok.append(incident_id)
+        else:
+            closed_fail.append({'incident_id': incident_id, 'error': err})
+
+        rows.append({
+            'timestamp': str(int(time.time())),
+            'event_type': 'auto_triage',
+            'universal_command': 'auto_close_incident',
+            'action_taken': 'auto_triage_would_close' if dry_run else 'auto_triage_closed',
+            'action_status': 'dry_run' if dry_run else ('success' if success else 'error'),
+            'execution_mode': 'shadow' if dry_run else 'production',
+            'shadow_mode_state': 'shadow' if dry_run else 'not_applicable',
+            'lifecycle': 'AUTO_TRIAGE',
+            'phase': 'triage',
+            'incident_id': incident_id,
+            'aggregated_score': str(inc.get('aggregated_score', '')),
+            'tags': ['auto_triage_would_close' if dry_run else 'auto_triage_closed'],
+            'has_error': (not dry_run and not success),
+            'error_type': '' if (dry_run or success) else 'update_incident_failed',
+            'error_message': '' if (dry_run or success) else err
+        })
+
+    # One dataset write per run with the actual per-case outcomes.
+    if rows:
         try:
             execute_command(
                 'xql-post-to-dataset',
@@ -266,50 +367,38 @@ def main():
         except Exception as e:
             demisto.debug(f'Dataset write failed: {e}')
 
-    # Build bulk-close batches: each entry is a ready-to-post JSON array string
-    # of incident IDs (<= CLOSE_CHUNK_SIZE), so the close playbook can drop it
-    # straight into incident_id_list without the array-to-JSON interpolation
-    # problem. 32k eligible -> ~320 bulk calls instead of 32k single calls.
-    passed_ids = [str(inc.get('incident_id', '')) for inc in passed
-                  if str(inc.get('incident_id', ''))]
-    all_batches = [
-        json.dumps(passed_ids[i:i + CLOSE_CHUNK_SIZE])
-        for i in range(0, len(passed_ids), CLOSE_CHUNK_SIZE)
-    ]
-
-    # In dry run, hand the close path NOTHING so neither the bulk loop nor a
-    # per-case loop can close anything; surface the would-close set separately.
-    close_batches = [] if dry_run else all_batches
-    filtered_incidents = [] if dry_run else passed
-
     outputs = {
         'dry_run': dry_run,
-        'filtered_incidents': filtered_incidents,
-        'close_batches': close_batches,
         'skipped_incidents': skipped,
         'passed_count': len(passed),
-        'batch_count': len(close_batches),
+        'closed_count': len(closed_ok),
+        'failed_count': len(closed_fail),
+        'closed_ids': closed_ok[:500],      # capped sample for visibility
+        'failed': closed_fail[:200],        # capped sample for visibility
         'skipped_count': len(skipped),
         'total_scanned': total_scanned,
-        'batches_run': batches_run
+        'batches_run': batches_run,
+        'budget_hit': budget_hit
     }
     if dry_run:
-        outputs['would_close_count'] = len(passed_ids)
-        outputs['would_close_ids'] = passed_ids[:500]  # capped sample for visibility
+        outputs['would_close_count'] = len(passed)
+        outputs['would_close_ids'] = closed_ok[:500]
+
+    budget_note = (' [runtime budget hit — partial run, next run resumes]'
+                   if budget_hit else '')
 
     if dry_run:
         readable = (
-            f'DRY RUN — would close {len(passed_ids)} case(s); closed 0. '
+            f'DRY RUN — would close {len(passed)} case(s); closed 0. '
             f'{len(skipped)} skipped (threshold: {threshold}, window: {window_hours}h, '
-            f'scanned: {total_scanned} across {batches_run} batches). '
+            f'scanned: {total_scanned} across {batches_run} batches){budget_note}. '
             f'Set dry_run=false to close for real.'
         )
     else:
         readable = (
-            f'Score filter complete: {len(passed)} passed in {len(close_batches)} '
-            f'close batch(es), {len(skipped)} skipped '
-            f'(threshold: {threshold}, window: {window_hours}h, '
-            f'scanned: {total_scanned} across {batches_run} batches)'
+            f'Auto triage: closed {len(closed_ok)}, failed {len(closed_fail)}, '
+            f'{len(skipped)} skipped (threshold: {threshold}, window: {window_hours}h, '
+            f'scanned: {total_scanned} across {batches_run} batches){budget_note}'
         )
 
     return_results(CommandResults(
diff --git a/Packs/soc-optimization-unified/Scripts/SOCAutoTriageScoreFilter/SOCAutoTriageScoreFilter.yml b/Packs/soc-optimization-unified/Scripts/SOCAutoTriageScoreFilter/SOCAutoTriageScoreFilter.yml
index 32ac9ed3..c40052e9 100644
--- a/Packs/soc-optimization-unified/Scripts/SOCAutoTriageScoreFilter/SOCAutoTriageScoreFilter.yml
+++ b/Packs/soc-optimization-unified/Scripts/SOCAutoTriageScoreFilter/SOCAutoTriageScoreFilter.yml
@@ -20,11 +20,16 @@ tags:
 - SOC
 - SOC_Framework_Unified
 comment: |-
-  Fetches unstarred new cases via core-api-post in paginated batches of 100,
-  sorted by creation_time asc. Filters by aggregated_score <= threshold,
-  creation_time outside window, and manual_score is null. Skips above-threshold
-  cases without holding them in memory. Stops when eligible cases are found or
-  max_batches reached. Part of JOB - Auto Triage V3.
+  Fetches unstarred new cases via core-api-post using offset pagination
+  (get_incidents cannot sort, so keyset paging is unsafe). Server-side filters:
+  status=new, starred=false, creation_time <= now-window_hours. Client-side
+  gates: aggregated_score <= threshold (not filterable server-side) and
+  manual_score is null; every skip is recorded with a reason. Closes each
+  passing case in-process via update_incident (one ID per call — the API has no
+  bulk close), one dataset row per actual close result. A wall-clock budget caps
+  both fetch and close; partial runs are safe because closed cases no longer
+  match status=new and the next run resumes. dry_run selects and reports without
+  closing. Part of JOB - Auto Triage V3.
   VD3: frees analyst time by auto-closing low-signal noise cases.
 enabled: true
 args:
@@ -41,16 +46,46 @@ args:
 - supportedModules: []
   name: max_batches
   required: false
-  description: Maximum number of 100-case batches to fetch from the API per run.
-    Default 5 (scans up to 500 cases). Increase if backlog consistently exceeds 500.
-  defaultValue: "5"
+  description: Per-run cap on 100-case offset pages (deepest offset = max_batches*100).
+    With offset pagination each page is O(offset), so this is a cost ceiling, not a
+    full-backlog drain — large backlogs clear over repeated scheduled runs. A wall-clock
+    guard stops the run before the automation timeout regardless of this value.
+  defaultValue: "20"
+- supportedModules: []
+  name: batch_size
+  required: false
+  description: Incidents to fetch per get_incidents call. Hard-capped at the API
+    per-request ceiling of 100 and floored at 1. Junk/blank falls back to 100.
+  defaultValue: "100"
+- supportedModules: []
+  name: dry_run
+  required: false
+  description: When true, select eligible cases and report what WOULD close but
+    close nothing, and tag dataset rows as shadow. Use for safe testing.
+    Accepts true/1/yes. Default false.
+  defaultValue: "false"
 outputs:
-- contextPath: AutoTriage.filtered_incidents
-  description: Incident objects that passed both gates (score <= threshold, manual_score
-    is null)
+- contextPath: AutoTriage.closed_count
+  description: Cases closed this run. In a dry run nothing is closed, but this reports would_close_count.
+  type: Number
+- contextPath: AutoTriage.failed_count
+  description: Number of cases whose update_incident close call failed this run.
+  type: Number
+- contextPath: AutoTriage.closed_ids
+  description: Capped sample of incident IDs closed this run.
   type: Unknown
+- contextPath: AutoTriage.failed
+  description: Capped sample of {incident_id, error} for closes that failed.
+  type: Unknown
+- contextPath: AutoTriage.dry_run
+  description: Whether this run was a dry run (selected but closed nothing).
+  type: Boolean
+- contextPath: AutoTriage.would_close_count
+  description: In a dry run, the number of cases that would have closed.
+  type: Number
 - contextPath: AutoTriage.skipped_incidents
-  description: Incident objects that failed one or both gates with reason
+  description: Incident objects that failed a gate, each with a reason (includes
+    score-based skips).
   type: Unknown
 - contextPath: AutoTriage.passed_count
   description: Count of incidents that passed the score filter
diff --git a/docs/soc-optimization-unified/overview.md b/docs/soc-optimization-unified/overview.md
index c05c09b1..d388c4b4 100644
--- a/docs/soc-optimization-unified/overview.md
+++ b/docs/soc-optimization-unified/overview.md
@@ -5,7 +5,7 @@
 | Field | Value |
 |---|---|
 | ID | `soc-optimization-unified` |
-| Version | `3.10.7` |
+| Version | `3.10.9` |
 | Category | Use Case |
 | Pack Path | `Packs/soc-optimization-unified` |
 | Manifest | [`Packs/soc-optimization-unified/xsoar_config.json`](https://github.com/Palo-Cortex/secops-framework/blob/main/Packs/soc-optimization-unified/xsoar_config.json) |
@@ -24,7 +24,7 @@ Additional custom packs the installer pulls in alongside this pack.
 
 | Pack | System | Source |
 |---|---|---|
-| `soc-optimization-unified.zip` | `yes` | [release](https://github.com/Palo-Cortex/secops-framework/releases/download/soc-optimization-unified-v3.10.7/soc-optimization-unified-v3.10.7.zip) |
+| `soc-optimization-unified.zip` | `yes` | [release](https://github.com/Palo-Cortex/secops-framework/releases/download/soc-optimization-unified-v3.10.9/soc-optimization-unified-v3.10.9.zip) |
 | `soc-framework-nist-ir.zip` | `yes` | [release](https://github.com/Palo-Cortex/secops-framework/releases/download/soc-framework-nist-ir-v1.6.3/soc-framework-nist-ir-v1.6.3.zip) |
 
 ## Marketplace Dependencies