From c9f4ff29156944deed43b8cf4952be96252934c6 Mon Sep 17 00:00:00 2001 From: Scott Brumley Date: Tue, 16 Jun 2026 15:04:32 -0400 Subject: [PATCH] =?UTF-8?q?fix(auto-triage):=20close=20eligible=20cases=20?= =?UTF-8?q?reliably=20=E2=80=94=20offset=20paging,=20in-loop=20close,=20fa?= =?UTF-8?q?il-closed=20guards?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JOB - Auto Triage was selecting cases but draining almost nothing (~70/hr). Root causes were stacked: it scanned a slice that never reached the low-score backlog, and the close path couldn't keep up with selection. SOCAutoTriageScoreFilter.py - Replace keyset-on-creation_time paging with offset pagination. get_incidents has no sort parameter (sort_by_creation_time is silently ignored), so keyset walking skipped the eligible backlog entirely (201 scanned / 0 passed). - Close in-loop: per-case update_incident in a tight Python loop. The API has no bulk close (incident_id_list is rejected 400), and the old task 8 forEach spun a separatecontext per case — that was the throughput bottleneck. - Write one execution-dataset row per actual close result instead of per selection, ending the re-selection inflation (~40k rows for ~563 closes). - Record every skip with a reason; score skips were silently dropped and hid the real cause for a full session. - Wall-clock runtime guard (540s, under the 600s automation timeout); partial runs resume next schedule. Replaces max_batches=200 that timed out under offset paging. Default max_batches 200 -> 20. - Add fail-closed status backstop (skip unless status is confirmed 'new'), mirroring the starred backstop, so in-progress cases can never be closed. SOCAutoTriageScoreFilter.yml - Declare dry_run and batch_size args (were being stripped at invocation). - Update outputs/contract to the close-result fields. JOB_-_Auto_Triage_V3.yml - Remove task 5 (condition) and task 8 (per-case forEach close playbook); task 12 now closes in-process and flows straight to close-job. --- .../Playbooks/JOB_-_Auto_Triage_V3.yml | 108 +------ .../SOCAutoTriageScoreFilter.py | 303 +++++++++++------- .../SOCAutoTriageScoreFilter.yml | 59 +++- docs/soc-optimization-unified/overview.md | 4 +- 4 files changed, 253 insertions(+), 221 deletions(-) diff --git a/Packs/soc-optimization-unified/Playbooks/JOB_-_Auto_Triage_V3.yml b/Packs/soc-optimization-unified/Playbooks/JOB_-_Auto_Triage_V3.yml index 6276dde0..4d0205ca 100644 --- a/Packs/soc-optimization-unified/Playbooks/JOB_-_Auto_Triage_V3.yml +++ b/Packs/soc-optimization-unified/Playbooks/JOB_-_Auto_Triage_V3.yml @@ -51,50 +51,6 @@ tasks: isoversize: false isautoswitchedtoquietmode: false - "5": - id: "5" - taskid: 11786de0-5a13-412d-8a8a-1ab69922d510 - type: condition - task: - id: 11786de0-5a13-412d-8a8a-1ab69922d510 - version: -1 - name: Cases to Triage? - type: condition - iscommand: false - brand: "" - playbooktaskmissingcomponent: null - istaskmissingcomponenterrordismissed: false - nexttasks: - '#default#': - - "9" - "yes": - - "8" - separatecontext: false - conditions: - - label: "yes" - condition: - - - operator: isExists - left: - value: - simple: AutoTriage.filtered_incidents.incident_id - iscontext: true - right: - value: {} - continueonerrortype: "" - view: |- - { - "position": { - "x": 50, - "y": 420 - } - } - note: false - timertriggers: [] - ignoreworker: false - skipunavailable: false - quietmode: 0 - isoversize: false - isautoswitchedtoquietmode: false "7": id: "7" taskid: 6ae6dff9-3719-4b2e-8758-d1cf54a24c13 @@ -124,52 +80,6 @@ tasks: quietmode: 0 isoversize: false isautoswitchedtoquietmode: false - "8": - id: "8" - taskid: 481cd8d5-7bca-4923-8cd0-77d48d883e8f - type: playbook - task: - id: 481cd8d5-7bca-4923-8cd0-77d48d883e8f - version: -1 - name: SOC Close Cases_V3 - playbookName: SOC Close Cases_V3 - type: playbook - iscommand: false - brand: "" - playbooktaskmissingcomponent: null - istaskmissingcomponenterrordismissed: false - nexttasks: - '#none#': - - "9" - scriptarguments: - incident_id: - simple: ${AutoTriage.filtered_incidents.incident_id} - resolve_comment: - simple: 'SOC Framework Auto Triage: case exceeded age threshold, aggregated_score - below threshold, no analyst activity detected. Auto-closed by JOB.' - separatecontext: true - continueonerror: true - continueonerrortype: "" - loop: - iscommand: false - exitCondition: "" - wait: 1 - max: 500 - forEach: true - view: |- - { - "position": { - "x": 162.5, - "y": 610 - } - } - note: false - timertriggers: [] - ignoreworker: false - skipunavailable: false - quietmode: 0 - isoversize: false - isautoswitchedtoquietmode: false "9": id: "9" taskid: ecad854e-0b30-4494-8c61-183d70ee7952 @@ -211,12 +121,12 @@ tasks: task: id: ba55380b-2e2c-44ef-8fcf-d828bfbfa09a version: -1 - name: Fetch and Filter Cases by Score Threshold - description: Fetches unstarred new cases via core-api-post in paginated batches - of 100, sorted by creation_time asc. Skips cases with aggregated_score above - TriageScoreThreshold. Applies age window filter (TriageWindowHours) and skips - analyst-touched cases. Stops when eligible cases are found or max_batches - reached. Passes only eligible cases to the close loop. + name: Fetch, Filter, and Close Cases by Score Threshold + description: Fetches unstarred new cases via core-api-post using offset + pagination, gated by TriageScoreThreshold / TriageWindowHours / no analyst + activity, then closes each eligible case in-process via update_incident + (one ID per call) and writes one execution-dataset row per close result. + A wall-clock budget caps the run; partial runs resume next schedule. script: SOCAutoTriageScoreFilter type: regular iscommand: false @@ -225,7 +135,7 @@ tasks: istaskmissingcomponenterrordismissed: false nexttasks: '#none#': - - "5" + - "9" scriptarguments: score_threshold: complex: @@ -327,9 +237,7 @@ tasks: system: true view: |- { - "linkLabelsPosition": { - "5_9_#default#": 0.82 - }, + "linkLabelsPosition": {}, "paper": { "dimensions": { "height": 1170, diff --git a/Packs/soc-optimization-unified/Scripts/SOCAutoTriageScoreFilter/SOCAutoTriageScoreFilter.py b/Packs/soc-optimization-unified/Scripts/SOCAutoTriageScoreFilter/SOCAutoTriageScoreFilter.py index bdab1d37..c7e88687 100644 --- a/Packs/soc-optimization-unified/Scripts/SOCAutoTriageScoreFilter/SOCAutoTriageScoreFilter.py +++ b/Packs/soc-optimization-unified/Scripts/SOCAutoTriageScoreFilter/SOCAutoTriageScoreFilter.py @@ -5,18 +5,55 @@ # 100 is the get_incidents per-request ceiling. War-room arrays may show a -# cosmetic truncation note at this size, but the forEach close loop is unaffected. +# cosmetic truncation note at this size; it does not affect processing. BATCH_SIZE = 100 -# How many incident IDs to bundle into a single bulk update_incident call. -# Each emitted batch is a ready-to-post JSON array string for incident_id_list. -CLOSE_CHUNK_SIZE = 100 -API_URI = '/public_api/v1/incidents/get_incidents' +GET_INCIDENTS_URI = '/public_api/v1/incidents/get_incidents' +UPDATE_INCIDENT_URI = '/public_api/v1/incidents/update_incident' +# update_incident has NO bulk mode: posting incident_id_list returns +# 400 "incident_id field is missing or incorrect" (verified against tenant, +# Jun 2026). We close one ID per call, in a tight Python loop here rather than a +# playbook forEach, to avoid per-iteration context spin-up. +RESOLVE_STATUS = 'resolved_other' +RESOLVE_COMMENT = ('SOC Framework Auto Triage: case exceeded age threshold, ' + 'aggregated_score below threshold, no analyst activity ' + 'detected. Auto-closed by JOB.') +API_URI = GET_INCIDENTS_URI # back-compat alias +# Per-run wall-clock budget. Two O(n) costs share it: offset pagination during +# fetch (deep search_from makes get_incidents progressively slower) and the +# per-case close loop (one update_incident call each). When the budget is hit +# the run stops cleanly and returns what it has — partial progress is safe +# because closed cases leave status=new and the next scheduled run resumes. +# +# MUST stay safely under the script's automation timeout (Settings > the +# automation's timeout). Leave ~10% headroom for the dataset write + return. +# Current automation timeout: 600s. +MAX_RUNTIME_SECONDS = 540 FIELDS = [ 'incident_id', 'aggregated_score', 'creation_time', 'status', 'starred', 'manual_score' ] +def close_case(incident_id): + """Close one case via update_incident (single incident_id — the API rejects + incident_id_list). Returns (success: bool, error_message: str). Never raises; + one failed close must not abort the rest of the batch.""" + body = json.dumps({ + 'request_data': { + 'incident_id': str(incident_id), + 'update_data': { + 'status': RESOLVE_STATUS, + 'resolve_comment': RESOLVE_COMMENT + } + } + }) + try: + execute_command('core-api-post', {'uri': UPDATE_INCIDENT_URI, 'body': body}) + return True, '' + except Exception as e: + return False, str(e) + + def _to_float(value): """Best-effort float; returns None if not parseable.""" try: @@ -42,35 +79,38 @@ def _is_unstarred(value): return False -def fetch_batch(upper_ms: int, batch_size: int) -> dict: - """Fetch the newest unstarred New incidents created at or before upper_ms. - - KEYSET pagination: callers walk the age-eligible set downward by - creation_time, always querying from offset 0 with a shrinking creation_time - upper bound. This keeps every call O(batch_size) instead of O(offset) — deep - offset pagination (search_from=30000) makes the API skip every preceding row - and gets progressively slower, which is what times the job out on large - backlogs. There is no growing search_from here. - - Contract notes (confirmed): - - status value is the lowercase public-API enum: "new" (the Cases UI - capitalizes "New" for display, but the get_incidents filter enum is - lowercase). Sending "New" matches zero rows. +def fetch_batch(cutoff_ms: int, search_from: int, batch_size: int) -> dict: + """Fetch one page of unstarred New incidents created at or before cutoff_ms. + + OFFSET pagination. get_incidents has NO sort capability — any sort_by_* + key in request_data is silently ignored (verified against the tenant API, + Jun 2026), so results come back in arbitrary order. Keyset pagination on + creation_time is therefore unsafe: on an unordered page, advancing a cursor + below the page's minimum creation_time skips every un-returned row above it, + which is exactly how the age-eligible low-score backlog went unscanned. + Plain offset pagination (fixed filter, growing search_from) is the only way + to cover the whole age-eligible set when the API will not order results. + Deep offsets do get progressively slower, but max_batches bounds the cost + and closed cases drop out of status=new between runs, so the offset window + keeps advancing through fresh cases run over run. + + Contract notes (confirmed against tenant, Jun 2026): + - status enum is case-insensitive here: both "new" and "New" match (the + earlier "New matches zero rows" claim was wrong). - `in` operator -> array value; `lte` -> scalar value. + - creation_time is epoch milliseconds; filterable server-side. - aggregated_score is NOT filterable server-side -> gated client-side. - - `sort_by_creation_time` is the documented working sort key. """ body = json.dumps({ 'request_data': { 'filters': [ {'field': 'status', 'operator': 'in', 'value': ['new']}, {'field': 'starred', 'operator': 'in', 'value': [False]}, - {'field': 'creation_time', 'operator': 'lte', 'value': int(upper_ms)}, + {'field': 'creation_time', 'operator': 'lte', 'value': int(cutoff_ms)}, ], 'fields': FIELDS, - 'sort_by_creation_time': 'desc', - 'search_from': 0, - 'search_to': batch_size + 'search_from': int(search_from), + 'search_to': int(search_from) + int(batch_size) } }) result = execute_command('core-api-post', {'uri': API_URI, 'body': body}) @@ -86,11 +126,12 @@ def main(): # pass score_threshold explicitly so the policy lives in one obvious place. threshold = args.get('score_threshold', '40') window_hours = args.get('window_hours', '6') - # Default high enough to page the whole age-eligible backlog to exhaustion - # on any tenant (the loop stops on its own when a short page comes back). - # This is the safety ceiling, not the normal cost. For this to apply, the - # JOB task must NOT pass a literal max_batches (a typed value overrides it). - max_batches = args.get('max_batches', '200') + # max_batches caps offset depth per run. With offset pagination each batch + # is O(offset), so this is a COST ceiling, not a "drain the whole backlog in + # one run" knob — large backlogs drain over repeated scheduled runs as closed + # cases leave status=new. Keep it modest (deepest offset = max_batches*100); + # the wall-clock guard (MAX_RUNTIME_SECONDS) is the real timeout backstop. + max_batches = args.get('max_batches', '20') threshold = _to_float(threshold) if threshold is None: @@ -103,9 +144,9 @@ def main(): try: max_batches = int(max_batches) except (ValueError, TypeError): - max_batches = 200 + max_batches = 20 if max_batches < 1: - max_batches = 200 + max_batches = 20 # batch_size is tunable but hard-capped at the get_incidents per-request # ceiling of 100, and floored at 1. Junk/blank falls back to BATCH_SIZE. @@ -126,12 +167,21 @@ def main(): skipped = [] total_scanned = 0 batches_run = 0 - seen_ids = set() - cursor_ms = cutoff_ms # creation_time upper bound; walks downward each batch + search_from = 0 # offset into the age-eligible result set; grows each batch + run_start = time.time() + budget_hit = False for batch_num in range(max_batches): + # Wall-clock guard: stop before the Docker automation timeout. Returning + # partial progress is safe — the JOB closes what we found, those cases + # leave status=new, and the next run resumes from a shallow offset. + if time.time() - run_start > MAX_RUNTIME_SECONDS: + budget_hit = True + demisto.debug(f'Runtime budget {MAX_RUNTIME_SECONDS}s hit after ' + f'{batches_run} batches; stopping with partial progress.') + break try: - result = fetch_batch(cursor_ms, batch_size) + result = fetch_batch(cutoff_ms, search_from, batch_size) except Exception as e: # If the very first fetch fails we have scanned nothing — that is an # auth/API failure (e.g. 401 unauthorized), NOT an empty backlog. @@ -154,23 +204,14 @@ def main(): demisto.debug(f'Batch {batch_num}: no incidents returned, stopping.') break - # Dedup against the boundary record(s) carried over by the <= cursor. - new_incidents = [i for i in incidents - if str(i.get('incident_id', '')) not in seen_ids] - if not new_incidents: - # No forward progress (entire page already seen) -> stop. - break - batches_run += 1 - total_scanned += len(new_incidents) - page_min_ct = None + total_scanned += len(incidents) - for inc in new_incidents: + for inc in incidents: # One malformed incident must never abort the run and leave the rest # of the backlog unprocessed. try: incident_id = inc.get('incident_id', 'unknown') - seen_ids.add(str(incident_id)) aggregated_score = _to_float(inc.get('aggregated_score')) manual_score = inc.get('manual_score') creation_time = inc.get('creation_time', 0) @@ -178,8 +219,6 @@ def main(): creation_time = int(creation_time) except (ValueError, TypeError): creation_time = 0 - if creation_time and (page_min_ct is None or creation_time < page_min_ct): - page_min_ct = creation_time # HARD SAFETY BACKSTOP — never auto-close a starred case. # This does not trust the server-side starred filter; it @@ -194,6 +233,21 @@ def main(): }) continue + # HARD SAFETY BACKSTOP — never close a case that isn't still 'new'. + # Mirrors the starred backstop: does not trust the server-side + # status filter, re-confirms from the returned record, and fails + # closed. An in-progress / under-investigation / resolved case can + # never reach the close call even if the server filter ever returns + # one. Confirmed: get_incidents returns status as lowercase 'new'. + status_val = str(inc.get('status', '')).strip().lower() + if status_val != 'new': + skipped.append({ + 'incident_id': incident_id, + 'aggregated_score': aggregated_score, + 'reason': f"status guard: status={inc.get('status')!r} not confirmed 'new'" + }) + continue + # Skip if an analyst manually scored it (null unless set). if manual_score is not None: skipped.append({ @@ -203,8 +257,25 @@ def main(): }) continue - # Skip if score is missing or above threshold. - if aggregated_score is None or aggregated_score > threshold: + # Skip if the score is missing. RECORDED (was a silent continue): + # a silent drop here made 201 scanned / 0 passed / 0 skipped + # indistinguishable from "missing score field" vs "above + # threshold" and hid the real cause for a whole session. + if aggregated_score is None: + skipped.append({ + 'incident_id': incident_id, + 'aggregated_score': None, + 'reason': 'aggregated_score missing/None on returned case' + }) + continue + + # Skip if the score is above threshold. RECORDED for the same reason. + if aggregated_score > threshold: + skipped.append({ + 'incident_id': incident_id, + 'aggregated_score': aggregated_score, + 'reason': f'aggregated_score {aggregated_score} > threshold {threshold}' + }) continue # Defensive client-side age guard (the server-side filter already @@ -222,38 +293,68 @@ def main(): demisto.debug(f"Skipping incident {inc.get('incident_id', 'unknown')}: {e}") continue - # Fewer than a full page means the eligible set is exhausted. + # A short page means the age-eligible set is exhausted. if len(incidents) < batch_size: break - # Advance the cursor strictly below the oldest creation_time seen so the - # next page is the next-older slice (keyset, offset stays 0). - if page_min_ct is None: - break - cursor_ms = page_min_ct - 1 - - # Write one row per passed incident to the active execution dataset. In a - # dry run these are tagged as shadow so dashboards/audits can see what the - # job WOULD have closed without it actually closing anything. - if passed: - rows = [] - for inc in passed: - rows.append({ - 'timestamp': str(int(time.time())), - 'event_type': 'auto_triage', - 'universal_command': 'auto_close_incident', - 'action_taken': 'auto_triage_would_close' if dry_run else 'auto_triage_closed', - 'action_status': 'dry_run' if dry_run else 'success', - 'execution_mode': 'shadow' if dry_run else 'production', - 'shadow_mode_state': 'shadow' if dry_run else 'not_applicable', - 'lifecycle': 'AUTO_TRIAGE', - 'phase': 'triage', - 'incident_id': str(inc.get('incident_id', '')), - 'aggregated_score': str(inc.get('aggregated_score', '')), - 'tags': ['auto_triage_would_close' if dry_run else 'auto_triage_closed'], - 'has_error': False, - 'error_type': '', - 'error_message': '' - }) + # Advance the offset to the next page (filter and order are stable within + # a run because nothing is closed until the script returns). + search_from += batch_size + + # --- Close phase --------------------------------------------------------- + # Close each passing case here, in-process, one update_incident call per ID + # (the API has no bulk close). Doing it in this loop instead of a playbook + # forEach avoids per-iteration context spin-up — the old task 8 bottleneck + # that selected ~1,300/run but only closed dozens. Each dataset row is now + # keyed to the ACTUAL close result, so the dataset stops over-counting + # un-closed cases that get re-selected every run. + rows = [] + closed_ok = [] + closed_fail = [] + + for inc in passed: + incident_id = str(inc.get('incident_id', '')) + if not incident_id: + continue + + if dry_run: + # Select only — close nothing. Row tagged shadow so the shadow + # value-metrics path can show what WOULD have closed. + success, err = True, '' + else: + # Same wall-clock budget guards the (slower) close loop. Unclosed + # passers stay status=new; the next scheduled run resumes them. + if time.time() - run_start > MAX_RUNTIME_SECONDS: + budget_hit = True + demisto.debug(f'Runtime budget hit during close phase after ' + f'{len(closed_ok)} closes; stopping with partial progress.') + break + success, err = close_case(incident_id) + + if success: + closed_ok.append(incident_id) + else: + closed_fail.append({'incident_id': incident_id, 'error': err}) + + rows.append({ + 'timestamp': str(int(time.time())), + 'event_type': 'auto_triage', + 'universal_command': 'auto_close_incident', + 'action_taken': 'auto_triage_would_close' if dry_run else 'auto_triage_closed', + 'action_status': 'dry_run' if dry_run else ('success' if success else 'error'), + 'execution_mode': 'shadow' if dry_run else 'production', + 'shadow_mode_state': 'shadow' if dry_run else 'not_applicable', + 'lifecycle': 'AUTO_TRIAGE', + 'phase': 'triage', + 'incident_id': incident_id, + 'aggregated_score': str(inc.get('aggregated_score', '')), + 'tags': ['auto_triage_would_close' if dry_run else 'auto_triage_closed'], + 'has_error': (not dry_run and not success), + 'error_type': '' if (dry_run or success) else 'update_incident_failed', + 'error_message': '' if (dry_run or success) else err + }) + + # One dataset write per run with the actual per-case outcomes. + if rows: try: execute_command( 'xql-post-to-dataset', @@ -266,50 +367,38 @@ def main(): except Exception as e: demisto.debug(f'Dataset write failed: {e}') - # Build bulk-close batches: each entry is a ready-to-post JSON array string - # of incident IDs (<= CLOSE_CHUNK_SIZE), so the close playbook can drop it - # straight into incident_id_list without the array-to-JSON interpolation - # problem. 32k eligible -> ~320 bulk calls instead of 32k single calls. - passed_ids = [str(inc.get('incident_id', '')) for inc in passed - if str(inc.get('incident_id', ''))] - all_batches = [ - json.dumps(passed_ids[i:i + CLOSE_CHUNK_SIZE]) - for i in range(0, len(passed_ids), CLOSE_CHUNK_SIZE) - ] - - # In dry run, hand the close path NOTHING so neither the bulk loop nor a - # per-case loop can close anything; surface the would-close set separately. - close_batches = [] if dry_run else all_batches - filtered_incidents = [] if dry_run else passed - outputs = { 'dry_run': dry_run, - 'filtered_incidents': filtered_incidents, - 'close_batches': close_batches, 'skipped_incidents': skipped, 'passed_count': len(passed), - 'batch_count': len(close_batches), + 'closed_count': len(closed_ok), + 'failed_count': len(closed_fail), + 'closed_ids': closed_ok[:500], # capped sample for visibility + 'failed': closed_fail[:200], # capped sample for visibility 'skipped_count': len(skipped), 'total_scanned': total_scanned, - 'batches_run': batches_run + 'batches_run': batches_run, + 'budget_hit': budget_hit } if dry_run: - outputs['would_close_count'] = len(passed_ids) - outputs['would_close_ids'] = passed_ids[:500] # capped sample for visibility + outputs['would_close_count'] = len(passed) + outputs['would_close_ids'] = closed_ok[:500] + + budget_note = (' [runtime budget hit — partial run, next run resumes]' + if budget_hit else '') if dry_run: readable = ( - f'DRY RUN — would close {len(passed_ids)} case(s); closed 0. ' + f'DRY RUN — would close {len(passed)} case(s); closed 0. ' f'{len(skipped)} skipped (threshold: {threshold}, window: {window_hours}h, ' - f'scanned: {total_scanned} across {batches_run} batches). ' + f'scanned: {total_scanned} across {batches_run} batches){budget_note}. ' f'Set dry_run=false to close for real.' ) else: readable = ( - f'Score filter complete: {len(passed)} passed in {len(close_batches)} ' - f'close batch(es), {len(skipped)} skipped ' - f'(threshold: {threshold}, window: {window_hours}h, ' - f'scanned: {total_scanned} across {batches_run} batches)' + f'Auto triage: closed {len(closed_ok)}, failed {len(closed_fail)}, ' + f'{len(skipped)} skipped (threshold: {threshold}, window: {window_hours}h, ' + f'scanned: {total_scanned} across {batches_run} batches){budget_note}' ) return_results(CommandResults( diff --git a/Packs/soc-optimization-unified/Scripts/SOCAutoTriageScoreFilter/SOCAutoTriageScoreFilter.yml b/Packs/soc-optimization-unified/Scripts/SOCAutoTriageScoreFilter/SOCAutoTriageScoreFilter.yml index 32ac9ed3..c40052e9 100644 --- a/Packs/soc-optimization-unified/Scripts/SOCAutoTriageScoreFilter/SOCAutoTriageScoreFilter.yml +++ b/Packs/soc-optimization-unified/Scripts/SOCAutoTriageScoreFilter/SOCAutoTriageScoreFilter.yml @@ -20,11 +20,16 @@ tags: - SOC - SOC_Framework_Unified comment: |- - Fetches unstarred new cases via core-api-post in paginated batches of 100, - sorted by creation_time asc. Filters by aggregated_score <= threshold, - creation_time outside window, and manual_score is null. Skips above-threshold - cases without holding them in memory. Stops when eligible cases are found or - max_batches reached. Part of JOB - Auto Triage V3. + Fetches unstarred new cases via core-api-post using offset pagination + (get_incidents cannot sort, so keyset paging is unsafe). Server-side filters: + status=new, starred=false, creation_time <= now-window_hours. Client-side + gates: aggregated_score <= threshold (not filterable server-side) and + manual_score is null; every skip is recorded with a reason. Closes each + passing case in-process via update_incident (one ID per call — the API has no + bulk close), one dataset row per actual close result. A wall-clock budget caps + both fetch and close; partial runs are safe because closed cases leave + status=new and the next run resumes. dry_run selects and reports without + closing. Part of JOB - Auto Triage V3. VD3: frees analyst time by auto-closing low-signal noise cases. enabled: true args: @@ -41,16 +46,46 @@ args: - supportedModules: [] name: max_batches required: false - description: Maximum number of 100-case batches to fetch from the API per run. - Default 5 (scans up to 500 cases). Increase if backlog consistently exceeds 500. - defaultValue: "5" + description: Per-run cap on 100-case offset pages (deepest offset = max_batches*100). + With offset pagination each page is O(offset), so this is a cost ceiling, not a + full-backlog drain — large backlogs clear over repeated scheduled runs. A wall-clock + guard stops the run before the automation timeout regardless of this value. + defaultValue: "20" +- supportedModules: [] + name: batch_size + required: false + description: Incidents to fetch per get_incidents call. Hard-capped at the API + per-request ceiling of 100 and floored at 1. Junk/blank falls back to 100. + defaultValue: "100" +- supportedModules: [] + name: dry_run + required: false + description: When true, select eligible cases and report what WOULD close but + emit nothing to the close path and tag dataset rows as shadow. Use for safe + testing. Accepts true/1/yes. Default false. + defaultValue: "false" outputs: -- contextPath: AutoTriage.filtered_incidents - description: Incident objects that passed both gates (score <= threshold, manual_score - is null) +- contextPath: AutoTriage.closed_count + description: Number of cases actually closed this run (0 in a dry run). + type: Number +- contextPath: AutoTriage.failed_count + description: Number of cases whose update_incident close call failed this run. + type: Number +- contextPath: AutoTriage.closed_ids + description: Capped sample of incident IDs closed this run. type: Unknown +- contextPath: AutoTriage.failed + description: Capped sample of {incident_id, error} for closes that failed. + type: Unknown +- contextPath: AutoTriage.dry_run + description: Whether this run was a dry run (selected but closed nothing). + type: Boolean +- contextPath: AutoTriage.would_close_count + description: In a dry run, the number of cases that would have closed. + type: Number - contextPath: AutoTriage.skipped_incidents - description: Incident objects that failed one or both gates with reason + description: Incident objects that failed a gate, each with a reason (includes + score-based skips). type: Unknown - contextPath: AutoTriage.passed_count description: Count of incidents that passed the score filter diff --git a/docs/soc-optimization-unified/overview.md b/docs/soc-optimization-unified/overview.md index c05c09b1..d388c4b4 100644 --- a/docs/soc-optimization-unified/overview.md +++ b/docs/soc-optimization-unified/overview.md @@ -5,7 +5,7 @@ | Field | Value | |---|---| | ID | `soc-optimization-unified` | -| Version | `3.10.7` | +| Version | `3.10.9` | | Category | Use Case | | Pack Path | `Packs/soc-optimization-unified` | | Manifest | [`Packs/soc-optimization-unified/xsoar_config.json`](https://github.com/Palo-Cortex/secops-framework/blob/main/Packs/soc-optimization-unified/xsoar_config.json) | @@ -24,7 +24,7 @@ Additional custom packs the installer pulls in alongside this pack. | Pack | System | Source | |---|---|---| -| `soc-optimization-unified.zip` | `yes` | [release](https://github.com/Palo-Cortex/secops-framework/releases/download/soc-optimization-unified-v3.10.7/soc-optimization-unified-v3.10.7.zip) | +| `soc-optimization-unified.zip` | `yes` | [release](https://github.com/Palo-Cortex/secops-framework/releases/download/soc-optimization-unified-v3.10.9/soc-optimization-unified-v3.10.9.zip) | | `soc-framework-nist-ir.zip` | `yes` | [release](https://github.com/Palo-Cortex/secops-framework/releases/download/soc-framework-nist-ir-v1.6.3/soc-framework-nist-ir-v1.6.3.zip) | ## Marketplace Dependencies