#!/usr/bin/env python3
"""
Phase 4: Traceability Verification Script

Deterministic verification that all business rules (captured disposition from
traceability.yaml) and requirements (REQ-* from requirements.md) are referenced
in chapters 1-8 of the generated microservice specification files.

The search is restricted to sections 1 through 8 of each specification file -
any content from section 9 onward (including traceability matrices and
appendices) is excluded to avoid false positives.

Produces a self-contained HTML dashboard showing implementation status.

Usage:
    python verify-traceability.py \\
        --inputs-dir inputs/spec \\
        --specs-dir outputs/microservices \\
        --output traceability-dashboard.html
"""

import argparse
import glob
import html
import os
import re
import sys
from collections import defaultdict  # noqa: F401
from datetime import datetime, timezone

import yaml


# ---------------------------------------------------------------------------
# 1. Extraction helpers
# ---------------------------------------------------------------------------

def discover_functions(inputs_dir: str) -> list[str]:
    """Return the sorted names of the business-function directories.

    Every non-hidden sub-directory directly under ``inputs_dir`` counts as a
    business function; plain files and dot-directories are ignored.
    """
    return [
        entry
        for entry in sorted(os.listdir(inputs_dir))
        if not entry.startswith(".")
        and os.path.isdir(os.path.join(inputs_dir, entry))
    ]


def _as_text(value) -> str:
    """Return ``value`` as a string, mapping a YAML null to ``""``."""
    return "" if value is None else str(value)


def extract_rules(rule_file: str) -> list[dict]:
    """
    Parse traceability.yaml and return rules with disposition 'captured'.

    Each returned dict has keys:
        rule_id, disposition, program, rule_text_summary, reference

    All values are strings: a key that is absent or explicitly null in the
    YAML becomes "" and scalar non-string values are stringified, so callers
    can search for and HTML-escape them without type checks.

    Returns an empty list when the document is empty, is not a mapping, or
    its ``rules`` entry is missing or not a list. Entries of ``rules`` that
    are not mappings are skipped.
    """
    with open(rule_file, "r", encoding="utf-8") as fh:
        data = yaml.safe_load(fh)

    if not isinstance(data, dict):
        return []
    rules = data.get("rules")
    if not isinstance(rules, list):
        # Covers both a missing key and an explicit ``rules:`` null.
        return []

    results = []
    for rule in rules:
        if not isinstance(rule, dict):
            continue
        disposition = rule.get("disposition", "")
        if disposition != "captured":
            continue
        results.append({
            "rule_id": _as_text(rule.get("rule_id")),
            "disposition": disposition,
            "program": _as_text(rule.get("program")),
            "rule_text_summary": _as_text(rule.get("rule_text_summary")),
            "reference": _as_text(rule.get("reference")),
        })
    return results


_REQ_PATTERN = re.compile(r"\b(REQ-[A-Z0-9]+-\d+)\b")


def _requirement_text(lines: list[str], index: int) -> str:
    """Join line ``index`` with its continuation lines into one string.

    Continuation stops at the first following line that is blank, starts a
    heading (``#``), or mentions another REQ-* identifier.
    """
    parts = [lines[index].strip()]
    for follow in range(index + 1, len(lines)):
        candidate = lines[follow].strip()
        if (not candidate
                or candidate.startswith("#")
                or _REQ_PATTERN.search(candidate)):
            break
        parts.append(candidate)
    return " ".join(parts)


def extract_requirements(req_file: str) -> list[dict]:
    """
    Parse requirements.md and return every REQ-* identifier with its
    full requirement description text.

    Each returned dict has keys:
        req_id, text (the full requirement description)

    Identifiers are reported once, in order of first appearance; the text is
    taken from the line of that first appearance plus its continuation lines.
    """
    with open(req_file, "r", encoding="utf-8") as fh:
        lines = fh.read().splitlines()

    seen: set[str] = set()
    results: list[dict] = []
    for index, line in enumerate(lines):
        for match in _REQ_PATTERN.finditer(line):
            req_id = match.group(1)
            if req_id in seen:
                continue
            seen.add(req_id)
            results.append({
                "req_id": req_id,
                "text": _requirement_text(lines, index),
            })
    return results


# ---------------------------------------------------------------------------
# 2.
# Specification scanning
# ---------------------------------------------------------------------------

_APPENDIX_PATTERN = re.compile(
    r"^## (?:Appendix|APPENDIX)",
    re.MULTILINE,
)

_SECTION_9_PATTERN = re.compile(
    r"^## 9\.",
    re.MULTILINE,
)

# Numbered second-level headings such as "## 3. API Design".
_SECTION_HEADING_PATTERN = re.compile(r"^(## \d+\..+)$", re.MULTILINE)


def _extract_chapters_1_to_8(content: str) -> str:
    """Extract only chapters 1–8 from a specification file.

    Strips everything from the first occurrence of '## 9.' or
    '## Appendix' onward. This ensures that traceability matrices,
    reference dumps, and appendices do not produce false positives.
    Content without either heading is returned unchanged.
    """
    cut_points = [
        match.start()
        for match in (
            _SECTION_9_PATTERN.search(content),
            _APPENDIX_PATTERN.search(content),
        )
        if match is not None
    ]
    return content[:min(cut_points, default=len(content))]


def load_spec_contents(specs_dir: str) -> dict[str, str]:
    """
    Read all *-specification.md files from specs_dir.
    Returns {filename: content} restricted to chapters 1–8 only.
    """
    specs = {}
    for path in sorted(glob.glob(os.path.join(specs_dir, "*-specification.md"))):
        with open(path, "r", encoding="utf-8") as fh:
            specs[os.path.basename(path)] = _extract_chapters_1_to_8(fh.read())
    return specs


def find_identifier_in_specs(
    identifier: str, specs: dict[str, str]
) -> list[dict]:
    """Return list of {filename, sections} dicts for specs containing the identifier.

    For each spec file where the identifier is found, determines which
    section heading(s) (## N. ...) contain the match. Occurrences that
    precede the first numbered heading are reported under
    "(before first section)".

    The identifier must appear as a whole token: it may not be directly
    preceded or followed by a word character, so "REQ-X-1" is not considered
    present in a file that only mentions "REQ-X-10". An empty identifier
    never matches anything.
    """
    if not identifier:
        # An empty needle is "found" at every offset and would never let the
        # scan advance; treat it as untraceable instead.
        return []

    token = re.compile(r"(?<!\w)" + re.escape(identifier) + r"(?!\w)")

    found_in = []
    for fname, content in specs.items():
        if identifier not in content:
            continue  # cheap pre-filter before running the regex

        headings = [
            (m.start(), m.group(1).strip())
            for m in _SECTION_HEADING_PATTERN.finditer(content)
        ]

        sections_found = set()
        matched = False
        current = -1  # index of the last heading at or before the match
        for match in token.finditer(content):
            matched = True
            pos = match.start()
            # Matches arrive in increasing order, so the heading cursor only
            # ever moves forward.
            while current + 1 < len(headings) and headings[current + 1][0] <= pos:
                current += 1
            sections_found.add(
                headings[current][1] if current >= 0 else "(before first section)"
            )

        if matched:
            found_in.append({
                "filename": fname,
                "sections": sorted(sections_found),
            })
    return found_in


# ---------------------------------------------------------------------------
# 3. Orchestration
# ---------------------------------------------------------------------------

def _annotate_trace_status(items: list[dict], id_key: str, specs: dict[str, str]) -> list[dict]:
    """Add ``status`` and ``found_in`` to each item, looked up by ``item[id_key]``."""
    for item in items:
        found_in = find_identifier_in_specs(item[id_key], specs)
        item["status"] = "implemented" if found_in else "missing"
        item["found_in"] = found_in  # list of {filename, sections}
    return items


def run_verification(inputs_dir: str, specs_dir: str) -> dict:
    """
    Main verification logic. Returns a result dict with structure:

    {
        "generated_at": str,
        "inputs_dir": str,
        "specs_dir": str,
        "spec_files": [str],
        "functions": {
            "<function_name>": {
                "rules": [
                    {
                        "rule_id": str,
                        "disposition": str,
                        "program": str,
                        "rule_text_summary": str,
                        "reference": str,
                        "status": "implemented" | "missing",
                        "found_in": [{"filename": str, "sections": [str]}]
                    }
                ],
                "requirements": [
                    {
                        "req_id": str,
                        "text": str,
                        "status": "implemented" | "missing",
                        "found_in": [{"filename": str, "sections": [str]}]
                    }
                ]
            }
        },
        "summary": {
            "total_rules": int,
            "implemented_rules": int,
            "missing_rules": int,
            "total_requirements": int,
            "implemented_requirements": int,
            "missing_requirements": int,
            "by_function": {
                "<function_name>": {
                    "rules_total": int,
                    "rules_implemented": int,
                    "rules_missing": int,
                    "reqs_total": int,
                    "reqs_implemented": int,
                    "reqs_missing": int,
                }
            }
        }
    }

    A function directory without traceability.yaml or requirements.md simply
    contributes an empty list for that artifact.
    """
    specs = load_spec_contents(specs_dir)
    functions = discover_functions(inputs_dir)

    summary = {
        "total_rules": 0,
        "implemented_rules": 0,
        "missing_rules": 0,
        "total_requirements": 0,
        "implemented_requirements": 0,
        "missing_requirements": 0,
        "by_function": {},
    }
    result = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "inputs_dir": inputs_dir,
        "specs_dir": specs_dir,
        "spec_files": sorted(specs.keys()),
        "functions": {},
        "summary": summary,
    }

    for func_name in functions:
        func_dir = os.path.join(inputs_dir, func_name)
        func_result = {"rules": [], "requirements": []}

        # --- Rules (captured disposition from traceability.yaml) ---
        rule_file = os.path.join(func_dir, "traceability.yaml")
        if os.path.isfile(rule_file):
            func_result["rules"] = _annotate_trace_status(
                extract_rules(rule_file), "rule_id", specs
            )

        # --- Requirements ---
        req_file = os.path.join(func_dir, "requirements.md")
        if os.path.isfile(req_file):
            func_result["requirements"] = _annotate_trace_status(
                extract_requirements(req_file), "req_id", specs
            )

        result["functions"][func_name] = func_result

        # --- Per-function summary ---
        r_total = len(func_result["rules"])
        r_impl = sum(1 for r in func_result["rules"] if r["status"] == "implemented")
        q_total = len(func_result["requirements"])
        q_impl = sum(1 for r in func_result["requirements"] if r["status"] == "implemented")

        summary["total_rules"] += r_total
        summary["implemented_rules"] += r_impl
        summary["total_requirements"] += q_total
        summary["implemented_requirements"] += q_impl
        summary["by_function"][func_name] = {
            "rules_total": r_total,
            "rules_implemented": r_impl,
            "rules_missing": r_total - r_impl,
            "reqs_total": q_total,
            "reqs_implemented": q_impl,
            "reqs_missing": q_total - q_impl,
        }

    summary["missing_rules"] = summary["total_rules"] - summary["implemented_rules"]
    summary["missing_requirements"] = (
        summary["total_requirements"] - summary["implemented_requirements"]
    )

    return result


# ---------------------------------------------------------------------------
# 4.
HTML dashboard generation +# --------------------------------------------------------------------------- + +def _pct(num: int, den: int) -> str: + if den == 0: + return "N/A" + return f"{num / den * 100:.1f}%" + + +def _status_class(status: str) -> str: + return "implemented" if status == "implemented" else "missing" + + +def generate_html(result: dict) -> str: + """Produce a self-contained HTML dashboard string.""" + s = result["summary"] + gen_time = result["generated_at"] + + rules_pct = _pct(s["implemented_rules"], s["total_rules"]) + reqs_pct = _pct(s["implemented_requirements"], s["total_requirements"]) + overall_total = s["total_rules"] + s["total_requirements"] + overall_impl = s["implemented_rules"] + s["implemented_requirements"] + overall_pct = _pct(overall_impl, overall_total) + + # Build per-function rows for summary table + func_rows = "" + for func_name in sorted(result["functions"].keys()): + fs = s["by_function"][func_name] + r_pct = _pct(fs["rules_implemented"], fs["rules_total"]) + q_pct = _pct(fs["reqs_implemented"], fs["reqs_total"]) + func_rows += f""" + + {html.escape(func_name)} + {fs["rules_implemented"]}/{fs["rules_total"]} ({r_pct}) + {fs["reqs_implemented"]}/{fs["reqs_total"]} ({q_pct}) + """ + + # Build per-function detail sections + detail_sections = "" + for func_name in sorted(result["functions"].keys()): + func = result["functions"][func_name] + + # Rules table + rule_rows = "" + for rule in func["rules"]: + sc = _status_class(rule["status"]) + if rule["found_in"]: + found_parts = [] + for entry in rule["found_in"]: + fname = entry["filename"] + sections = entry["sections"] + sections_str = "; ".join(sections) + found_parts.append( + f"{html.escape(fname)}" + f"
{html.escape(sections_str)}" + ) + found_html = "
".join(found_parts) + else: + found_html = "—" + rule_rows += f""" + + {html.escape(rule["rule_id"])} + {html.escape(rule.get("rule_text_summary", ""))} + {html.escape(rule.get("program", ""))} + {rule["status"].upper()} + {found_html} + """ + + # Requirements table + req_rows = "" + for req in func["requirements"]: + sc = _status_class(req["status"]) + if req["found_in"]: + found_parts = [] + for entry in req["found_in"]: + fname = entry["filename"] + sections = entry["sections"] + sections_str = "; ".join(sections) + found_parts.append( + f"{html.escape(fname)}" + f"
{html.escape(sections_str)}" + ) + found_html = "
".join(found_parts) + else: + found_html = "—" + full_text = req["text"] + req_rows += f""" + + {html.escape(req["req_id"])} + {html.escape(full_text)} + {req["status"].upper()} + {found_html} + """ + + detail_sections += f""" +
+

{html.escape(func_name)}

+ +

Business Rules (captured)

+ {"

No captured rules found for this function.

" if not rule_rows else f''' +
+ + + + + + + + + + + {rule_rows} + +
Rule IDRule NameProgramStatusFound In (File & Section)
+
'''} + +

Requirements

+ {"

No REQ-* identifiers found for this function.

" if not req_rows else f''' +
+ + + + + + + + + + {req_rows} + +
REQ IDRequirement DescriptionStatusFound In (File & Section)
+
'''} +
+
""" + + # Spec files list + spec_list = "" + for sf in result["spec_files"]: + spec_list += f"
  • {html.escape(sf)}
  • \n" + if not spec_list: + spec_list = "
  • No specification files found
  • " + + return f""" + + + + +Traceability Verification Dashboard + + + + +

    Traceability Verification Dashboard

    +

    Generated {html.escape(gen_time)} — Inputs: {html.escape(result["inputs_dir"])} — Specs: {html.escape(result["specs_dir"])}

    + + +
    +
    +
    Business Rules Coverage
    +
    {rules_pct}
    +
    {s["implemented_rules"]} / {s["total_rules"]} captured rules traced in chapters 1–8
    +
    0 else "0%"}">
    +
    +
    +
    Requirements Coverage
    +
    {reqs_pct}
    +
    {s["implemented_requirements"]} / {s["total_requirements"]} requirements traced in chapters 1–8
    +
    0 else "0%"}">
    +
    +
    +
    Total Missing
    +
    {s["missing_rules"] + s["missing_requirements"]}
    +
    {s["missing_rules"]} rules + {s["missing_requirements"]} requirements not found in chapters 1–8
    +
    +
    + + +
    + Specification Files Scanned ({len(result["spec_files"])}) +
      {spec_list}
    +
    +
    + + +

    Coverage by Business Function

    +
    + + + + + {func_rows} + +
    Business FunctionRules (in chapters 1–8)Requirements (in chapters 1–8)
    +
    +
    + + +
    + + + +
    + + +{detail_sections} + + + + +""" + + +# --------------------------------------------------------------------------- +# 5. CLI entry point +# --------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description="Verify traceability of business rules (captured from " + "traceability.yaml) and requirements (REQ-* identifiers) " + "in chapters 1-8 of microservice specifications." + ) + parser.add_argument( + "--inputs-dir", + default="inputs/spec", + help="Path to the inputs/spec directory (default: inputs/spec)", + ) + parser.add_argument( + "--specs-dir", + default="outputs/microservices", + help="Path to the microservice specification files " + "(default: outputs/microservices)", + ) + parser.add_argument( + "--output", + default="traceability-dashboard.html", + help="Output HTML dashboard file path " + "(default: traceability-dashboard.html)", + ) + args = parser.parse_args() + + # Validate paths + if not os.path.isdir(args.inputs_dir): + print(f"ERROR: Inputs directory not found: {args.inputs_dir}", file=sys.stderr) + sys.exit(1) + if not os.path.isdir(args.specs_dir): + print(f"ERROR: Specs directory not found: {args.specs_dir}", file=sys.stderr) + sys.exit(1) + + print(f"Scanning inputs: {args.inputs_dir}") + print(f"Scanning specs: {args.specs_dir}") + + result = run_verification(args.inputs_dir, args.specs_dir) + + # Print summary to stdout + s = result["summary"] + print(f"\n{'='*60}") + print(f"TRACEABILITY VERIFICATION SUMMARY") + print(f"{'='*60}") + print(f"Business Rules (captured): " + f"{s['implemented_rules']}/{s['total_rules']} " + f"({_pct(s['implemented_rules'], s['total_rules'])})") + print(f"Requirements (REQ-*): " + f"{s['implemented_requirements']}/{s['total_requirements']} " + f"({_pct(s['implemented_requirements'], s['total_requirements'])})") + print(f"{'─'*60}") + print(f"Scope: Chapters 1–8 of specification files only") + print(f"{'='*60}") + + missing_rules = 
s["missing_rules"] + missing_reqs = s["missing_requirements"] + if missing_rules + missing_reqs > 0: + if missing_rules > 0: + print(f"\n⚠ {missing_rules} business rule(s) NOT found in chapters " + f"1–8 of any specification file.") + if missing_reqs > 0: + print(f"\n⚠ {missing_reqs} REQ-* identifiers NOT found in chapters " + f"1–8 of any specification file.") + else: + print(f"\n✅ All business rules and REQ-* identifiers found in " + f"chapters 1–8 of specification files.") + + # Write HTML dashboard + dashboard_html = generate_html(result) + with open(args.output, "w", encoding="utf-8") as fh: + fh.write(dashboard_html) + print(f"\nDashboard written to: {args.output}") + + # Exit code: 0 if all rules and requirements traced, 1 if any missing + sys.exit(0 if s["missing_rules"] + s["missing_requirements"] == 0 else 1) + + +if __name__ == "__main__": + main() diff --git a/aws-transform/steering/workload-mainframe-reimagine-analysis.md b/aws-transform/steering/workload-mainframe-reimagine-analysis.md new file mode 100644 index 0000000..d624d3c --- /dev/null +++ b/aws-transform/steering/workload-mainframe-reimagine-analysis.md @@ -0,0 +1,368 @@ +--- +inclusion: always +--- + +# Phase 1: Business Function Comprehensive Analysis + +## Purpose + +This steering file guides the comprehensive analysis of all business function inputs — requirements, rule dispositions, and discovery artifacts — to produce a single consolidated markdown document (`ddd-analysis.md`). That document serves as the sole input for Phase 2 (DDD bounded context analysis). + +## Goal + +Read, cross-reference, and synthesize every artifact in the `spec/` folder into a structured, self-contained analysis document that preserves full traceability to the original sources. The output must be rich enough that the DDD bounded context analysis can proceed without re-reading the raw inputs. 
+ +## Execution Model: Chunked Output + +**CRITICAL**: Do NOT attempt to produce the entire `ddd-analysis.md` document in a single pass. The volume of data across all business functions (typically 40–60 source files, 1000+ rules, 1000+ requirements) will exceed context limits and produce incomplete or shallow output. + +**Instead, produce the document in small, incremental chunks. Each file-write or file-append must be small enough to succeed without hitting network timeouts or token limits.** + +### Chunk 1: Section 1 (Inventory Only) + +1. Read `_index.yaml` and all `capability.yaml` files to build the inventory +2. Read each function's `discovery/data-stores.yaml` for owned/consumed stores +3. Read each function's `traceability.yaml` summary section for disposition counts +4. **Write** (create) `ddd-analysis-wip.md` with the document header and Section 1 (Business Functions Inventory table) + +### Chunk 2+: Section 2 (One Business Function Per Chunk) + +For EACH business function, produce a SEPARATE append: + +1. Read that function's `requirements.md` completely +2. Read that function's `traceability.yaml` completely (captured rules, dispositions) +3. Read that function's `capability.yaml` for shared dependencies +4. **Append** one subsection to `ddd-analysis-wip.md`: "### 2.N \" with Requirements Summary, Traceability Summary, and Shared Dependencies Summary + +**Repeat for each business function.** Do NOT combine multiple functions in a single write — one function per append operation. + +### Next Chunk: Section 2 Shared Capabilities + +1. Read all files under `_shared/` (capability.yaml, requirements.md for each shared program) +2. **Append** a "### 2.X Shared Capabilities" subsection summarizing all shared programs + +### Next Chunk: Section 3 (Data Store Analysis) + +1. Read all `data-stores.yaml` files completely for field-level detail +2. Cross-reference `data-model.md` for the system-wide data store access map +3. 
Cross-reference the `.function-list.yaml` shared subroutine relationships +4. **Append** Section 3 to `ddd-analysis-wip.md` (Core Business Data Stores with full field dictionaries, Data Ownership Matrix, Entity Relationships, Shared Data Hotspots) + +If Section 3 is too large (many data stores with full field dictionaries), split further: +- Append 3.1 and 3.2 (Core Business Data Stores + Data Ownership Matrix) +- Append 3.3 and 3.4 (Entity Relationships + Shared Data Hotspots) + +### Next Chunk: Section 4 (Programs and Batch Jobs) + +1. Read all `programs.yaml` and `batch-jobs.yaml` files +2. Read all `screens.yaml` files +3. **Append** Section 4 to `ddd-analysis-wip.md` (Program-to-Data-Store Map, Batch Job Data Flows, Program Dependency Graph) + +### Final Chunk: Section 5 (Cross-Function Synthesis + Appendix) + +1. Synthesize cross-function interactions from data already gathered +2. **Append** Section 5 to `ddd-analysis-wip.md` (Function Interaction Map, Business Rule Distribution, Screen-to-Program-to-Data Flows, Integration Points Summary) +3. **Append** Appendix A (Source File References) +4. 
**Rename** `ddd-analysis-wip.md` to `ddd-analysis.md` — this signals Phase 1 is complete + +### Why Chunked Execution + +- **Prevents network timeouts**: Each write is small enough to complete within API limits +- **Prevents context overflow**: Each chunk reads a focused subset of files and produces a bounded output section +- **Enables incremental verification**: Each chunk can be reviewed before proceeding +- **Preserves depth**: Smaller focused passes produce richer cross-referencing than one shallow pass over everything +- **Handles large systems**: Systems with 10+ business functions and 100+ programs cannot fit in a single context window + +### File Operations + +- **Chunk 1**: Use file-write to CREATE `ddd-analysis-wip.md` with Section 1 only +- **All subsequent chunks**: Use file-APPEND to add content to `ddd-analysis-wip.md` +- **One business function per append** in Section 2 — never combine multiple functions +- If any single append fails with a network error, reduce the content size and retry with a smaller portion +- Each chunk should read its required source files, produce the output, then move to the next chunk +- **After the final chunk** (Section 5 + Appendix): Rename `ddd-analysis-wip.md` to `ddd-analysis.md`. This signals Phase 1 is complete for resume detection. + +## Input Sources + +### 1. System-Level Files + +| Source | Path | Content | +|--------|------|---------| +| Function List | `spec/.function-list.yaml` | System name, all business functions with program counts, subroutines (classified as dedicated/shared), and shared_with relationships | +| Capability Index | `spec/_index.yaml` | Generated index listing all business capabilities with their IDs and paths | +| Data Model | `spec/data-model.md` | System-wide data model listing all data stores (type, accessing capabilities) | +| Glossary | `spec/.glossary.yaml` | Domain glossary terms | +| Term Preferences | `spec/.term-preferences.yaml` | Preferred terminology conventions | + +### 2. 
Business Function Specifications + +Located under `spec/`. Each subdirectory (excluding `_shared/` and files) is a business function: + +| Source | Path Pattern | Content | +|--------|-------------|---------| +| Capability | `spec/<function>/capability.yaml` | Function metadata: id, kind (business_capability), capability_name, program_count, summary, depends_on (shared programs), and outputs list | +| Requirements | `spec/<function>/requirements.md` | User workflows, functional requirements (REQ-*), preconditions, cross-boundary constraints | +| Traceability | `spec/<function>/traceability.yaml` | Business rules extracted from legacy programs with disposition (captured, not_applicable, unreachable, not_accounted_for), rule_id, program, req_ids, and traceability_lines | + +### 3. Discovery Artifacts + +Each business function has a `discovery/` subfolder containing structured analysis of the legacy system: + +| Source | Path Pattern | Content | +|--------|-------------|---------| +| Data Stores | `spec/<function>/discovery/data-stores.yaml` | Owned and consumed data stores with copybook views, field definitions (name, PIC type, offset, length), record lengths, internal/external readers and writers | +| Programs | `spec/<function>/discovery/programs.yaml` | Programs (COBOL, JCL) with execution type (online/batch/subroutine), transaction IDs, data stores read/written, screen references, calls, and includes | +| Batch Jobs | `spec/<function>/discovery/batch-jobs.yaml` | Batch jobs classified by type (migrate, interface, setup, skip) with descriptions, programs invoked, and data stores accessed | +| Screens | `spec/<function>/discovery/screens.yaml` | Screen definitions (BMS maps) with field lists (name, type, length) and available actions | + +### 4. Shared Capabilities + +Located under `spec/_shared/`. 
Each subdirectory is a shared subroutine/program used across multiple business functions: + +| Source | Path Pattern | Content | +|--------|-------------|---------| +| Capability | `spec/_shared/<program>/capability.yaml` | Shared program metadata: id, kind (shared_capability), capability_name, directory, source_programs, rule_count | +| Requirements | `spec/_shared/<program>/requirements.md` | Functional requirements for the shared program (REQ-* identifiers), open questions | + +## Analysis Process + +Execute the following steps sequentially. Each step builds on the previous one. + +### Step 1: Discover and Inventory All Business Functions + +1. Read `spec/_index.yaml` to get the system-level list of all business capabilities +2. Read `spec/.function-list.yaml` to get program counts, subroutine dependencies, and shared_with relationships +3. List all subdirectories under `spec/` (excluding `_shared/` and dot-files) to confirm the function list +4. For each subdirectory, read `capability.yaml` for metadata (id, program_count, summary, depends_on) +5. For each subdirectory, verify the presence of `requirements.md`, `traceability.yaml`, and the `discovery/` folder +6. Read each function's `discovery/data-stores.yaml` to extract owned and consumed data stores +7. Inventory shared capabilities from `spec/_shared/` — read each shared program's `capability.yaml` +8. 
Build a complete inventory + +**Output section: "Business Functions Inventory"** + +For each function, record: +- Function name (directory name / capability id) +- Capability summary (from capability.yaml) +- Program count (from capability.yaml) +- Whether requirements.md exists +- Whether traceability.yaml exists +- Discovery artifacts present (data-stores, programs, batch-jobs, screens) +- Dependencies on shared capabilities (from capability.yaml `depends_on` field) +- Primary data stores owned (from data-stores.yaml `owned` section) +- Data stores consumed (from data-stores.yaml `consumed` section) +- Internal readers and writers (from data-stores.yaml) +- External readers and writers (from data-stores.yaml — these indicate cross-function dependencies) +- Total rules count and disposition breakdown (from traceability.yaml `summary` section: captured, not_applicable, unreachable, not_accounted_for) + +### Step 2: Analyze Each Business Function + +For each discovered business function, read its `requirements.md` and `traceability.yaml` in full. Produce a structured summary that captures: + +#### 2a. 
Requirements Summary + +For each function's `requirements.md`: +- **Function purpose**: One-paragraph description of what this function does (also available from `capability.yaml` summary field) +- **Actors**: Who uses this function (user roles) +- **Preconditions**: Global preconditions listed at the top +- **User workflows**: List each numbered section with its title and a brief description +- **Key requirements inventory**: Table of all REQ-* identifiers grouped by workflow, with a one-line summary of each +- **Cross-boundary requirements**: Any REQ-XBND-*, REQ-INTEG-*, REQ-LOCK-* or similar cross-cutting requirements that indicate integration points with other functions +- **Validation rules**: Any REQ-VALID-* requirements that define input validation +- **Error handling patterns**: Key error conditions and their handling +- **Open questions**: Any OQ-* items listed at the end of the requirements document + +**IMPORTANT**: Preserve all REQ-* identifiers exactly as they appear. These are traceability anchors. + +#### 2b. Traceability Summary + +For each function's `traceability.yaml`: +- **Total rules** and **disposition breakdown** (captured, not_applicable, unreachable, not_accounted_for) from the `summary` section +- **Captured rules**: List each with rule_id, program, and req_ids — these are the canonical business rules linked to requirements +- **Not applicable rules**: Count only with a brief note on why (typically platform mechanics, working storage, file I/O verb mechanics) +- **Unreachable rules**: Count only — dead code paths +- **Not accounted for rules**: Count only — rules not yet mapped to requirements + +**IMPORTANT**: Preserve all rule_id values exactly. These are traceability anchors. + +#### 2c. 
Shared Dependencies Summary + +For each function's `capability.yaml` `depends_on` field: +- List each shared capability dependency with its source_programs +- Cross-reference with `_shared//requirements.md` for the shared program's functional requirements +- Note which shared programs are used and what they provide + +### Step 3: Analyze Discovery Artifacts — Data Stores (Data Model & Field Dictionary) + +Read each function's `discovery/data-stores.yaml` in full and produce: + +**NOTE**: The data-stores.yaml files serve as both the data model and the field dictionary. Each file contains complete field definitions with PIC types, offsets, and lengths — no separate data dictionary enrichment step is needed. + +#### 3a. Core Business Data Stores + +For each data store across all functions (both owned and consumed): +- Store name (full DSN) +- Type (VSAM KSDS, VSAM PATH, etc.) +- Record length +- Owning function (the function whose `owned` section lists it) +- Copybook views with program associations +- Complete field listing with PIC types, offsets, and lengths (this is the field dictionary) +- Internal readers and writers (programs within the owning function) +- External readers and writers (programs from other functions) + +#### 3b. Data Ownership Matrix + +Build a matrix showing which functions own, read, or write each data store. Use the `owned` vs `consumed` classification from each function's data-stores.yaml, supplemented by the system-level `data-model.md` which shows the `Accessed By` relationships for all data stores: + +| Data Store | Owner Function | Internal Readers | Internal Writers | External Readers | External Writers | +|-----------|---------------|-----------------|-----------------|-----------------|-----------------| + +#### 3c. 
Entity Relationships + +Identify relationships between data stores by analyzing shared key fields: +- Match fields that appear across multiple data stores (e.g., ACCT-ID appearing in both account and transaction stores) +- Note which shared key fields connect entities across functions +- Identify foreign key patterns from field naming conventions + +#### 3d. Shared Data Hotspots + +Identify data stores accessed by 3+ functions — these are integration hotspots that will drive bounded context boundaries: +- List each hotspot store +- List all functions that own or consume it +- List all external readers and writers +- Note the contention pattern (read-heavy, write-heavy, mixed) + +### Step 4: Analyze Discovery Artifacts — Programs and Batch Jobs + +Read each function's `discovery/programs.yaml` and `discovery/batch-jobs.yaml` and produce: + +#### 4a. Program-to-Data-Store Map + +From each function's `programs.yaml`, build a map of which programs access which data stores and how: +- Group by function, then by program +- For each program, list: type (COBOL/JCL), execution mode (online/batch/subroutine), transaction ID +- List data stores read and written +- Note screen associations and program calls +- Note included copybooks + +#### 4b. Batch Job Data Flows + +From each function's `discovery/batch-jobs.yaml`, identify batch processing chains: +- Group by function +- For each batch job, list: classification (migrate/interface/setup/skip), programs invoked, data stores read/written +- Identify input → processing → output patterns +- Note the classification rationale + +#### 4c. 
Program Dependency Graph + +From the `calls` and `includes` fields in programs.yaml, build a dependency view: +- For each program, list what it calls and what calls it +- Classify as online (CICS transaction) vs batch (JCL invoked) vs subroutine +- Identify cross-function program dependencies (programs called by other functions) + +### Step 5: Cross-Reference and Synthesize + +Produce cross-cutting analysis sections: + +#### 5a. Function Interaction Map + +Using the data ownership matrix, external readers/writers, and cross-boundary requirements, build a function interaction map: +- Which functions depend on which other functions' data (via external readers/writers in data-stores.yaml) +- What the nature of each dependency is (read-only lookup, shared write, event-driven) +- Which requirements drive each interaction (cite REQ-* identifiers) + +#### 5b. Business Rule Distribution + +Map captured business rules to the data stores they operate on: +- Which rules validate or transform data in which stores +- Which rules span multiple stores (cross-aggregate candidates) +- Which rules are function-local vs cross-function + +#### 5c. Screen-to-Program-to-Data Flows + +Read each function's `discovery/screens.yaml` and map the full user interaction chains: +- For each screen: ID, name, associated program, field inventory with types and lengths, available actions +- Map the end-to-end flow: Screen → Program → Data Stores (read/write) +- This reveals the online transaction flows from user input to data persistence and is most valuable when cross-referenced with the function interaction map and program dependency graph + +#### 5d. 
Integration Points Summary + +Consolidate all evidence of cross-function integration: +- Cross-boundary requirements (REQ-XBND-*, REQ-INTEG-*) +- Data stores with external writers (from data-stores.yaml) — these indicate cross-function write dependencies +- Data stores consumed by multiple functions +- Programs that access data stores owned by different functions (external readers/writers) +- Batch jobs that read from one function's output and write to another's input + +## Output Document Structure + +The output file `ddd-analysis.md` must follow this exact structure. Replace `<SYSTEM_NAME>` with the system name derived from `spec/` content (e.g., folder names, `capability.yaml`, or the job description): + +```markdown +# <SYSTEM_NAME> — Comprehensive Business Function Analysis + +## Document Purpose + + +## 1. Business Functions Inventory + + +## 2. Business Function Detailed Analysis + +### 2.1 +#### 2.1.1 Requirements Summary +#### 2.1.2 Traceability Summary +#### 2.1.3 Shared Dependencies Summary + +### 2.2 +... + + +## 3. Data Store Analysis (Data Model & Field Dictionary) + +### 3.1 Core Business Data Stores +### 3.2 Data Ownership Matrix +### 3.3 Entity Relationships +### 3.4 Shared Data Hotspots + +## 4. Program and Batch Job Analysis + +### 4.1 Program-to-Data-Store Map +### 4.2 Batch Job Data Flows +### 4.3 Program Dependency Graph + +## 5. Cross-Function Synthesis + +### 5.1 Function Interaction Map +### 5.2 Business Rule Distribution +### 5.3 Screen-to-Program-to-Data Flows +### 5.4 Integration Points Summary + +## Appendix A: Source File References + + +``` + +## Traceability Rules + +Throughout the output document, maintain these traceability conventions: + +1. **Requirement references**: Always cite as `REQ-XXX-NNN` exactly as they appear in the source requirements.md +2. **Rule references**: Always cite as `rule_id: <rule_id>` exactly as they appear in the source traceability.yaml (underscore-separated UUID segments) +3. 
**Data store references**: Always cite using the full DSN name as it appears in data-stores.yaml (e.g., `%%CICV.MU.MUTB00.TAB.KLALI`) +4. **Program references**: Always cite the program name (e.g., MUBGVEIN) as it appears in programs.yaml +5. **Shared capability references**: Always cite the shared program ID (e.g., MUIALXXB) as it appears in capability.yaml `depends_on` +6. **Source file references**: When citing a specific fact, note the source file path in parentheses + +## Key Principles + +- **Completeness over brevity**: Include all requirements, all captured rules, all data stores. The DDD analysis cannot go back to re-read inputs. +- **Preserve identifiers**: Every REQ-*, rule_id, program name, and data store identifier must be preserved verbatim for traceability. +- **Cross-reference aggressively**: The value of this document is in connecting requirements to rules to data stores to programs. Make these connections explicit. +- **Include shared capabilities**: The `_shared/` programs represent cross-cutting logic used by multiple business functions — their requirements must be attributed to the consuming functions. +- **Flag ambiguities**: If ownership is unclear, if rules conflict, or if discovery data shows unexpected patterns, call them out explicitly as items for the DDD analysis to resolve. +- **Separate facts from inference**: Clearly mark any inferred relationships or assumptions with [INFERRED] tags. +- **Use discovery data as ground truth**: The discovery artifacts (data-stores.yaml, programs.yaml, batch-jobs.yaml) represent the actual system structure. Use them to validate and enrich the requirements and traceability data. +- **Disposition mapping**: Rules with disposition `captured` are the canonical business rules to model (equivalent to the old "consolidated" disposition). Rules with `not_applicable` are platform mechanics to exclude. Rules with `not_accounted_for` indicate gaps requiring investigation. 
diff --git a/aws-transform/steering/workload-mainframe-reimagine-ddd.md b/aws-transform/steering/workload-mainframe-reimagine-ddd.md new file mode 100644 index 0000000..1d3c103 --- /dev/null +++ b/aws-transform/steering/workload-mainframe-reimagine-ddd.md @@ -0,0 +1,289 @@ +--- +inclusion: always +--- + +# Phase 2: DDD Bounded Context Analysis + +## Purpose + +This steering file guides the identification of bounded contexts and their associated domain objects from the consolidated business function analysis document produced in Phase 1. + +## Input Sources + +The **primary input** for this analysis is: + +- **`ddd-analysis.md`** — The consolidated analysis document that synthesizes all business function requirements, rule dispositions, discovery artifacts (data stores, programs, batch jobs, screens), and cross-function dependencies into a single reference. This document preserves full traceability to the original sources. + +If additional detail is needed beyond what `ddd-analysis.md` provides, the original sources can be consulted: + +1. **Business Requirements** — `spec//requirements.md` +2. **Traceability** — `spec//traceability.yaml` +3. **Discovery Artifacts** — `spec//discovery/` (data-stores.yaml, programs.yaml, batch-jobs.yaml, screens.yaml) +4. **Capability Metadata** — `spec//capability.yaml` +5. **System Inventory** — `spec/.function-list.yaml` +6. **Capability Index** — `spec/_index.yaml` +7. **System Data Model** — `spec/data-model.md` +8. **Shared Capabilities** — `spec/_shared//` (capability.yaml, requirements.md) + +## Analysis Process + +Follow these steps sequentially. Each step builds on the previous one. + +### Step 0: Discover Business Functions + +Before starting the DDD analysis, discover all available business functions: + +1. List all subdirectories under `spec/` (excluding files like `.function-list.yaml`) +2. Each subdirectory name represents a business function +3. 
Verify each function has a `requirements.md` and optionally a `traceability.yaml` +4. Check each function's `discovery/` folder for data-stores.yaml to identify owned data stores +5. Read each function's `capability.yaml` for metadata (summary, program_count, depends_on) +6. Build the inventory of functions to analyze + +**Output format:** +```markdown +## Business Functions Inventory + +| # | Function Name | Has Requirements | Has Traceability | Primary Data Stores (Owned) | Data Stores (Consumed) | +|---|--------------|-----------------|---------------------|---------------------------|----------------------| +| 1 | | Yes/No | Yes/No | | | +``` + +### Step 1: Identify Bounded Contexts + +For each discovered business function, analyze: +- The key capabilities described in its `requirements.md` +- The data stores owned or consumed (from `discovery/data-stores.yaml`) +- The cross-boundary requirements (REQ-XBND-*, REQ-INTEG-*) that reveal integration points +- The traceability data to understand which rules are core vs supporting (captured rules are canonical) +- The shared capability dependencies (from capability.yaml `depends_on`) that reveal cross-function program sharing +- The external readers/writers in data-stores.yaml that reveal cross-function data dependencies + +**Criteria for grouping into bounded contexts:** +- **Cohesion**: Functions that share data ownership and change together belong in the same context +- **Autonomy**: A context should be independently deployable +- **Data Ownership**: Use the `owned` vs `consumed` classification in data-stores.yaml — entities owned by a single function strongly indicate a context boundary +- **Business Alignment**: Group by business capability, not by technical similarity +- **Language Consistency**: Terms used consistently within a function group define the ubiquitous language + +**Classification:** +- **Core Domain**: Business functions that provide competitive advantage +- **Supporting Subdomain**: Functions 
that support the core but are not differentiating +- **Generic Subdomain**: Cross-cutting infrastructure concerns + +**Output format for each bounded context:** +```markdown +### + +- **Responsibility**: +- **Classification**: Core Domain | Supporting Subdomain | Generic Subdomain +- **Source Functions**: +- **Owned Data Stores**: +- **Consumed Data Stores**: +- **Key Concepts** (ubiquitous language preview): + - : + - : +``` + +### Step 2: Define Ubiquitous Language + +For each bounded context identified in Step 1: +- Extract domain terms from the requirements (field names, entity names, operation names) +- Review field definitions from data-stores.yaml copybook views +- Identify business concepts vs technical/legacy terms +- Define each term in business language +- Note context-specific meanings (same term may mean different things in different contexts) + +**Output format:** +```markdown +| Term | Business Definition | Context-Specific Meaning | Related Terms | +|------|-------------------|-------------------------|---------------| +``` + +### Step 3: Identify Aggregates and Aggregate Roots + +For each bounded context: +- Identify candidate entities from the data stores owned by the context (from data-stores.yaml `owned` section) +- Determine transactional boundaries from the requirements (look for REQ-LOCK-*, REQ-INTEG-* patterns) +- Identify consistency requirements from business rules in traceability.yaml +- Group entities that must change together atomically +- Select the aggregate root (the entity with the primary identity that controls access) + +**Aggregate root selection criteria:** +- Has a unique identifier (primary key field from data-stores.yaml) +- Manages lifecycle of contained entities +- Enforces business rules across the aggregate (from captured rules in traceability.yaml) +- All external access goes through it +- Changes are atomic within the aggregate boundary + +**Output format:** +```markdown +### + +- **Aggregate Root**: + - Source: + - 
Identity: +- **Contained Entities**: +- **Invariants**: + - +- **Field Mapping**: + | Domain Attribute | Source Field | PIC Type | Offset | Length | + |-----------------|-------------|----------|--------|--------| +``` + +### Step 4: Define Entities + +For each aggregate, define the entities within it: +- Extract attributes from the data-stores.yaml field definitions (name, PIC type, offset, length) +- Map field names to business-meaningful attribute names +- Identify data types, lengths, and constraints +- Define entity operations from the requirements + +**Output format:** +```markdown +### + +- **Source**: +- **Identity**: (, ) +- **Attributes**: + | Attribute | Source Field | PIC Type | Offset | Length | Required | Description | + |-----------|-------------|----------|--------|--------|----------|-------------| +- **Operations**: +- **Lifecycle**: +``` + +### Step 5: Define Value Objects + +Identify attributes that should be modeled as value objects: +- Composite values (e.g., Address = line1 + line2 + line3 + state + country + zip) +- Values with validation rules (from REQ-VALID-* requirements) +- Values with formatting requirements (e.g., phone numbers with area code/prefix/line) +- Domain concepts without identity (e.g., Money, DateRange, CreditScore) + +**Output format:** +```markdown +### + +- **Source Fields**: +- **Attributes**: + | Attribute | PIC Type | Length | Validation | + |-----------|----------|--------|------------| +- **Validation Rules**: +- **Operations**: +- **Immutability**: Guaranteed — new instance created for any change +``` + +### Step 6: Identify Domain Events + +Look for state changes and integration points: +- Aggregate creation/update/deletion patterns in requirements +- Cross-boundary requirements (REQ-XBND-*, REQ-INTEG-*) that indicate events needed between contexts +- External writers in data-stores.yaml that indicate cross-function state changes +- Audit requirements +- Notification patterns (message queue interactions in 
programs.yaml — look for MQ calls) + +**Output format:** +```markdown +### (past tense) + +- **Trigger**: +- **Source Context**: +- **Payload**: + - aggregateId: + - + - occurredAt: timestamp +- **Subscribers**: +``` + +### Step 7: Define Domain Services + +Identify operations that span multiple entities or aggregates: +- Operations involving multiple data stores (from programs.yaml data_stores_read/data_stores_write) +- Complex validations across entities (from traceability.yaml captured rules) +- Calculations using multiple data sources +- Coordination logic from requirements + +**Output format:** +```markdown +### + +- **Responsibility**: +- **Operations**: + - () → +- **Dependencies**: +``` + +### Step 8: Define Application Services (Use Cases) + +Map each major workflow from requirements.md to a use case: +- Each numbered section in requirements.md typically maps to one use case +- Include the step-by-step flow with embedded business rules +- Reference the traceability.yaml for detailed business logic +- Use programs.yaml to understand the implementation flow (program → data store access) + +**Output format:** +```markdown +### + +- **Actor**: +- **Input**: +- **Output**: +- **Flow**: + 1. + 2. 
+- **Error Handling**: + - +- **Business Rules**: +- **Legacy Programs**: +``` + +### Step 9: Define Context Mapping + +Identify relationships between bounded contexts: +- Use the external readers/writers from data-stores.yaml to identify data sharing across functions +- Use cross-boundary requirements to identify integration patterns +- Determine upstream/downstream relationships based on data ownership (owned vs consumed) +- Use program call graphs from programs.yaml to identify runtime dependencies + +**Relationship patterns to look for:** +- Data stores consumed by multiple functions → potential Shared Kernel or Customer-Supplier +- External read-only access to another context's data → Anti-Corruption Layer +- Message queue interactions (MQ calls in programs.yaml) → Published Language / Open Host Service +- No direct data sharing → independent contexts + +**Output format:** +```markdown +### Context Map + +| Upstream Context | Downstream Context | Pattern | Shared Concept | Integration Mechanism | +|-----------------|-------------------|---------|----------------|----------------------| +``` + +## Output Structure + +**Working file**: Write each step's output incrementally to `ddd-working.md` as it completes — do NOT accumulate all steps in context before writing. When all steps are done, copy the completed file to `ddd-bounded-contexts.md` in the workspace root. This final filename is used by the Resume Detection logic to skip Phase 2 on subsequent runs. + +The document should follow this structure: + +1. **Executive Summary** — Artifact counts, context overview, key decisions +2. **Business Functions Inventory** — Discovered functions and their data store associations +3. **Bounded Contexts** — Each context with responsibility, classification, language, and aggregates +4. **Aggregate Definitions** — Roots, entities, value objects, and field mappings +5. **Domain Events** — Grouped by context with triggers and subscribers +6. 
**Domain Services** — Stateless operations spanning aggregates +7. **Application Services** — Use cases with flows and business rules +8. **Context Map** — Relationships and integration patterns + +## Key Principles + +- Start with discovery — never assume which business functions exist; always scan `spec/` first +- Keep aggregates small — prefer multiple small aggregates over one large one +- Reference other aggregates by ID only (use shared key fields from data-stores.yaml as evidence) +- Use eventual consistency between aggregates +- One transaction = one aggregate modification +- Data stores listed as `consumed` (not `owned`) indicate cross-context dependencies +- External readers/writers in data-stores.yaml reveal cross-function integration points +- Rule dispositions marked "not_applicable" (platform mechanics, working storage) should be excluded from domain modeling +- Rule dispositions marked "captured" represent the canonical business rules to model +- Rule dispositions marked "not_accounted_for" indicate rules not yet mapped to requirements that may need further investigation +- Shared capabilities (from `_shared/`) represent cross-cutting logic — consider whether they form a Generic Subdomain or should be distributed across contexts diff --git a/aws-transform/steering/workload-mainframe-reimagine-specgen.md b/aws-transform/steering/workload-mainframe-reimagine-specgen.md new file mode 100644 index 0000000..3925380 --- /dev/null +++ b/aws-transform/steering/workload-mainframe-reimagine-specgen.md @@ -0,0 +1,486 @@ +--- +inclusion: always +--- + +# Phase 3: Microservice Specification Generation + +## Purpose + +This steering file guides the generation of microservice specifications from a completed DDD bounded context analysis produced in Phase 2. + +## Input Sources + +This process requires two categories of inputs: + +### 1. 
DDD Analysis Output (Primary Input) + +**`ddd-bounded-contexts.md`** — The completed DDD bounded context analysis document produced by Phase 2. This provides: +- Bounded contexts with responsibilities and classifications +- Aggregates, entities, and value objects with field mappings +- Domain events with triggers and subscribers +- Domain services and application services (use cases) +- Context map with integration patterns + +### 2. Original Business Requirements (Reference Input) + +Located under `spec/`: +- **Capability Metadata** — `spec//capability.yaml` for function summary, program count, and shared dependencies +- **Requirements** — `spec//requirements.md` for detailed use case flows and business rules +- **Traceability** — `spec//traceability.yaml` for canonical business rules (disposition `captured`) with rule_id and req_ids mappings +- **Discovery Artifacts** — `spec//discovery/` for field-level details, data types, data store ownership, program structures, and screen definitions +- **Shared Capabilities** — `spec/_shared//requirements.md` for requirements of shared subroutines +- **System Data Model** — `spec/data-model.md` for cross-capability data store access patterns + +## Output Rules + +**MANDATORY**: Each identified microservice MUST have its own dedicated specification file. +- Do NOT combine multiple microservice specifications into a single document +- If a bounded context is decomposed into N services, produce N separate files +- File naming convention: `-specification.md` +- Output location: a dedicated output folder (e.g., `outputs/microservices/`) + +## Execution Model: One Service at a Time + +**CRITICAL**: Do NOT generate all microservice specifications in a single pass. Process **one service at a time** to ensure maximum depth and traceability. + +**Execution sequence:** +1. Complete Step 1 (identify all microservices from bounded contexts) — this produces the full inventory +2. 
For EACH service in the inventory, execute Steps 2–10 completely before moving to the next service +3. For each service, read the relevant source artifacts BEFORE generating the specification (see Mandatory Source Reading below) + +**Why:** Generating all services at once causes context pressure that leads to shallow specifications, merged services, and missing traceability. Processing one service at a time ensures each specification gets full attention and complete source cross-referencing. + +--- + +## Mandatory Source Reading Per Service + +**BEFORE generating each service specification**, you MUST read the following source artifacts for every business function mapped to that service's bounded context: + +### Required Reads (Non-Negotiable) + +For each `` mapped to the service: + +1. **`spec//requirements.md`** — Read completely. Extract ALL REQ-* identifiers relevant to this service's use cases. +2. **`spec//traceability.yaml`** — Read completely. Extract ALL rules with disposition `captured` — these are the canonical business rules that MUST appear in the specification. Each rule has a `rule_id` and `req_ids` linking it to requirements. +3. **`spec//discovery/data-stores.yaml`** — Read completely. Extract field-level metadata (PIC types, offsets, lengths, source copybook names) for all data stores owned by this service. + +### Required Reads (For Completeness) + +4. **`spec//discovery/programs.yaml`** — Read to identify legacy program names (e.g., COACTUPC, COACCT01) that must be referenced in use cases. +5. **`spec//discovery/batch-jobs.yaml`** — Read to identify batch use cases that must be included (store initialization, data extraction, provisioning). 
+ +### What to Extract + +From each source file, extract and keep available while generating the specification: +- Every `REQ-*` identifier and its description +- Every `captured` rule's `rule_id` hash and its associated `req_ids` +- Every field's PIC type, byte offset, and length from data-stores.yaml +- Every legacy program name and its purpose +- Every batch job and its classification +- Shared capability requirements from `spec/_shared//requirements.md` for any shared programs referenced in `capability.yaml` `depends_on` + +**FAILURE MODE**: If you skip these reads, the specification will lack legacy traceability (no PIC types, no rule hashes, no program names) and will be too shallow for implementation. This is the #1 quality differentiator. + +--- + +## Generation Process + +Follow these steps sequentially for each microservice identified. + +### Step 1: Identify Microservices from Bounded Contexts + +Analyze each bounded context from the DDD analysis and determine the service decomposition: + +**Mapping patterns:** + +| Pattern | When to Use | +|---------|-------------| +| 1 BC → 1 Service | Small, cohesive domain with a single team owning it | +| 1 BC → N Services | Large bounded context that can be decomposed further based on distinct sub-capabilities | + +**Decomposition criteria:** +- **Business Capability**: Each service owns a complete business capability +- **Autonomy**: Can be developed, deployed, and scaled independently +- **Data Ownership**: Clear ownership of data entities within bounded context only +- **Team Size**: Can be owned by a single team (2-pizza rule) +- **Cohesion**: Operations within the service change together + +**Classification:** +- **Core Domain**: High business value, competitive advantage +- **Supporting Subdomain**: Necessary but not differentiating +- **Generic Subdomain**: Common functionality, consider off-the-shelf + +**Output format:** +```markdown +## Microservice Inventory + +| # | Service Name | Bounded Context | 
Mapping Pattern | Domain Type | Key Capabilities | +|---|-------------|----------------|-----------------|-------------|-----------------| +| 1 | | | 1:1 or 1:N | Core/Supporting/Generic | | +``` + +### Step 2: Generate Service Overview + +For each microservice, define: + +```markdown +## Service Overview + +- **Service Name**: +- **Bounded Context**: +- **Domain Type**: Core Domain | Supporting Subdomain | Generic Subdomain +- **Purpose**: <2-3 sentence description of what this service does> +- **Business Capabilities**: + - + - +- **Team Ownership**: +``` + +### Step 3: Define Service Boundaries + +**MANDATORY rules:** +- List ONLY what the service owns within its specific bounded context +- Explicitly list what the service does NOT own +- Identify dependencies on other microservices + +```markdown +## Service Boundaries + +### Owns +- + +### Does NOT Own (belongs to other contexts) +- → owned by + +### Dependencies +- : +``` + +### Step 4: Define Data Ownership + +Map aggregates from the DDD analysis to this service's data model: + +**CRITICAL RULES:** +- Only include aggregates, entities, and value objects from this specific bounded context +- No cross-context data — do not include entities belonging to other bounded contexts +- Single database per service — no shared databases +- External data access must be through service calls, not direct database access +- **Value objects MUST be fully specified** — not just listed in a summary table. Each value object requires its own dedicated section with attributes, validation rules (with REQ-* and rule hash references), formatting requirements, and immutability guarantees. 
+ +```markdown +## Data Ownership + +### Domain Model + +#### +- **Aggregate Root**: +- **Identity**: () +- **Source**: / Copybook .cpy +- **Entities**: + | Entity | Key Field | Source Data Store | Description | + |--------|-----------|-------------------|-------------| + +** — Field Mapping**: + +| Domain Attribute | Source Field | PIC Type | Offset | Length | Required | Description | +|-----------------|-------------|----------|--------|--------|----------|-------------| + +**Value Objects**: + +| Value Object | Attributes | Validation | +|-------------|-----------|------------| +| (VO-N) | | | +``` + +#### Value Object Detailed Specifications + +**MANDATORY**: Each value object identified in the aggregate MUST have a fully expanded specification below the summary table. Do NOT just list value objects — specify them completely. + +For each value object, provide: + +```markdown +#### (VO-N) + +- **Source Fields**: +- **Attributes**: + | Attribute | PIC Type | Length | Description | + |-----------|----------|--------|-------------| +- **Validation Rules**: + - (, rule ``) + - (, rule ``) +- **Format**: +- **Immutability**: Guaranteed — new instance created for any change +``` + +Use the field mappings from the DDD analysis and reference `discovery/data-stores.yaml` for complete field metadata (PIC types, offsets, lengths). 
+ +### Step 5: Define API Specification + +Design REST API endpoints for each use case in this bounded context: + +**MANDATORY requirements:** +- ALL use cases MUST have corresponding API endpoints +- ALL API endpoints MUST have complete detailed specifications + +````markdown +## API Specification + +### + +- **Method**: GET | POST | PUT | PATCH | DELETE +- **Path**: `/api/v1//{id}` +- **Description**: +- **Use Case**: + +#### Request +```json +{ + "": "" +} +``` + +#### Response (Success — 200/201) +```json +{ + "": "" +} +``` + +#### Error Responses +| Status | Code | Description | +|--------|------|-------------| +| 400 | VALIDATION_ERROR | | +| 404 | NOT_FOUND | | +| 409 | CONFLICT | | +```` + +### Step 6: Define Event Publishing + +Map domain events from the DDD analysis that originate from this bounded context: + +**MANDATORY REQUIREMENTS for event-driven design:** +- Identify ALL state changes that other bounded contexts need to know about +- For each aggregate write operation, ask: "Does any other service need to react to this change?" If yes, publish an event. +- Events from MQ-based interactions MUST also be modeled as domain events +- Each event MUST have explicit trigger conditions with REQ-* references +- Each event MUST list ALL known subscribers with their bounded context identifier + +**Minimum events per service:** Every service that performs write operations should publish at least: +1. An event for its primary aggregate state change +2. An event for any cross-context operation it receives +3. 
An event for any async/batch operation completion + +````markdown +## Event Publishing + +### + +- **Trigger**: +- **Topic/Channel**: `..` +- **Schema**: + ```json + { + "eventId": "string (UUID)", + "eventType": "", + "occurredAt": "ISO-8601 timestamp", + "aggregateId": " ()", + "payload": { + "" + } + } + ``` +- **Subscribers**: +```` + +### Step 7: Define Service Communication + +Document how this service interacts with other bounded contexts: + +**Communication patterns to specify:** +- **Synchronous (REST)**: For immediate consistency requirements +- **Asynchronous (Events)**: For eventual consistency and decoupling +- **Resilience patterns**: Circuit breaker, retry, timeout, fallback — **with specific numeric parameters** + +**MANDATORY: Resilience Pattern Specificity** + +Do NOT use generic descriptions like "Circuit Breaker / Retry / Timeout". Each synchronous dependency MUST specify concrete resilience parameters: + +- **Circuit Breaker**: failure threshold, window duration, half-open retry interval +- **Retry**: maximum attempts, backoff strategy, initial delay +- **Timeout**: connection timeout and read timeout in seconds +- **Fallback**: what happens when the circuit is open + +```markdown +## Service Communication + +### Synchronous Dependencies (Outbound REST Calls) + +| Target Service | Endpoint | Purpose | Resilience Pattern | +|---------------|----------|---------|-------------------| +| | | | Circuit Breaker (<N> failures / <N>s window), Retry (<N> attempts, exponential backoff), Timeout (<N>s) | + +### Events Consumed (Inbound) + +| Event | Source Context | Handler | Action | +|-------|---------------|---------|--------| +| | | | | + +### Anti-Corruption Layer + +| External Concept | Internal Concept | Translation Logic | +|-----------------|-----------------|-------------------| +| | | | +``` + +### Step 8: Define Domain Services + +Map domain services from the DDD analysis that belong to this bounded context: + +**CRITICAL RULES:** +- Only include domain 
services operating within this specific bounded context +- Cross-context coordination belongs in application services, not domain services +- **Identify ALL domain services** — not just one per bounded context + +**MANDATORY: Full operation signatures with parameter types and return types** + +```markdown +## Domain Services + +### + +- **Responsibility**: +- **Operations**: + - `(: , : )` → `` — +- **Dependencies**: + - Internal: + - External: +- **Used By**: +``` + +### Step 9: Define Application Services (Use Cases) + +Map use cases from the DDD analysis to this service: + +**CRITICAL RULES:** +- Only include use cases operating within this specific bounded context +- Single transaction boundary within this bounded context +- **Every flow step MUST have a REQ-* reference and/or rule hash** — non-negotiable for traceability + +```markdown +## Application Services (Use Cases) + +### + +- **Actor**: +- **API Endpoint**: +- **Input Command**: + - : +- **Output Result**: + - : +- **Flow**: + 1. (, rule ``) + 2. (, rule ``) + ... +- **Transaction Boundary**: +- **Error Handling**: + | Condition | Error Code | Response | + |-----------|-----------|----------| + | | | () | +- **Legacy Programs**: +``` + +### Step 10: Traceability Verification Checklist + +**MANDATORY**: Before finalizing each service specification, complete this checklist. Do NOT move to the next service until all items pass. + +#### 10a. Captured Rule Coverage + +For each `` mapped to this service: +1. Open `spec//traceability.yaml` +2. List ALL rules with disposition `captured` +3. Verify that EACH captured rule's `rule_id` hash appears verbatim in the specification +4. If any captured rule is missing, add it to the appropriate section + +#### 10b. Requirements Coverage + +For each `` mapped to this service: +1. Open `spec//requirements.md` +2. List ALL REQ-* identifiers that belong to use cases in this service +3. Verify that EACH REQ-* identifier appears in the specification +4. 
If any REQ-* identifier is missing, add it to the appropriate section + +#### 10c. Legacy Program Traceability + +For each use case in the specification: +1. Verify it references the legacy program name(s) it derives from +2. If missing, add a `Legacy Programs` field to the use case + +#### 10d. Field-Level Traceability + +For each entity in the Data Ownership section: +1. Verify the field mapping table includes: Domain Attribute, Source Field name, PIC Type, Offset, Length +2. Verify the source data store DSN is referenced +3. If missing, read `spec//discovery/data-stores.yaml` and add the complete field mapping + +#### 10e. Batch Operations Coverage + +1. Review `spec//discovery/batch-jobs.yaml` for this service's function(s) +2. Verify that ALL batch operations have corresponding use cases and API endpoints +3. If any batch operation is missing, add it as a use case with its own API endpoint + +**Only after ALL checklist items pass, write the specification file and proceed to the next service.** + +--- + +## Final Specification File Structure + +Each microservice specification file (`-specification.md`) must contain these sections in order: + +``` +# — Microservice Specification + +> **Source**: | **Mapping**: <1:1 or 1:N> | **Source Function**: +> **Validates**: + +--- + +## 1. Service Overview +## 2. Service Boundaries +## 3. Data Ownership + ### Domain Model + #### + - Entity field mappings (full PIC type, offset, length tables) + - Value Object summary table + #### Value Object Detailed Specifications (one subsection per VO) + - Attributes with types + - Validation rules with REQ-* and rule hashes + - Format specifications + #### Invariants (with REQ-* and rule hash references) +## 4. API Specification (all endpoints with full request/response/error schemas) +## 5. Event Publishing (all events with trigger conditions, schemas, and subscriber lists) +## 6. Service Communication (with specific resilience parameters per dependency) +## 7. 
Domain Services (multiple services with full typed operation signatures) +## 8. Application Services (Use Cases) (with REQ-* on every flow step) +``` + +## Key Principles + +- **One service at a time** — generate each specification in isolation with full source reads before moving to the next +- **One file per microservice** — never combine specifications +- **Never merge bounded contexts** — if the DDD model defines separate contexts, they MUST remain separate services +- **Bounded context scope** — each service only owns data and logic from its bounded context +- **No shared databases** — services communicate through APIs and events +- **Complete API coverage** — every use case (online AND batch) has a corresponding API endpoint +- **Explicit boundaries** — always state what the service does NOT own +- **Resilience by default** — all cross-service communication includes resilience patterns WITH SPECIFIC NUMERIC PARAMETERS +- **Event-driven integration** — prefer asynchronous events for cross-context communication +- **Anti-corruption layers** — protect internal models from external context changes +- **Mandatory source reading** — ALWAYS read requirements.md, traceability.yaml, and data-stores.yaml for each function before generating its service spec +- **Full legacy traceability** — every field must include PIC type, offset, length, and source copybook; every use case must reference its legacy program name; every validation must reference its REQ-* ID and rule hash +- **Batch operations are first-class** — store initialization, data extraction, GDG setup, RACF provisioning, and file operations are full use cases with API endpoints +- **Verify before finalizing** — complete the Step 10 traceability checklist for each service before writing the file +- **Value objects are fully specified** — each VO gets its own section with attributes, validation rules, format, and immutability guarantee +- **Multiple domain services per context** — identify ALL distinct service 
responsibilities +- **REQ-* on every flow step** — every step must reference the specific requirement it implements +- **Events are comprehensive** — publish events for primary state changes, cross-context operations, AND async/batch completions +- **Resilience parameters are concrete** — specify failure threshold, window, retry count, backoff strategy, and timeout for each dependency diff --git a/aws-transform/steering/workload-mainframe-reimagine-verify.md b/aws-transform/steering/workload-mainframe-reimagine-verify.md new file mode 100644 index 0000000..8e21f49 --- /dev/null +++ b/aws-transform/steering/workload-mainframe-reimagine-verify.md @@ -0,0 +1,196 @@ +--- +inclusion: always +--- + +# Phase 4: Traceability Verification + +## Purpose + +This steering file guides the deterministic verification that all business rules and requirements from the legacy discovery artifacts have been traced into the generated microservice specifications. Unlike Phases 1–3 which rely on analysis and synthesis, Phase 4 is a **purely mechanical, script-based check** — no LLM judgment is involved in determining implementation status. + +## Goal + +Run the verification script against the source inputs and generated specifications, then review the resulting HTML dashboard to identify any gaps in traceability coverage. + +## Prerequisites + +Phase 4 requires that the following phases have been completed: + +1. **Phase 1** produced `ddd-analysis.md` +2. **Phase 2** produced the DDD bounded context model +3. 
**Phase 3** produced individual microservice specification files (`*-specification.md`) in the output directory (e.g., `outputs/microservices/`) + +The verification script also requires: +- Python 3.9+ installed +- PyYAML package installed (`pip install pyyaml`) + +## What Gets Verified + +### Business Rules (from `traceability.yaml`) + +The script extracts rules with disposition **`captured`** from each business function's `traceability.yaml`: + +- **Captured rules** are the canonical business rules that MUST be modeled in the microservice specifications. Each captured rule has a `rule_id` (underscore-separated UUID format) and typically `req_ids` linking it to REQ-* requirements. + +Rules with other dispositions are **excluded** from verification: +- `not_applicable` — platform mechanics (CICS/BMS, working storage, file I/O verb mechanics) +- `unreachable` — dead code paths not exercised at runtime +- `not_accounted_for` — rules not yet mapped to requirements; tracked separately + +### Requirements (from `requirements.md`) + +The script extracts every `REQ-*` identifier (e.g., `REQ-CUSTVAL-003`, `REQ-UPDEXEC-001`) from each business function's `requirements.md`. Every requirement identifier should appear in at least one microservice specification. + +### How "Implemented" Is Determined + +The verification is a **deterministic text search** that only examines sections 1–8 of each specification file (the core specification). The Appendix section is **excluded** because it contains a reference dump of all identifiers and would produce false positives. + +1. For each `rule_id` (e.g., `42d0bdf3_0644_4051_856a_ab2766f578f8`), the script searches for the verbatim string in sections 1–8 of every `*-specification.md` file. +2. For each `REQ-*` identifier (e.g., `REQ-CUSTVAL-003`), the script searches for the verbatim string in sections 1–8 of every `*-specification.md` file. +3. 
If the identifier appears in **at least one** specification file (within sections 1–8), the rule/requirement is marked as **IMPLEMENTED**. +4. If the identifier appears in **no** specification file (within sections 1–8), it is marked as **MISSING**. + +The Appendix is detected by the heading pattern `## Appendix` — everything from that heading to the end of the file is stripped before scanning. + +## Verification Process + +### Step 1: Verify Prerequisites + +Before running the script, confirm: + +1. The `spec/` directory exists and contains business function subdirectories with `requirements.md` and `traceability.yaml` files. +2. The microservice specification output directory exists and contains `*-specification.md` files generated by Phase 3. +3. Python 3.9+ is available. +4. PyYAML is installed. + +```bash +# Check Python version +python3 --version + +# Install PyYAML if needed +pip install pyyaml +``` + +### Step 2: Run the Verification Script + +The verification script is located at: +``` +.kiro/steering/verify-traceability.py +``` + +Run it with the appropriate paths: + +```bash +python3 .kiro/steering/verify-traceability.py \ + --inputs-dir inputs/spec \ + --specs-dir outputs/microservices \ + --output traceability-dashboard.html +``` + +**Parameters:** +| Parameter | Default | Description | +|-----------|---------|-------------| +| `--inputs-dir` | `inputs/spec` | Path to the business function input directory | +| `--specs-dir` | `outputs/microservices` | Path to the generated microservice specification files | +| `--output` | `traceability-dashboard.html` | Path for the output HTML dashboard | + +### Step 3: Review Console Output + +The script prints a summary to stdout: + +``` +============================================================ +TRACEABILITY VERIFICATION SUMMARY +============================================================ +Requirements (REQ-*): 189/203 (93.1%) +──────────────────────────────────────────────────────────── +Scope: Chapters 1–8 
of specification files only +============================================================ + +⚠ 14 REQ-* identifiers NOT found in chapters 1–8 of any specification file. +``` + +**Exit codes:** +- `0` — All identifiers traced (100% coverage) +- `1` — One or more identifiers missing + +### Step 4: Review the HTML Dashboard + +Open `traceability-dashboard.html` in a browser. The dashboard provides: + +#### Summary Cards (top) +- **Requirements Coverage** — percentage of REQ-* identifiers found in specs (chapters 1–8) +- **Missing** — total count of REQ-* identifiers not found in chapters 1–8 + +#### Coverage by Business Function (table) +- Per-function breakdown showing requirements coverage +- Click a function name to jump to its detail section + +#### Detail Sections (per function) +For each business function: +- **Requirements table** — each REQ-* identifier with its text, implementation status, and which spec file(s) and section(s) reference it + +#### Filtering +Use the filter buttons to show: +- **All** — every rule and requirement +- **Missing Only** — only untraced identifiers (for remediation) +- **Implemented Only** — only traced identifiers (for confirmation) + +### Step 5: Address Missing Identifiers + +For each MISSING identifier, determine the appropriate action: + +#### Missing Business Rules + +| Disposition | Action | +|-------------|--------| +| `captured` | **Must fix** — This is a canonical business rule. Find the appropriate microservice specification and add the rule_id reference in the relevant section (Business Rules, Invariants, Validation Rules, or Use Case flow). | + +#### Missing Requirements + +All missing REQ-* identifiers should be traced to a specification. For each: +1. Identify which bounded context / microservice should own this requirement +2. Add the REQ-* identifier in the appropriate section of that service's specification (typically in Use Cases, API Specification, or Error Handling) +3. 
If the requirement spans multiple services, reference it in each relevant specification + +### Step 6: Re-run Verification + +After updating specifications, re-run the script to confirm coverage has improved: + +```bash +python3 .kiro/steering/verify-traceability.py \ + --inputs-dir inputs/spec \ + --specs-dir outputs/microservices \ + --output traceability-dashboard.html +``` + +Repeat Steps 4–6 until the desired coverage level is achieved. + +## Key Principles + +- **Deterministic, not probabilistic** — The script uses exact string matching. No LLM, no fuzzy matching, no interpretation. An identifier is either present or absent. +- **Traceability is bidirectional** — Phase 4 verifies forward traceability (source → spec). The identifiers embedded in specifications by Phase 3 also enable backward traceability (spec → source). +- **Coverage target** — Aim for 100% coverage of captured rules and REQ-* requirements. Rules with `not_accounted_for` disposition may have legitimate reasons for exclusion, but each exclusion should be a conscious decision. +- **Script is re-runnable** — Run the verification as many times as needed. Each run produces a fresh dashboard reflecting the current state of the specifications. +- **No modification of source inputs** — Phase 4 never modifies the `spec/` files. It only reads them and compares against the specification outputs. + +## Troubleshooting + +### "ERROR: Specs directory not found" +Phase 3 has not been run yet, or the output directory path is incorrect. Verify the `--specs-dir` argument points to the directory containing `*-specification.md` files. + +### "No specification files found" in dashboard +The specs directory exists but contains no files matching the `*-specification.md` pattern. Check that Phase 3 used the correct naming convention. + +### Rule IDs not matching +Rule IDs in `traceability.yaml` use underscore-separated UUID segments (e.g., `42d0bdf3_0644_4051_856a_ab2766f578f8`). 
Ensure the specification files reference these IDs in exactly the same format — no hyphens, no curly braces, no prefix. + +### REQ-* IDs not matching +Requirement IDs follow the pattern `REQ-CATEGORY-NNN` (e.g., `REQ-CUSTVAL-003`). The match is case-sensitive. Ensure specifications use the exact same casing and format as the source `requirements.md`. + +### PyYAML not installed +```bash +pip install pyyaml +# or +pip3 install pyyaml +``` diff --git a/aws-transform/steering/workload-mainframe-reimagine.md b/aws-transform/steering/workload-mainframe-reimagine.md new file mode 100644 index 0000000..c2f089a --- /dev/null +++ b/aws-transform/steering/workload-mainframe-reimagine.md @@ -0,0 +1,202 @@ +# Mainframe Reimagine + +> **Last Updated:** 2026-06-10 + +After a mainframe modernization job completes analysis and generates requirements, use this workflow to download the outputs and reimagine the application — decomposing the legacy system into modern microservices through progressive analysis phases. 
+ +## When to Use + +- User asks to "download specs", "download source code", "forward engineer", "reimagine", or "set up workspace" +- A mainframe job has completed the "Generate requirements" step +- User wants to start building modernized code from the generated specifications + +## Prerequisites + +- A completed (or partially completed) mainframe job with spec_gen output +- The job's workspace ID and job ID + +## Phases Overview + +| Phase | Input | Output | Steering File | +|-------|-------|--------|---------------| +| Download & Workspace Setup | S3 assets | `reimagine-/` workspace | This file | +| Business Function Analysis | `spec/` folder | `ddd-analysis.md` | `workload-mainframe-reimagine-analysis.md` | +| DDD Bounded Context Design | `ddd-analysis.md` | Domain model with contexts, aggregates, events | `workload-mainframe-reimagine-ddd.md` | +| Microservice Spec Generation | DDD model + `spec/` | One `*-specification.md` per service | `workload-mainframe-reimagine-specgen.md` | +| Traceability Verification | Specs + `spec/` | `traceability-dashboard.html` (pass/fail) | `workload-mainframe-reimagine-verify.md` | + +**User confirmation is required before starting the pipeline (see Gate below).** Once started, phases run sequentially without additional prompts. + +--- + +## Download & Create Workspace + +**When the user asks to download specs, source code, or set up a workspace for forward engineering/reimagine:** + +Tell the user: "I'll gather the generated specs and original source code from your job, then set up a workspace for reimagining." + +Both files are **connector-backed assets** — use `resource="asset"`. Do NOT use `resource="artifact"`, `list_resources resource="artifacts"`, or browse output folders. 
+ +**Required parameters for `get_resource resource="asset"`:** +- `workspaceId` — the workspace ID +- `jobId` — the job ID +- `connectorId` — get from `list_resources resource="connectors" workspaceId=""` +- `assetKey` — the S3 key (path after bucket name) + +### Step 0: Get the connector ID + +``` +list_resources resource="connectors" workspaceId="" +# Returns the connectorId needed for all asset downloads +``` + +### Step 1: Download the generated spec requirements + +The spec_gen output path is in the agent's message or task: +> "Modernization requirements are stored in your S3 bucket. s3://…/transform-output/\/spec_gen/spec_gen_specs_\.zip" + +``` +# Extract the assetKey (the S3 path after the bucket name) and download +get_resource resource="asset" workspaceId="" jobId="" connectorId="" assetKey="transform-output//spec_gen/spec_gen_specs_.zip" +``` + +### Step 2: Download the original source code ZIP + +The source ZIP is NOT in the job's output folders. The ONLY way to find it: + +``` +# Find the "Specify resource location" task from "Kick off modernization" +list_resources resource="tasks" workspaceId="" jobId="" + +# Get the task — read the HUMAN ARTIFACT (submitted response) for the source ZIP filename +get_resource resource="task" workspaceId="" jobId="" taskId="" + +# Download — use the filename from the human artifact as assetKey +get_resource resource="asset" workspaceId="" jobId="" connectorId="" assetKey=".zip" +``` + +**Do NOT** browse folders or tell the user the source is unavailable — it IS downloadable via the steps above. 
+ +### Step 3: Unzip, organize, and clean up + +After both downloads complete, create a workspace folder named `reimagine-<timestamp>` using the current date and time (e.g., `reimagine-20260610_143022`): + +``` +# Create workspace folder using current timestamp (YYYYMMDD_HHMMSS) +mkdir -p reimagine-<timestamp>/source + +# Unzip source code +unzip <source-zip-name>.zip -d reimagine-<timestamp>/source/ + +# Unzip generated specs directly into workspace root (ZIP already contains a spec/ folder) +unzip spec_gen_specs_<id>.zip -d reimagine-<timestamp>/ + +# Clean up ZIP files +rm <source-zip-name>.zip spec_gen_specs_<id>.zip +``` + +Present the resulting structure to the user: +``` +reimagine-20260610_143022/ +├── source/ ← original mainframe legacy source code +└── spec/ ← generated modernization requirements +``` + +--- + +## Gate: After Workspace Setup + +Ask the user via AskUserQuestion: + +**Question:** "Workspace is ready. Would you like to start the reimagine analysis?" + +**Options:** +- **"Start reimagine" (Recommended)** — "I'll run 4 phases to decompose your legacy system into microservice specifications: business function analysis → domain model design → microservice spec generation → traceability verification. I'll give you a summary after each phase." +- **"Explore with AWS Transform plugin"** — "Install the AWS Transform VS Code extension to browse specs and source interactively with traceability and AI-powered docs." +- **"Stop here"** — "Workspace is set up. You can come back later to start."
+ +If user selects "Explore with AWS Transform plugin", provide more detail: + +> The **AWS Transform** VS Code extension adds these features to your workspace: +> +> - **Forward traceability** — select a line of legacy source and see which business rules trace to it +> - **Reverse traceability** — select a requirement (REQ-*) and jump to the source code locations it maps to +> - **Generate technical documentation** — right-click a program to generate AI-powered docs +> - **Generate business capability summaries** — right-click a spec folder for a capability overview +> - **Generate requirement summaries** — select a REQ-* to get a plain-language explanation +> +> Search "AWS Transform" in the VS Code Extensions marketplace to install. Once installed, open the reimagine workspace folder and the plugin will auto-detect the spec/ structure. + +Then re-ask whether to start reimagine or stop here. + +If user proceeds → continue to the Execution section below. + +--- + +## Resume Detection + +Before starting any phase, check what output files already exist in the workspace to determine where to resume: + +| File exists | Skip to | +|-------------|---------| +| `ddd-analysis-wip.md` (without `ddd-analysis.md`) | Resume Business Function Analysis mid-way — read the WIP file and continue from the last completed section | +| `ddd-analysis.md` | Domain Model Design | +| `ddd-working.md` (without `ddd-bounded-contexts.md`) | Resume Domain Model Design mid-way — read the working file and continue from the last completed step | +| `ddd-bounded-contexts.md` | Microservice Spec Generation | +| `outputs/microservices/*.md` | Traceability Verification | +| `traceability-dashboard.html` | Done — show results | + +If a previous phase's output exists, tell the user: "I can see you've already completed [phase]. Continuing from [next phase]." + +This allows the user to resume in a new conversation if context runs out.
+ +--- + +## Execution: Run All Phases + +Once the user confirms, tell them: + +> "Starting the reimagine process. I'll run 4 phases and give you a summary along the way:" +> +> 1. **Business function analysis** — consolidate all spec artifacts into a single analysis document +> 2. **Domain model design** — identify bounded contexts, aggregates, and domain events +> 3. **Microservice spec generation** — produce one detailed specification per service +> 4. **Traceability verification** — confirm every business rule and requirement is covered +> +> If we run into context limits, you can start a new conversation — I'll detect the progress and resume from where we left off. + +Then execute each phase sequentially with a brief summary between them. Do NOT ask for confirmation between phases — just notify progress. + +### Business Function Analysis + +Tell the user: "Starting business function analysis..." + +Read and follow `workload-mainframe-reimagine-analysis.md`. + +**Context management:** Read one source file at a time, extract what's needed, write the chunk immediately, then move to the next file. Do NOT accumulate all files in context before writing. + +When complete, summarize to the user: brief stats (number of business functions found, data stores, programs) and confirm `ddd-analysis.md` was created. + +### Domain Model Design + +Tell the user: "Analysis complete. Moving to domain model design..." + +Read and follow `workload-mainframe-reimagine-ddd.md`. + +When complete, summarize: number of bounded contexts identified, key aggregates, notable integration points. + +### Microservice Spec Generation + +Tell the user: "Domain model ready. Generating microservice specifications..." + +Read and follow `workload-mainframe-reimagine-specgen.md`. + +When complete, summarize: number of services generated, list the specification files created. + +### Traceability Verification + +Tell the user: "Specs generated. Running traceability verification..." 
+ +Read and follow `workload-mainframe-reimagine-verify.md`. + +When complete, present the coverage results (percentage, any gaps) and the path to the HTML dashboard. diff --git a/aws-transform/steering/workload-mainframe.md b/aws-transform/steering/workload-mainframe.md index dc5ff70..2f60d10 100644 --- a/aws-transform/steering/workload-mainframe.md +++ b/aws-transform/steering/workload-mainframe.md @@ -8,34 +8,45 @@ AWS Transform for mainframe accelerates the modernization of legacy zOS mainfram | # | Capability | Description | Eligible Files | Requires | |---|-----------|-------------|----------------|----------| -| 1 | Analyze code | Classifies files, counts LOC, maps dependencies, identifies missing files and duplicates | All | — | -| 2 | Data analysis | Data lineage (program/JCL → dataset mapping) and data dictionary (field-level metadata for copybooks and Db2) | All | Code analysis | -| 3 | Activity metrics analysis | Analyzes SMF records (type 30 batch, type 110 CICS) for job frequency, resource usage, unused code identification | SMF records | Recommend code analysis first | -| 4 | Generate technical documentation | PDF/JSON docs per file — summary or detailed functional specification with logic, flows, dependencies | COBOL, JCL | Code analysis + dependency analysis | -| 5 | Extract business logic | Extracts business rules, process flows, and logic — application-level (grouped by transactions/jobs) or file-level | COBOL, JCL | Code analysis + dependency + entry point analysis | -| 6 | Decompose code | Breaks codebase into functional domains using seed programs, produces dependency graphs | All | Code analysis. Recommend BRE first | -| 7 | Migration wave planning | Sequenced migration plan based on decomposed domains with recommended modernization order | Domains | Decomposition | -| 8 | Refactor code | Transforms COBOL → cloud-optimized Java. Configurable target DB, encoding, engine version | COBOL | Code analysis. 
Recommend decomposition + wave planning first | -| 9 | Reforge code | LLM-powered post-refactor improvement — replaces COBOL-style Java with idiomatic Java patterns | Refactored Java | Refactor. Quota: 3M LOC/job, 50M LOC/user/month | -| 10 | Plan test cases | Creates test plans from code analysis and scheduler paths, prioritizes by complexity, maps business rules | JCL, schedulers | Code analysis. Benefits from BRE | -| 11 | Generate test data collection scripts | Produces JCL scripts to collect before/after test data from mainframe (Db2 unloads, VSAM REPRO, sequential datasets) | Test plan | Test planning | -| 12 | Test automation script generation | Generates scripts to execute test cases on the modernized Java application with data setup and result comparison | Test plan | Test planning + test data collection | +| 1 | Analyze code | Parse and analyze your files, collect statistics, analyze structure and dependencies, generate dependency graphs, and identify missing assets | All | — | +| 2 | Analyze data | Analyze data flow and lineage relationships in your codebase | All | Code analysis | +| 3 | Analyze activity metrics | Analyze mainframe SMF records for job runs and metrics | SMF records | Recommend code analysis first | +| 4 | Generate technical documentation | Create comprehensive technical documentation for your mainframe code | COBOL, JCL | Code analysis + dependency analysis | +| 5 | Extract business logic | Extract and document business rules from your mainframe applications | COBOL, JCL | Code analysis + dependency + entry point analysis | +| 6 | Decompose code | Break down your codebase into functional or logical domains based on seed programs | All | Code analysis. Recommend BRE first | +| 7 | Plan test cases | Create test plans from mainframe code and schedulers | JCL, schedulers | Code analysis. 
Benefits from BRE | +| 8 | Generate test data collection scripts | Create JCL scripts for data collection | Test plan | Test planning | +| 9 | Generate test automation scripts | Generate execution scripts for modern environments | Test plan | Test planning + test data collection | ## Starting Workflow -1. **Inventory** — Scan for COBOL (.cbl, .cob), JCL (.jcl), copybooks (.cpy), and VSAM definitions -2. **Scope decision** — Ask user: full rewrite, partial modernization, or re-platform? -3. **Complete analysis on AWS Transform** — Based on what the customer wants to do, run relevant agents in AWS Transform. Note: the agent always starts with a "Kick off modernization" step that requires connector setup and source code location before any analysis begins. -4. **Build modernized applications with Kiro** — Based on scope, draft Kiro modernization requirements based on outputs from agents +When the user mentions mainframe modernization, COBOL, JCL, or any mainframe-related topic, present the options directly via AskUserQuestion: -**Key question to ask user:** "Can you tell me what you are looking to accomplish today on your mainframe modernization project? Is this a full re-architecture to microservices, or a lift-and-shift to run COBOL on AWS?" +**Question:** "Here are your mainframe modernization options. You can write out an objective or select from options below:" + +**Options:** +- **"Assess and reimagine"** — "Identify modernization boundaries to identify business functions, and generate requirements to reimagine the business functions. This will analyze code and data to discover discrete data paths and produce a catalog of business functions, then generate modernization requirements to reimagine your selected business functions." +- **"Reimagine"** — "If you already have a scoped application, generate requirements and reimagine this application. 
This will analyze your programs and data sources, extract business rules, and generate modernization requirements to reimagine your application." +- **"See list of all capabilities"** — "Create a custom job plan by selecting from all available capabilities." +- **"Connect to an existing job"** — "Resume or check progress on a mainframe modernization job you've already started." + +**Based on selection:** + +- **Assess and reimagine:** Create a job with the full end-to-end workflow. The generated plan will include: Kick off modernization → Discover business functions (Analyze code, Analyze data, Discover data paths, Discover business functions). After the "Discover business functions" phase completes, the user will select one or more business functions to reimagine. Then proceed to: Reimagine (Extract business logic, Generate requirements) for the selected business functions. Confirm the plan with the user before executing. + +- **Reimagine:** Create a job scoped to reimagining an already-scoped application. The generated plan will include: Kick off modernization → Analyze code → Analyze data → Extract business logic → Generate requirements. Confirm the plan with the user before executing. + +- **See list of all capabilities:** Present the capabilities from the Capabilities Overview table above and let the user select which to include (they can select by name or number, and choose multiple). Then generate a custom job plan from their selections. Confirm the plan with the user before executing. + +- **Connect to an existing job:** List the user's workspaces and jobs to find mainframe jobs. Present the job(s) with their current status (phase, progress, pending tasks). Once connected, show the job status and ask what they'd like to do next (e.g., check status, trigger reimagine, handle pending requests, download artifacts). If the user then asks to "reimagine" or "forward engineer" from a connected job, follow `workload-mainframe-reimagine.md`.
+ +All new job options require a "Kick off modernization" step first (connector setup and source code location) before any analysis begins. Additional steps may be added due to dependencies between capabilities. ## Agents & Transforms | Agent | How to Discover | Purpose | |-------|----------------|---------| -| Mainframe agent | `list_resources` with `resource: "agents"` | End-to-end COBOL → Java/C# modernization | -| AWS/comprehensive-codebase-analysis | CLI: `atx custom def exec` | Static analysis of COBOL programs | +| Mainframe agent | `list_resources` with `resource: "agents"` | End-to-end COBOL → Modern microservices application on AWS | **Discover the agent dynamically** — do not hardcode the agent name: @@ -107,6 +118,10 @@ These are things that work differently through the MCP API vs the AWS Transform The agent requires source code as a **single .zip file** in S3. When the "Specify resource location" task appears, `assetLocation` must point to a `.zip` file. +### Reimagine (Forward Engineering) + +See `workload-mainframe-reimagine.md` for the complete workflow to download specs and source code, organize a workspace, and begin reimagining. + ### Business Logic Extraction (BRE) Configuration When the "Configure settings" task appears for BRE (`MainframeBreInputComponent`), you MUST always populate the `userSelectedFiles` array — regardless of `reportScope`. @@ -119,8 +134,7 @@ Both scopes require the file list. 
The webapp auto-selects all files for `applic ## Known Limitations - Assembler programs (ASM) are not handled by AWS Transform agents — Kiro can analyze but not convert -- PL/I is supported for BRE and documentation only — not for refactoring +- PL/I is supported for Business Logic Extraction, Technical Documentation and Data Analysis only — not for refactoring - CICS BMS screen conversion may need manual UI design decisions - Complex SORT/MERGE JCL steps may need manual review - Performance tuning of converted Java code is not automated -- Reforge quota: 3M lines of code per job, 50M lines of code per user per month