diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..bce5599 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,5 @@ +# Skill command-cards are compared byte-for-byte against a generated (LF) fence, +# so they must stay LF on every platform regardless of git's autocrlf setting. +# (The skilldoc tool also normalizes EOL on read; this keeps the committed files +# canonical so Windows checkouts don't materialize them as CRLF.) +skills/flashduty/**/*.md text eol=lf diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 93d9c50..997f173 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,3 +31,9 @@ jobs: - name: Build run: go build -v ./cmd/flashduty + + # Skill command-cards must stay in sync with the CLI tree (equivalent to + # `make check-cards`; invoked directly so it runs on the Windows runner, + # which has no make). + - name: Check skill command-cards + run: go run ./internal/cmd/skilldoc check diff --git a/Makefile b/Makefile index cafcdf7..daec75c 100644 --- a/Makefile +++ b/Makefile @@ -70,6 +70,18 @@ test-v: ## Run unit tests with verbose output test-cover: ## Run unit tests with coverage $(GOTEST) -race -cover ./... 
+# ============================================================================ +# Skill command-cards (internal/skilldoc) +# ============================================================================ + +.PHONY: gen-cards +gen-cards: ## Rewrite every card's generated fence from the CLI tree + $(GOCMD) run ./internal/cmd/skilldoc gen + +.PHONY: check-cards +check-cards: ## Validate skills/flashduty cards against the command oracle + $(GOCMD) run ./internal/cmd/skilldoc check + # ============================================================================ # Pre-push check (recommended before pushing) # ============================================================================ diff --git a/README.md b/README.md index fd59fdd..17922e8 100644 --- a/README.md +++ b/README.md @@ -36,33 +36,6 @@ Download the latest release for your platform from [GitHub Releases](https://git | `MIRROR_URL` | Override installer release asset mirror | `https://static.flashcat.cloud/flashduty-cli` | | `FLASHDUTY_UPDATE_BASE_URL` | Override `flashduty update` and auto update-check base URL | `https://static.flashcat.cloud/flashduty-cli` | -## Agent Skills - -Flashduty CLI ships with 10 agent skills that teach AI coding agents how to operate Flashduty from your terminal. Compatible with 41+ agents including Claude Code, Cursor, GitHub Copilot, Codex, Gemini CLI, Windsurf, and more. - -```bash -npx skills add flashcatcloud/flashduty-cli -y -g -``` - -The installer auto-detects which agents you have and installs skills to all of them. 
- -### Available Skills - -| Skill | Scope | -|-------|-------| -| `flashduty-shared` | Foundation: authentication, 3-layer model, global flags, safety rules | -| `flashduty-incident` | Incident lifecycle: triage, investigate, resolve, merge, snooze, reassign | -| `flashduty-alert` | Alert and alert event investigation: drill down, trace, merge | -| `flashduty-change` | Change event tracking and deployment frequency trends | -| `flashduty-oncall` | On-call schedule queries: who is on call, shift details | -| `flashduty-channel` | Channel and escalation rule lookups | -| `flashduty-statuspage` | Status page management and Atlassian → Flashduty migration | -| `flashduty-insight` | Analytics: MTTA/MTTR, noise reduction, notification trends | -| `flashduty-admin` | Team/member lookups and audit log search | -| `flashduty-template` | Notification template validation and preview | - ---- - ## Quick Start ### 1. Authenticate diff --git a/README_zh.md b/README_zh.md index a2b66d8..2d4e2c5 100644 --- a/README_zh.md +++ b/README_zh.md @@ -36,33 +36,6 @@ irm https://static.flashcat.cloud/flashduty-cli/install.ps1 | iex | `MIRROR_URL` | 覆盖安装脚本使用的 release 资源镜像 | `https://static.flashcat.cloud/flashduty-cli` | | `FLASHDUTY_UPDATE_BASE_URL` | 覆盖 `flashduty update` 和自动更新检查的 base URL | `https://static.flashcat.cloud/flashduty-cli` | -## Agent Skills(AI 代理技能) - -Flashduty CLI 内置 10 个代理技能,让 AI 编程代理能够通过 CLI 操作 Flashduty 平台。兼容 41+ 编程代理,包括 Claude Code、Cursor、GitHub Copilot、Codex、Gemini CLI、Windsurf 等。 - -```bash -npx skills add flashcatcloud/flashduty-cli -y -g -``` - -安装器会自动检测已安装的代理并为其安装技能。 - -### 可用技能 - -| 技能 | 范围 | -|------|------| -| `flashduty-shared` | 基础:认证、三层降噪模型、全局参数、安全规则 | -| `flashduty-incident` | 故障生命周期:分诊、调查、解决、合并、暂停、转派 | -| `flashduty-alert` | 告警与告警事件调查:下钻、追踪、合并 | -| `flashduty-change` | 变更事件追踪与部署频率趋势 | -| `flashduty-oncall` | 值班查询:当前值班人、排班详情 | -| `flashduty-channel` | 协作空间与升级规则查询 | -| `flashduty-statuspage` | 状态页管理以及从 Atlassian 迁移到 Flashduty | -| `flashduty-insight` | 
分析:MTTA/MTTR、降噪率、通知趋势 | -| `flashduty-admin` | 团队/成员查询与审计日志搜索 | -| `flashduty-template` | 通知模板验证与预览 | - ---- - ## 快速开始 ### 1. 认证 diff --git a/internal/cli/dump_commands.go b/internal/cli/dump_commands.go new file mode 100644 index 0000000..ad3b1dd --- /dev/null +++ b/internal/cli/dump_commands.go @@ -0,0 +1,33 @@ +package cli + +import ( + "encoding/json" + + "github.com/spf13/cobra" + + "github.com/flashcatcloud/flashduty-cli/internal/skilldoc" +) + +// RootForDump returns the fully-populated root command so dev tooling (the +// internal/cmd/skilldoc generator/validator) can build the command dump +// in-process, without shelling out to `flashduty __dump-commands`. +func RootForDump() *cobra.Command { return rootCmd } + +// newDumpCommandsCmd builds the hidden `__dump-commands` command. It serializes +// the live cobra tree to indented JSON — the oracle the card generator and +// validator consume. Hidden because it is internal tooling, not a user verb. +func newDumpCommandsCmd() *cobra.Command { + return &cobra.Command{ + Use: "__dump-commands", + Short: "Dump the command tree as JSON (internal tooling)", + Hidden: true, + RunE: func(cmd *cobra.Command, _ []string) error { + out, err := json.MarshalIndent(skilldoc.Build(rootCmd), "", " ") + if err != nil { + return err + } + _, err = cmd.OutOrStdout().Write(append(out, '\n')) + return err + }, + } +} diff --git a/internal/cli/dump_commands_test.go b/internal/cli/dump_commands_test.go new file mode 100644 index 0000000..356db95 --- /dev/null +++ b/internal/cli/dump_commands_test.go @@ -0,0 +1,29 @@ +package cli + +import ( + "bytes" + "encoding/json" + "strings" + "testing" +) + +func TestDumpCommands_EmitsJSONWithStatusPageChangeCreate(t *testing.T) { + cmd := newDumpCommandsCmd() + var out bytes.Buffer + cmd.SetOut(&out) + if err := cmd.RunE(cmd, nil); err != nil { + t.Fatalf("RunE: %v", err) + } + + got := out.String() + if !json.Valid(out.Bytes()) { + t.Fatalf("output is not valid JSON:\n%s", got) + } + if 
!strings.Contains(got, `"status-page change-create"`) { + head := got + if len(head) > 400 { + head = head[:400] + } + t.Errorf("dump missing status-page change-create path; output head:\n%s", head) + } +} diff --git a/internal/cli/root.go b/internal/cli/root.go index 3901648..f243fb2 100644 --- a/internal/cli/root.go +++ b/internal/cli/root.go @@ -135,6 +135,9 @@ func init() { rootCmd.AddCommand(newMonitQueryCmd()) rootCmd.AddCommand(newMonitAgentCmd()) + // Hidden command-tree oracle for the skill-card tooling (internal/skilldoc). + rootCmd.AddCommand(newDumpCommandsCmd()) + // Generated commands (full OpenAPI coverage). Registered AFTER curated // commands so curated leaves win on any name conflict (see genAddLeaf). registerGenerated(rootCmd) diff --git a/internal/cmd/skilldoc/main.go b/internal/cmd/skilldoc/main.go new file mode 100644 index 0000000..16e57a2 --- /dev/null +++ b/internal/cmd/skilldoc/main.go @@ -0,0 +1,237 @@ +// Command skilldoc is the dev tool for the flashduty skill cards. It builds the +// command-tree dump in-process (via cli.RootForDump) and either rewrites a +// card's generated fence (`skilldoc gen `) or validates every card under +// skills/flashduty against the dump (`skilldoc check`): unknown commands/flags +// in examples and out-of-date generated fences. Run from the repo root. +package main + +import ( + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strings" + + "github.com/spf13/cobra" + + "github.com/flashcatcloud/flashduty-cli/internal/cli" + "github.com/flashcatcloud/flashduty-cli/internal/skilldoc" +) + +// skillDir is the card root relative to the repo root. 
+const skillDir = "skills/flashduty" + +func main() { + root := &cobra.Command{ + Use: "skilldoc", + Short: "Generate and validate flashduty skill command-cards", + SilenceUsage: true, + SilenceErrors: true, + } + root.AddCommand(genCmd(), checkCmd()) + if err := root.Execute(); err != nil { + fmt.Fprintln(os.Stderr, "skilldoc:", err) + os.Exit(1) + } +} + +func genCmd() *cobra.Command { + return &cobra.Command{ + Use: "gen [group]", + Short: "Rewrite the generated fence in skills/flashduty/reference/.md (every card if no group given)", + Args: cobra.MaximumNArgs(1), + RunE: func(_ *cobra.Command, args []string) error { + base, err := cardBase() + if err != nil { + return err + } + d := dump() + if len(args) == 1 { + return runGen(d, base, args[0]) + } + return runGenAll(d, base) + }, + } +} + +func checkCmd() *cobra.Command { + return &cobra.Command{ + Use: "check", + Short: "Validate every card under skills/flashduty against the command oracle", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + base, err := cardBase() + if err != nil { + return err + } + n, err := runCheck(dump(), base, cmd.OutOrStdout()) + if err != nil { + return err + } + if n > 0 { + return fmt.Errorf("%d card issue(s) found", n) + } + _, err = fmt.Fprintln(cmd.OutOrStdout(), "skilldoc: cards OK") + return err + }, + } +} + +// dump builds the command-tree dump from the live CLI root, in-process. +func dump() skilldoc.Dump { return skilldoc.Build(cli.RootForDump()) } + +// runGen rewrites the GENERATED: fence inside /reference/.md +// with a fresh render, leaving all hand-written content outside the fence +// untouched. 
+func runGen(d skilldoc.Dump, base, group string) error { + card := filepath.Join(base, "reference", group+".md") + raw, err := os.ReadFile(card) + if err != nil { + return fmt.Errorf("read card: %w", err) + } + body := normalizeEOL(string(raw)) + + start, end := skilldoc.FenceStart(group), skilldoc.FenceEnd(group) + si := strings.Index(body, start) + ei := strings.Index(body, end) + if si < 0 || ei < 0 || ei < si { + return fmt.Errorf("%s: no GENERATED:%s fence to fill (add the start/end markers first)", card, group) + } + + fresh := skilldoc.GenerateFence(d, group) + updated := body[:si] + fresh + body[ei+len(end):] + if updated == body { + return nil // already fresh + } + if err := os.WriteFile(card, []byte(updated), 0o644); err != nil { + return fmt.Errorf("write card: %w", err) + } + return nil +} + +// runGenAll regenerates the fence of every dump group that has a card file under +// /reference. The group set is derived from the dump (intersected with the +// cards that actually exist), so it stays correct as domains are added or +// renamed — no hardcoded list. Groups without a card (e.g. webhook) are skipped. +func runGenAll(d skilldoc.Dump, base string) error { + seen := map[string]bool{} + var groups []string + for _, c := range d.Commands { + if c.Group != "" && !seen[c.Group] { + seen[c.Group] = true + groups = append(groups, c.Group) + } + } + sort.Strings(groups) + for _, g := range groups { + if _, err := os.Stat(filepath.Join(base, "reference", g+".md")); err != nil { + continue // no card for this group + } + if err := runGen(d, base, g); err != nil { + return fmt.Errorf("gen %s: %w", g, err) + } + } + return nil +} + +// runCheck loads every *.md under base, validates examples and fence freshness +// against the dump, prints each issue as "relpath:line kind detail", and +// returns the issue count. A missing base directory is not an error (no cards → +// no issues). 
+func runCheck(d skilldoc.Dump, base string, w io.Writer) (int, error) { + docs, err := loadDocs(base) + if err != nil { + return 0, err + } + + issues := append(skilldoc.Validate(d, docs), skilldoc.CheckFences(d, docs)...) + sort.Slice(issues, func(i, j int) bool { + if issues[i].Doc != issues[j].Doc { + return issues[i].Doc < issues[j].Doc + } + if issues[i].Line != issues[j].Line { + return issues[i].Line < issues[j].Line + } + return issues[i].Kind < issues[j].Kind + }) + for _, is := range issues { + if _, err := fmt.Fprintf(w, "%s:%d %s %s\n", is.Doc, is.Line, is.Kind, is.Detail); err != nil { + return 0, err + } + } + return len(issues), nil +} + +// loadDocs reads every *.md file under base (recursively) into a Doc with its +// path relative to base. A non-existent base yields no docs. +func loadDocs(base string) ([]skilldoc.Doc, error) { + info, err := os.Stat(base) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, err + } + if !info.IsDir() { + return nil, fmt.Errorf("%s is not a directory", base) + } + + var docs []skilldoc.Doc + err = filepath.WalkDir(base, func(path string, e os.DirEntry, err error) error { + if err != nil { + return err + } + if e.IsDir() || !strings.HasSuffix(path, ".md") { + return nil + } + raw, err := os.ReadFile(path) + if err != nil { + return err + } + rel, err := filepath.Rel(base, path) + if err != nil { + rel = path + } + docs = append(docs, skilldoc.Doc{Path: rel, Body: normalizeEOL(string(raw))}) + return nil + }) + if err != nil { + return nil, err + } + return docs, nil +} + +// normalizeEOL collapses Windows CRLF to LF so the byte-exact fence comparison +// and the line-based harvester are insensitive to how git checked the cards out +// (Windows autocrlf would otherwise make every fence look stale). The generated +// fence is always LF, so LF is the canonical form to compare against. 
+func normalizeEOL(s string) string { return strings.ReplaceAll(s, "\r\n", "\n") } + +// cardBase resolves /skills/flashduty by walking up from the cwd to +// the directory containing go.mod. +func cardBase() (string, error) { + root, err := repoRoot() + if err != nil { + return "", err + } + return filepath.Join(root, skillDir), nil +} + +// repoRoot walks up from the working directory until it finds go.mod. +func repoRoot() (string, error) { + dir, err := os.Getwd() + if err != nil { + return "", err + } + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir, nil + } + parent := filepath.Dir(dir) + if parent == dir { + return "", fmt.Errorf("go.mod not found from %s upward", dir) + } + dir = parent + } +} diff --git a/internal/cmd/skilldoc/main_test.go b/internal/cmd/skilldoc/main_test.go new file mode 100644 index 0000000..1722aed --- /dev/null +++ b/internal/cmd/skilldoc/main_test.go @@ -0,0 +1,192 @@ +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/flashcatcloud/flashduty-cli/internal/skilldoc" +) + +// fixtureDump is a small dump with one status-page leaf, decoupled from the +// real CLI tree so the test stays deterministic. +func fixtureDump() skilldoc.Dump { + return skilldoc.Dump{Commands: []skilldoc.Command{ + { + Path: "status-page change-create", + Group: "status-page", + Short: "Create status page event", + Long: "Create status page event.\n\nRequest fields:\n" + + " --type string (required) — Event type. 
[incident, maintenance]\n", + Flags: []skilldoc.Flag{{Name: "type", Type: "string"}, {Name: "data", Type: "string"}}, + }, + }} +} + +func writeFile(t *testing.T, path, body string) { + t.Helper() + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(path, []byte(body), 0o644); err != nil { + t.Fatal(err) + } +} + +func TestRunCheck_FlagsStaleAndUnknown(t *testing.T) { + dir := t.TempDir() + d := fixtureDump() + + // A reference card with a deliberately stale fence + a bad-flag example. + body := "# status-page\n\n" + + "```bash\nfduty status-page change-create --type incident --bogus 1\n```\n\n" + + skilldoc.FenceStart("status-page") + "\n\n### change-create\nSTALE WRONG\n\n" + + skilldoc.FenceEnd("status-page") + "\n" + writeFile(t, filepath.Join(dir, "reference", "status-page.md"), body) + + var out bytes.Buffer + n, err := runCheck(d, dir, &out) + if err != nil { + t.Fatalf("runCheck: %v", err) + } + if n == 0 { + t.Fatalf("expected issues, got 0\noutput:\n%s", out.String()) + } + got := out.String() + if !strings.Contains(got, "stale-fence") { + t.Errorf("missing stale-fence in output:\n%s", got) + } + if !strings.Contains(got, "unknown-flag") { + t.Errorf("missing unknown-flag in output:\n%s", got) + } + // Issues are reported with a file:line prefix. + if !strings.Contains(got, "status-page.md:") { + t.Errorf("issues should carry file:line; output:\n%s", got) + } +} + +func TestRunCheck_CleanDirIsZero(t *testing.T) { + dir := t.TempDir() + d := fixtureDump() + + // A clean card: fresh fence + a valid example. 
+ body := "# status-page\n\n" + + "```bash\nfduty status-page change-create --type incident\n```\n\n" + + skilldoc.GenerateFence(d, "status-page") + "\n" + writeFile(t, filepath.Join(dir, "reference", "status-page.md"), body) + + var out bytes.Buffer + n, err := runCheck(d, dir, &out) + if err != nil { + t.Fatalf("runCheck: %v", err) + } + if n != 0 { + t.Errorf("clean dir: want 0 issues, got %d:\n%s", n, out.String()) + } +} + +// A card checked out with Windows CRLF line endings must still validate clean: +// the fence comparison and harvester normalize EOL, so freshness does not depend +// on how git materialized the file (regression test for the Windows CI failure). +func TestRunCheck_CRLFCardIsClean(t *testing.T) { + dir := t.TempDir() + d := fixtureDump() + body := "# status-page\n\n" + + "```bash\nfduty status-page change-create --type incident\n```\n\n" + + skilldoc.GenerateFence(d, "status-page") + "\n" + crlf := strings.ReplaceAll(body, "\n", "\r\n") + writeFile(t, filepath.Join(dir, "reference", "status-page.md"), crlf) + + var out bytes.Buffer + n, err := runCheck(d, dir, &out) + if err != nil { + t.Fatalf("runCheck: %v", err) + } + if n != 0 { + t.Errorf("CRLF card should validate clean, got %d issue(s):\n%s", n, out.String()) + } +} + +func TestRunCheck_MissingDirIsZero(t *testing.T) { + d := fixtureDump() + var out bytes.Buffer + n, err := runCheck(d, filepath.Join(t.TempDir(), "does-not-exist"), &out) + if err != nil { + t.Fatalf("runCheck on missing dir should not error: %v", err) + } + if n != 0 { + t.Errorf("missing skills dir: want 0 issues, got %d", n) + } +} + +// TestRunGenAll covers `skilldoc gen` with no group: it must regenerate every +// card that exists, derive the group set from the dump, and silently skip a dump +// group that has no card file (e.g. webhook) rather than erroring. 
+func TestRunGenAll_FillsEveryCardAndSkipsCardless(t *testing.T) { + dir := t.TempDir() + d := skilldoc.Dump{Commands: []skilldoc.Command{ + {Path: "status-page change-create", Group: "status-page", Short: "Create", Flags: []skilldoc.Flag{{Name: "type", Type: "string"}}}, + {Path: "incident list", Group: "incident", Short: "List incidents", Flags: []skilldoc.Flag{{Name: "limit", Type: "int"}}}, + {Path: "webhook list", Group: "webhook", Short: "List webhooks"}, // group with NO card file + }} + for _, g := range []string{"status-page", "incident"} { + writeFile(t, filepath.Join(dir, "reference", g+".md"), + "# "+g+"\n\nintro\n\n"+skilldoc.FenceStart(g)+"\n"+skilldoc.FenceEnd(g)+"\n") + } + + if err := runGenAll(d, dir); err != nil { + t.Fatalf("runGenAll must not error on the cardless webhook group: %v", err) + } + + for g, verb := range map[string]string{"status-page": "### change-create", "incident": "### list"} { + raw, err := os.ReadFile(filepath.Join(dir, "reference", g+".md")) + if err != nil { + t.Fatal(err) + } + if !strings.Contains(string(raw), verb) { + t.Errorf("%s card fence not filled (want %q):\n%s", g, verb, raw) + } + if !strings.Contains(string(raw), "intro") { + t.Errorf("%s: gen-all clobbered hand-written content", g) + } + } + var out bytes.Buffer + if n, _ := runCheck(d, dir, &out); n != 0 { + t.Errorf("after gen-all, check should be clean; got %d:\n%s", n, out.String()) + } +} + +func TestRunGen_FillsFence(t *testing.T) { + dir := t.TempDir() + d := fixtureDump() + + // Card with empty fence markers; gen should fill them with a fresh render. 
+ card := filepath.Join(dir, "reference", "status-page.md") + body := "# status-page\n\nintro\n\n" + + skilldoc.FenceStart("status-page") + "\n" + skilldoc.FenceEnd("status-page") + "\n" + writeFile(t, card, body) + + if err := runGen(d, dir, "status-page"); err != nil { + t.Fatalf("runGen: %v", err) + } + + updated, err := os.ReadFile(card) + if err != nil { + t.Fatal(err) + } + if !strings.Contains(string(updated), "### change-create") { + t.Errorf("gen did not fill fence:\n%s", updated) + } + // After gen, the fence must be fresh (check reports 0 stale-fence issues). + var out bytes.Buffer + n, _ := runCheck(d, dir, &out) + if n != 0 { + t.Errorf("after gen, check should be clean; got %d:\n%s", n, out.String()) + } + // Hand-written content outside the fence is preserved. + if !strings.Contains(string(updated), "intro") { + t.Errorf("gen clobbered hand-written content:\n%s", updated) + } +} diff --git a/internal/skilldoc/build.go b/internal/skilldoc/build.go new file mode 100644 index 0000000..9d2a2e7 --- /dev/null +++ b/internal/skilldoc/build.go @@ -0,0 +1,66 @@ +package skilldoc + +import ( + "sort" + "strings" + + "github.com/spf13/cobra" + "github.com/spf13/pflag" +) + +// Build walks the cobra tree rooted at root and returns a structured dump of +// every runnable, non-hidden leaf command. Group containers (non-runnable +// parents like "status-page") are descended into but not emitted themselves. +// +// Path is the space-joined chain of cobra command names below the root, using +// c.Name() so a positional placeholder in Use (e.g. "change-create ") +// is stripped to the bare verb. Required flags are detected via cobra's +// one-required-flag annotation. Enums and nested --data fields are NOT +// re-derived here; they live verbatim in Long, which cligen authored. 
+func Build(root *cobra.Command) Dump { + var d Dump + walk(root, nil, &d) + sort.Slice(d.Commands, func(i, j int) bool { + return d.Commands[i].Path < d.Commands[j].Path + }) + return d +} + +func walk(c *cobra.Command, parents []string, d *Dump) { + // The root itself contributes no path segment; its children start the path. + var path []string + if len(parents) > 0 || c.Parent() != nil { + path = append(append([]string{}, parents...), c.Name()) + } + + if c.Runnable() && !c.Hidden { + d.Commands = append(d.Commands, command(c, path)) + } + + for _, child := range c.Commands() { + walk(child, path, d) + } +} + +func command(c *cobra.Command, path []string) Command { + cmd := Command{ + Path: strings.Join(path, " "), + Short: c.Short, + Use: c.Use, + Long: c.Long, + Example: c.Example, + } + if len(path) > 0 { + cmd.Group = path[0] + } + c.Flags().VisitAll(func(f *pflag.Flag) { + cmd.Flags = append(cmd.Flags, Flag{ + Name: f.Name, + Type: f.Value.Type(), + Default: f.DefValue, + Usage: f.Usage, + Required: f.Annotations[cobra.BashCompOneRequiredFlag] != nil, + }) + }) + return cmd +} diff --git a/internal/skilldoc/build_test.go b/internal/skilldoc/build_test.go new file mode 100644 index 0000000..705c827 --- /dev/null +++ b/internal/skilldoc/build_test.go @@ -0,0 +1,51 @@ +package skilldoc + +import ( + "testing" + + "github.com/spf13/cobra" +) + +func testTree() *cobra.Command { + root := &cobra.Command{Use: "fduty"} + sp := &cobra.Command{Use: "status-page"} + create := &cobra.Command{Use: "change-create ", Short: "Create status page event", + Long: "Create status page event.\n\nRequest fields:\n --type string (required) — Event type. 
[incident, maintenance]\n", + Run: func(*cobra.Command, []string) {}} + create.Flags().String("type", "", "Event type.") + _ = create.MarkFlagRequired("type") + create.Flags().String("title", "", "Event title.") + sp.AddCommand(create) + root.AddCommand(sp) + return root +} + +func TestBuild_CapturesLeafWithFlagsAndRequired(t *testing.T) { + d := Build(testTree()) + var got *Command + for i := range d.Commands { + if d.Commands[i].Path == "status-page change-create" { + got = &d.Commands[i] + } + } + if got == nil { + t.Fatalf("missing status-page change-create; got %+v", d.Commands) + } + if got.Group != "status-page" { + t.Errorf("group = %q", got.Group) + } + // Use must be captured verbatim — it carries the positional placeholder that + // Path strips, and is the only runtime signal of cligen's positional fold. + if got.Use != "change-create " { + t.Errorf("Use = %q, want %q", got.Use, "change-create ") + } + var typeFlag *Flag + for i := range got.Flags { + if got.Flags[i].Name == "type" { + typeFlag = &got.Flags[i] + } + } + if typeFlag == nil || !typeFlag.Required { + t.Errorf("--type should be present and required: %+v", got.Flags) + } +} diff --git a/internal/skilldoc/fences_test.go b/internal/skilldoc/fences_test.go new file mode 100644 index 0000000..c799b4e --- /dev/null +++ b/internal/skilldoc/fences_test.go @@ -0,0 +1,51 @@ +package skilldoc + +import ( + "strings" + "testing" +) + +func TestCheckFences_DetectsStale(t *testing.T) { + d := generatorDump() + fresh := GenerateFence(d, "status-page") + + freshDoc := "# Status page\n\nintro\n\n" + fresh + "\n\nfooter\n" + staleDoc := "# Status page\n\n" + + FenceStart("status-page") + "\n\n### change-create\nWRONG STALE CONTENT\n\n" + + FenceEnd("status-page") + "\n" + noFenceDoc := "# Status page\n\nJust prose, no generated fence at all.\n" + + docs := []Doc{ + {Path: "fresh", Body: freshDoc}, + {Path: "stale", Body: staleDoc}, + {Path: "none", Body: noFenceDoc}, + } + issues := CheckFences(d, docs) + + 
byDoc := map[string][]Issue{} + for _, is := range issues { + byDoc[is.Doc] = append(byDoc[is.Doc], is) + } + if n := len(byDoc["fresh"]); n != 0 { + t.Errorf("fresh doc: want 0 issues, got %d: %+v", n, byDoc["fresh"]) + } + if n := len(byDoc["stale"]); n != 1 || byDoc["stale"][0].Kind != "stale-fence" { + t.Errorf("stale doc: want 1 stale-fence, got %+v", byDoc["stale"]) + } + if n := len(byDoc["none"]); n != 0 { + t.Errorf("no-fence doc: want 0 issues, got %d: %+v", n, byDoc["none"]) + } +} + +func TestCheckFences_MalformedFenceIsStale(t *testing.T) { + d := generatorDump() + // Start marker without a matching end marker. + doc := Doc{Path: "broken", Body: FenceStart("status-page") + "\n### change-create\n(no end marker)\n"} + issues := CheckFences(d, []Doc{doc}) + if len(issues) != 1 || issues[0].Kind != "stale-fence" { + t.Errorf("malformed fence should be stale-fence: %+v", issues) + } + if !strings.Contains(issues[0].Detail, "status-page") { + t.Errorf("detail should name the group: %+v", issues) + } +} diff --git a/internal/skilldoc/generate.go b/internal/skilldoc/generate.go new file mode 100644 index 0000000..dfeb6a6 --- /dev/null +++ b/internal/skilldoc/generate.go @@ -0,0 +1,297 @@ +package skilldoc + +import ( + "fmt" + "regexp" + "sort" + "strings" +) + +// Fence markers. The generator owns ONLY the text between these; intent→verb +// routing, worked examples, and gotchas are hand-written outside the fence. +const ( + fenceStartFmt = "" + fenceEndFmt = "" +) + +// GenerateFence renders the factual fenced block for one command group: a +// section per leaf verb with its short description and a flag table (name, +// type, required, usage + enum), plus a body-only (--data) note when the +// command has nested JSON-only fields. Required-ness and enums are sourced from +// the authoritative "Request fields:" text in each command's Long; the flag +// list falls back to the dump's Flags when no such block exists (read-only +// verbs). Output is deterministic. 
+func GenerateFence(d Dump, group string) string { + cmds := groupCommands(d, group) + + var b strings.Builder + fmt.Fprintf(&b, fenceStartFmt+"\n\n", group) + for i, c := range cmds { + if i > 0 { + b.WriteString("\n") + } + writeCommand(&b, c) + } + fmt.Fprintf(&b, "\n"+fenceEndFmt, group) + return b.String() +} + +// FenceStart / FenceEnd return the literal markers for a group, used by the +// freshness check to locate fences in docs. +func FenceStart(group string) string { return fmt.Sprintf(fenceStartFmt, group) } +func FenceEnd(group string) string { return fmt.Sprintf(fenceEndFmt, group) } + +func groupCommands(d Dump, group string) []Command { + var cmds []Command + for _, c := range d.Commands { + if c.Group == group { + cmds = append(cmds, c) + } + } + sort.Slice(cmds, func(i, j int) bool { return cmds[i].Path < cmds[j].Path }) + return cmds +} + +func writeCommand(b *strings.Builder, c Command) { + verb := verbOf(c.Path) + positionals := positionalsOf(c.Use) + + // Heading carries the positional signature verbatim from Use (authoritative), + // e.g. "change-active-list ", so the reader sees the exact argument + // order the binary requires. + if len(positionals) > 0 { + fmt.Fprintf(b, "### %s %s\n", verb, strings.Join(positionals, " ")) + } else { + fmt.Fprintf(b, "### %s\n", verb) + } + if c.Short != "" { + fmt.Fprintf(b, "%s\n", c.Short) + } + + // Flag rows as bullets (not a table) so enum pipes render literally without + // markdown-cell escaping. A field cligen folded into a required positional is + // rendered as a positional row, NOT a --flag — passing it as a flag without + // the positional fails the binary's Args check. 
+ fields := parseRequestFields(c.Long) + folded := foldedFlagNames(positionals) + for _, r := range flagRows(c, fields.flags) { + if folded[r.name] { + fmt.Fprintf(b, "- `<%s>` (positional, required) %s%s\n", r.name, r.typ, notesSuffix(r.notes)) + continue + } + fmt.Fprintf(b, "- `--%s` %s%s%s\n", r.name, r.typ, reqSuffix(r.required), notesSuffix(r.notes)) + } + if len(fields.bodyOnly) > 0 { + fmt.Fprintf(b, "- body-only (`--data`): %s\n", strings.Join(fields.bodyOnly, "; ")) + } +} + +// positionalsOf returns the placeholder tokens after the leaf verb in a Use +// string, e.g. "change-active-list " -> [""] and +// "merge [...]" -> ["", "[...]"]. A Use with +// no positional ("list") returns nil. +func positionalsOf(use string) []string { + f := strings.Fields(use) + if len(f) <= 1 { + return nil + } + return f[1:] +} + +// foldedFlagNames returns the EXACT flag names that cligen has folded into a +// REQUIRED positional argument (a "" placeholder). The binary still +// registers a same-named flag, but supplying it as a flag fails the positional +// Args check, so these names render as positionals (in writeCommand) and are +// rejected as flags (in the validator). +// +// A scalar positional "" folds the exact flag "page-id". An array +// positional appears as " [...]" — cligen singularizes the +// "*-ids" wire name for the placeholder — so its folded flag is the plural wire, +// recovered as inner+"s": "" folds "incident-ids". Matching the +// exact name (not a trailing-"s"-stripped key) keeps an unrelated plural flag +// like "--types" from colliding with a scalar "" positional. +func foldedFlagNames(positionals []string) map[string]bool { + out := map[string]bool{} + for i, p := range positionals { + if !strings.HasPrefix(p, "<") { + continue // optional [<...>] or variadic [...] 
— flag (if any) stays + } + inner := placeholderInner(p) + if i+1 < len(positionals) && strings.HasPrefix(positionals[i+1], "[") { + out[inner+"s"] = true // array positional: the plural "*-ids" wire flag + } else { + out[inner] = true // scalar positional: the exact flag name + } + } + return out +} + +// placeholderInner strips the surrounding <> (and a trailing "...") from a +// REQUIRED Use placeholder, e.g. "" -> "page-id". Only "<...>" tokens +// reach this helper (foldedFlagNames guards on the "<" prefix), so optional +// "[<...>]" brackets never appear here. +func placeholderInner(p string) string { + p = strings.TrimPrefix(p, "<") + p = strings.TrimSuffix(p, "...") + p = strings.TrimSuffix(p, ">") + return p +} + +// verbOf returns the last space-separated segment of a command path (the leaf +// verb), e.g. "status-page change-create" -> "change-create". +func verbOf(path string) string { + parts := strings.Fields(path) + if len(parts) == 0 { + return path + } + return parts[len(parts)-1] +} + +// flagRow is one rendered flag row. +type flagRow struct { + name string + typ string + required bool + notes string +} + +// flagRows merges the dump's flag list with the richer Request-fields parse: +// the dump provides the authoritative flag set + type; the parse provides +// required-ness, usage, and enum. Flags appear in the dump's declared order, +// minus globals (data is shown as a body channel, not a flag row). +func flagRows(c Command, parsed map[string]parsedFlag) []flagRow { + var rows []flagRow + for _, f := range c.Flags { + if globalFlags[f.Name] { + continue + } + row := flagRow{name: f.Name, typ: f.Type} + if pf, ok := parsed[f.Name]; ok { + row.required = pf.required + row.notes = withEnum(pf.usage, pf.enum) + } + rows = append(rows, row) + } + return rows +} + +// reqSuffix renders the required marker appended to a flag's type token. 
+func reqSuffix(required bool) string { + if required { + return " (required)" + } + return "" +} + +// notesSuffix renders the usage/enum description after an em-dash, or empty. +func notesSuffix(notes string) string { + notes = strings.ReplaceAll(notes, "\n", " ") + notes = strings.TrimSpace(notes) + if notes == "" { + return "" + } + return " — " + notes +} + +// withEnum appends an enum hint to a usage string. +func withEnum(usage string, enum []string) string { + if len(enum) == 0 { + return usage + } + hint := "enum: " + strings.Join(enum, " | ") + if usage == "" { + return hint + } + return usage + " · " + hint +} + +// --- Long "Request fields:" parser ----------------------------------------- + +type parsedFlag struct { + required bool + enum []string + usage string +} + +type requestFields struct { + flags map[string]parsedFlag + bodyOnly []string // nested --data-only top-level field summaries +} + +var ( + flagLineRe = regexp.MustCompile(`^\s{2}--([a-z0-9-]+)\s+\S+\s*(.*)$`) + bodyLineRe = regexp.MustCompile(`^\s{2}([a-z0-9_]+)\s+\(([^,)]*)[^)]*\)\s*(.*)$`) + enumRe = regexp.MustCompile(`\[([^\]]+)\]`) + requiredTag = "(required)" +) + +// parseRequestFields extracts the per-flag required/enum/usage and the +// body-only (--data) field summaries from a command's Long "Request fields:" +// block. Returns empty maps when the block is absent (read-only verbs). +func parseRequestFields(long string) requestFields { + rf := requestFields{flags: map[string]parsedFlag{}} + lines := strings.Split(long, "\n") + in := false + for _, line := range lines { + trimmed := strings.TrimSpace(line) + if trimmed == "Request fields:" { + in = true + continue + } + if !in { + continue + } + // The block ends at a blank line or the Response fields header. 
+ if trimmed == "" || strings.HasPrefix(trimmed, "Response fields") { + break + } + if m := flagLineRe.FindStringSubmatch(line); m != nil { + name, tail := m[1], m[2] + rf.flags[name] = parsedFlag{ + required: strings.Contains(tail, requiredTag), + enum: parseEnum(tail), + usage: cleanUsage(tail), + } + continue + } + // A top-level body-only field (no -- prefix, 2-space indent). Sub-fields + // are indented deeper and skipped here. The type capture stops at the + // first comma so "(array, via --data)" yields just "array". + if m := bodyLineRe.FindStringSubmatch(line); m != nil { + name, typ, tail := m[1], strings.TrimSpace(m[2]), m[3] + summary := name + " (" + typ + ")" + if strings.Contains(tail, requiredTag) { + summary += " (required)" + } + rf.bodyOnly = append(rf.bodyOnly, summary) + } + } + return rf +} + +// parseEnum pulls the enum members out of a trailing "[a, b, c]" marker. +func parseEnum(tail string) []string { + m := enumRe.FindStringSubmatch(tail) + if m == nil { + return nil + } + parts := strings.Split(m[1], ",") + out := make([]string, 0, len(parts)) + for _, p := range parts { + if v := strings.TrimSpace(p); v != "" { + out = append(out, v) + } + } + return out +} + +// cleanUsage strips the leading em-dash, the (required) tag, and the trailing +// enum bracket from a flag line's tail, leaving the human description. 
+func cleanUsage(tail string) string { + s := tail + s = enumRe.ReplaceAllString(s, "") + s = strings.ReplaceAll(s, requiredTag, "") + s = strings.TrimSpace(s) + s = strings.TrimPrefix(s, "—") + return strings.TrimSpace(s) +} diff --git a/internal/skilldoc/generate_test.go b/internal/skilldoc/generate_test.go new file mode 100644 index 0000000..e17db20 --- /dev/null +++ b/internal/skilldoc/generate_test.go @@ -0,0 +1,186 @@ +package skilldoc + +import ( + "strings" + "testing" +) + +// generatorDump mirrors the real cligen Long shape: a Request fields block with +// a required enum flag, a non-required flag, and a nested --data body field. +func generatorDump() Dump { + return Dump{Commands: []Command{ + { + Path: "status-page change-create", + Group: "status-page", + Short: "Create status page event", + Use: "change-create ", + Long: `Create status page event. + +Create a new incident or maintenance event on a status page. + +API: POST /status-page/change/create (statusPageChangeCreate) + +Request fields: + --description string — Event description (Markdown). Required by the validator. + --page-id int (required) — Status page ID. + --type string (required) — Event type. [incident, maintenance] + updates (array, via --data) (required) — Timeline updates. + - status (string) — Change status after this update. [investigating, identified, monitoring, resolved] + +Response fields ('data' envelope is unwrapped — these fields are at the top level): + - change_id (integer) (required) — Newly created event ID. +`, + Flags: []Flag{ + {Name: "description", Type: "string"}, + {Name: "page-id", Type: "int"}, + {Name: "type", Type: "string"}, + {Name: "data", Type: "string"}, + }, + }, + { + Path: "status-page change-active-list", + Group: "status-page", + Short: "List active status page events", + Use: "change-active-list ", + Long: `List active status page events. + +Request fields: + --page-id int (required) — Status page ID. + --type string (required) — Event type filter. 
[incident, maintenance] +`, + Flags: []Flag{{Name: "page-id", Type: "int"}, {Name: "type", Type: "string"}, {Name: "data", Type: "string"}}, + }, + // A different group must be excluded. + {Path: "incident detail", Group: "incident", Short: "x", Flags: []Flag{{Name: "data"}}}, + }} +} + +// foldedFlagNames must fold the EXACT flag a positional shadows. A scalar +// "" folds only "type" — an unrelated plural flag "--types" must survive. +// An array positional " [...]" folds the plural "*-ids" wire +// ("incident-ids"), since cligen singularizes the placeholder. Matching on a +// trailing-"s"-stripped key would wrongly collapse "types" onto "". +func TestFoldedFlagNames_ExactScalarAndArrayPlural(t *testing.T) { + scalar := foldedFlagNames([]string{""}) + if !scalar["type"] { + t.Errorf("scalar should fold flag `type`: %v", scalar) + } + if scalar["types"] { + t.Errorf("scalar must NOT fold unrelated plural `types`: %v", scalar) + } + array := foldedFlagNames([]string{"", "[...]"}) + if !array["incident-ids"] { + t.Errorf("array [...] should fold plural wire `incident-ids`: %v", array) + } + if array["incident-id"] { + t.Errorf("array positional should fold the plural wire only, not singular: %v", array) + } + if n := len(foldedFlagNames([]string{"[]"})); n != 0 { + t.Errorf("optional [] must fold nothing, got %v", n) + } +} + +func TestGenerateFence_StatusPage(t *testing.T) { + d := generatorDump() + out := GenerateFence(d, "status-page") + + // Fence markers, scoped to the group. + if !strings.Contains(out, "GENERATED:status-page START") { + t.Errorf("missing start marker:\n%s", out) + } + if !strings.Contains(out, "GENERATED:status-page END") { + t.Errorf("missing end marker:\n%s", out) + } + + // Each leaf verb of the group is listed; other groups are excluded. 
+ if !strings.Contains(out, "### change-create") { + t.Errorf("missing change-create section:\n%s", out) + } + if !strings.Contains(out, "### change-active-list") { + t.Errorf("missing change-active-list section:\n%s", out) + } + if strings.Contains(out, "incident detail") { + t.Errorf("other-group command leaked into fence:\n%s", out) + } + + // change-create's --type is required and carries its enum. + if !strings.Contains(out, "--type") { + t.Errorf("missing --type flag:\n%s", out) + } + if !strings.Contains(out, "incident | maintenance") { + t.Errorf("missing --type enum incident | maintenance:\n%s", out) + } + + // Deterministic. + if out != GenerateFence(d, "status-page") { + t.Errorf("GenerateFence not deterministic") + } +} + +// TestGenerateFence_RequiredMarker checks required flags are visibly marked. +func TestGenerateFence_RequiredMarker(t *testing.T) { + out := GenerateFence(generatorDump(), "status-page") + // The change-create section must mark --type and --page-id required but not + // --description. + sec := sectionFor(out, "change-create") + if !strings.Contains(sec, "--type") || !markedRequired(sec, "--type") { + t.Errorf("--type should be marked required in section:\n%s", sec) + } + if markedRequired(sec, "--description") { + t.Errorf("--description should NOT be marked required:\n%s", sec) + } +} + +// TestGenerateFence_PositionalArg asserts that a field cligen folded into a +// positional argument (recorded in Use as "") is rendered as a +// positional — shown in the verb heading and as a `(positional, required)` row — +// and is NOT presented as a `--page-id` flag (passing the flag alone fails the +// binary's Args check). The non-folded --type flag must still render normally. +func TestGenerateFence_PositionalArg(t *testing.T) { + sec := sectionFor(GenerateFence(generatorDump(), "status-page"), "change-active-list") + if sec == "" { + t.Fatal("no change-active-list section") + } + // Heading carries the positional signature. 
+ if !strings.Contains(sec, "### change-active-list ") { + t.Errorf("heading should show positional ``:\n%s", sec) + } + // page-id is documented as a positional, not as a --flag. + if !strings.Contains(sec, "`` (positional, required)") { + t.Errorf("page-id should render as a positional row:\n%s", sec) + } + if strings.Contains(sec, "--page-id") { + t.Errorf("folded positional must NOT appear as a --page-id flag row:\n%s", sec) + } + // A non-folded flag still renders as a flag. + if !strings.Contains(sec, "--type") { + t.Errorf("non-folded --type flag should remain:\n%s", sec) + } +} + +// sectionFor returns the slice of out from "### " to the next "### " or end. +func sectionFor(out, verb string) string { + start := strings.Index(out, "### "+verb) + if start < 0 { + return "" + } + rest := out[start+len("### "+verb):] + if next := strings.Index(rest, "\n### "); next >= 0 { + return out[start : start+len("### "+verb)+next] + } + return out[start:] +} + +// markedRequired reports whether the row for flag carries the generator's +// required marker. The marker is the literal "(required)" token emitted right +// after the type (not any "required" prose that may appear in a flag's usage +// text, e.g. "Required by the validator"). +func markedRequired(section, flag string) bool { + for _, line := range strings.Split(section, "\n") { + // Only inspect the flag's own bullet row (starts with "- ``"). + if strings.HasPrefix(strings.TrimSpace(line), "- `"+flag+"`") { + return strings.Contains(line, "(required)") + } + } + return false +} diff --git a/internal/skilldoc/harvest.go b/internal/skilldoc/harvest.go new file mode 100644 index 0000000..424f504 --- /dev/null +++ b/internal/skilldoc/harvest.go @@ -0,0 +1,175 @@ +package skilldoc + +import ( + "regexp" + "strings" +) + +// Example is one harvested CLI invocation from a markdown document. 
Tokens are +// the whitespace-separated arguments AFTER the `fduty`/`flashduty` binary word +// (so Tokens[0] is the command group). Line is the 1-based line where the +// invocation began. +type Example struct { + Line int + Tokens []string +} + +// binaryWords are the recognized CLI invocation prefixes. +var binaryWords = map[string]bool{"fduty": true, "flashduty": true} + +// placeholderRe matches an `ou_xxx`-style placeholder: a short lowercase prefix +// then one or more x's (e.g. ou_xxx, ch_xxx). +var placeholderRe = regexp.MustCompile(`^[a-z]{2,}_x+$`) + +// HasPlaceholder reports whether tok is a documentation placeholder rather than +// a literal argument: angle-bracket tokens (), shell vars ($VAR), the +// ellipsis (...), or `ou_xxx`-style stand-ins. The validator skips the value +// of any flag whose token is a placeholder. +func HasPlaceholder(tok string) bool { + switch { + case strings.ContainsAny(tok, "<>"): + return true + case strings.HasPrefix(tok, "$"): + return true + case tok == "...": + return true + case placeholderRe.MatchString(tok): + return true + default: + return false + } +} + +// HarvestExamples pulls every `fduty`/`flashduty` invocation out of markdown: +// fenced code blocks (```…```) and inline backtick spans alike. A candidate is +// any line whose first shell word is the binary; trailing-backslash +// continuations are joined into one example. Prose lines (no binary word) are +// ignored. +func HarvestExamples(md string) []Example { + var out []Example + lines := strings.Split(md, "\n") + inFence := false + + for i := 0; i < len(lines); i++ { + line := lines[i] + if isFenceMarker(line) { + inFence = !inFence + continue + } + + if inFence { + if cand, ok := fencedCandidate(line); ok { + joined, next := joinContinuations(cand, lines, i) + if ex, ok := parseInvocation(joined, i+1); ok { + out = append(out, ex) + } + i = next + } + continue + } + + // Outside fences, scan inline backtick spans on this line. 
+ for _, span := range inlineSpans(line) { + if ex, ok := parseInvocation(span, i+1); ok { + out = append(out, ex) + } + } + } + return out +} + +// isFenceMarker reports whether a line opens or closes a ``` code fence. +func isFenceMarker(line string) bool { + return strings.HasPrefix(strings.TrimSpace(line), "```") +} + +// fencedCandidate returns the trimmed line if its first word is a binary word. +func fencedCandidate(line string) (string, bool) { + trimmed := strings.TrimSpace(line) + if firstWordIsBinary(trimmed) { + return trimmed, true + } + return "", false +} + +// joinContinuations folds trailing-backslash continuation lines (starting at +// index start) into a single logical command string. It returns the joined +// string and the index of the last consumed line. +func joinContinuations(first string, lines []string, start int) (string, int) { + parts := []string{strings.TrimSuffix(strings.TrimSpace(first), "\\")} + idx := start + for strings.HasSuffix(strings.TrimSpace(lines[idx]), "\\") && idx+1 < len(lines) { + idx++ + next := strings.TrimSpace(lines[idx]) + // A fence marker terminates the continuation run defensively. + if isFenceMarker(next) { + idx-- + break + } + parts = append(parts, strings.TrimSuffix(next, "\\")) + } + return strings.Join(parts, " "), idx +} + +// inlineSpans returns the contents of each `…` inline code span on a line. +func inlineSpans(line string) []string { + var spans []string + for { + open := strings.IndexByte(line, '`') + if open < 0 { + break + } + rest := line[open+1:] + close := strings.IndexByte(rest, '`') + if close < 0 { + break + } + spans = append(spans, rest[:close]) + line = rest[close+1:] + } + return spans +} + +// parseInvocation tokenizes a command string and, if it starts with a binary +// word, returns the post-binary tokens as an Example. A shell pipe or control +// operator ends the fduty invocation — tokens after it belong to another +// command (e.g. 
`| jq --argjson …`) and must not be validated as fduty flags. +func parseInvocation(cmd string, line int) (Example, bool) { + toks := strings.Fields(stripQuotes(cmd)) + if len(toks) == 0 || !binaryWords[toks[0]] { + return Example{}, false + } + rest := toks[1:] + for i, t := range rest { + if isShellOperator(t) { + rest = rest[:i] + break + } + } + return Example{Line: line, Tokens: rest}, true +} + +// isShellOperator reports whether a standalone token is a shell pipe, sequence, +// or redirection operator that terminates the current command. +func isShellOperator(t string) bool { + switch t { + case "|", "||", "&&", "&", ";", ">", ">>", "|&", "2>", "2>>": + return true + default: + return false + } +} + +// firstWordIsBinary reports whether the first whitespace word of s is a binary. +func firstWordIsBinary(s string) bool { + fields := strings.Fields(s) + return len(fields) > 0 && binaryWords[fields[0]] +} + +// stripQuotes removes ASCII double/single quote characters. For the PoC this is +// enough to keep `--title "x"` from splitting on the embedded space boundary +// incorrectly — flag *names* (the only thing the validator inspects) never +// contain spaces, so dropping quotes around values is safe. 
+func stripQuotes(s string) string { + return strings.NewReplacer(`"`, "", `'`, "").Replace(s) +} diff --git a/internal/skilldoc/harvest_test.go b/internal/skilldoc/harvest_test.go new file mode 100644 index 0000000..3d7cd34 --- /dev/null +++ b/internal/skilldoc/harvest_test.go @@ -0,0 +1,60 @@ +package skilldoc + +import "testing" + +func TestHarvest_JoinsContinuationsAndSkipsProse(t *testing.T) { + md := "text\n```bash\nfduty status-page change-create --type incident \\\n --title \"x\"\n```\nmore\n`fduty incident detail `\n" + ex := HarvestExamples(md) + if len(ex) != 2 { + t.Fatalf("want 2 examples, got %d: %+v", len(ex), ex) + } + if ex[0].Tokens[0] != "status-page" || ex[0].Tokens[1] != "change-create" { + t.Errorf("tok: %+v", ex[0].Tokens) + } + if !HasPlaceholder("") || HasPlaceholder("--type") { + t.Errorf("placeholder detection wrong") + } + // The joined example must carry the continuation's flags too, else the + // validator would never see --title on a multi-line example. + if !containsTok(ex[0].Tokens, "--title") || !containsTok(ex[0].Tokens, "--type") { + t.Errorf("continuation flags lost: %+v", ex[0].Tokens) + } +} + +func TestHarvest_StopsAtShellPipe(t *testing.T) { + // A piped jq carries its own flags (--argjson) which must NOT be attributed + // to the fduty command. Only the tokens before the pipe are the invocation. 
+ md := "```bash\nfduty member list --json | jq --argjson ids '[1,2]' '[.[]|select(1)]'\n```\n" + ex := HarvestExamples(md) + if len(ex) != 1 { + t.Fatalf("want 1 example, got %d: %+v", len(ex), ex) + } + if !containsTok(ex[0].Tokens, "member") || !containsTok(ex[0].Tokens, "list") || !containsTok(ex[0].Tokens, "--json") { + t.Errorf("invocation tokens lost: %+v", ex[0].Tokens) + } + if containsTok(ex[0].Tokens, "--argjson") || containsTok(ex[0].Tokens, "jq") { + t.Errorf("tokens after the pipe must be dropped: %+v", ex[0].Tokens) + } +} + +func TestHasPlaceholder_Variants(t *testing.T) { + for _, tok := range []string{"", "$VAR", "...", "ou_xxx", "inc_xxx"} { + if !HasPlaceholder(tok) { + t.Errorf("expected placeholder: %q", tok) + } + } + for _, tok := range []string{"--type", "incident", "5750613685214", "change-create"} { + if HasPlaceholder(tok) { + t.Errorf("expected literal, not placeholder: %q", tok) + } + } +} + +func containsTok(toks []string, want string) bool { + for _, t := range toks { + if t == want { + return true + } + } + return false +} diff --git a/internal/skilldoc/model.go b/internal/skilldoc/model.go new file mode 100644 index 0000000..4c2aaf3 --- /dev/null +++ b/internal/skilldoc/model.go @@ -0,0 +1,41 @@ +// Package skilldoc derives a structured description of the flashduty CLI's +// command tree (the "dump") and uses it to generate and validate the +// command-cards that document the CLI for an LLM operator. +// +// The dump is the single source of truth: it is built in-process from the +// live cobra tree (see Build), so it can never drift from the binary it +// describes. The generator turns a dump into per-domain factual fences; the +// validator checks every documented `fduty …` example against the same dump. +package skilldoc + +// Dump is the structured snapshot of the CLI's command tree. It is the JSON +// contract shared between the dump oracle, the validator, and the generator. 
+type Dump struct { + Commands []Command `json:"commands"` +} + +// Command is one runnable leaf of the CLI tree. +type Command struct { + Path string `json:"path"` // space-joined name chain below root, e.g. "status-page change-create" + Group string `json:"group"` // first path segment, e.g. "status-page" + Short string `json:"short"` + // Use is cobra's raw Use string, e.g. "change-create ". cligen folds + // a required *_id field into a positional argument and records it here as a + // ; that field is then supplied positionally, NOT via its + // same-named --flag (passing the flag alone fails the Args check). Capturing + // Use is what lets the generator render the correct positional invocation — + // the bare Path alone (which strips the placeholder) cannot. + Use string `json:"use"` + Long string `json:"long"` // cligen's Request/Response field text (authoritative for enums + nested --data) + Example string `json:"example"` + Flags []Flag `json:"flags"` +} + +// Flag is one flag of a command, as exposed by pflag. +type Flag struct { + Name string `json:"name"` + Type string `json:"type"` + Default string `json:"default"` + Usage string `json:"usage"` + Required bool `json:"required"` +} diff --git a/internal/skilldoc/validate.go b/internal/skilldoc/validate.go new file mode 100644 index 0000000..41536ec --- /dev/null +++ b/internal/skilldoc/validate.go @@ -0,0 +1,245 @@ +package skilldoc + +import ( + "sort" + "strings" +) + +// Doc is a documentation file fed to the validator: its display Path (for +// issue reporting) and raw markdown Body. +type Doc struct { + Path string + Body string +} + +// Issue is one validation finding against the command oracle. +type Issue struct { + Doc string + Line int + Kind string // "unknown-command" | "unknown-flag" | "positional-as-flag" | "stale-fence" + Detail string +} + +// globalFlags are always-valid persistent flags that any command accepts; the +// validator never flags them as unknown. 
Kept in one place to stay DRY. +var globalFlags = map[string]bool{ + "output-format": true, + "json": true, + "no-trunc": true, + "app-key": true, + "base-url": true, + "data": true, +} + +// Validate checks every harvested `fduty …` example in docs against the dump: +// an example whose leading words resolve to no command path yields an +// unknown-command issue; an example flag absent from its command's flag set +// (and not a global flag) yields an unknown-flag issue. Placeholder tokens are +// skipped so documentation stand-ins (, $VAR) never trip the validator. +func Validate(d Dump, docs []Doc) []Issue { + idx := indexDump(d) + var issues []Issue + for _, doc := range docs { + for _, ex := range HarvestExamples(doc.Body) { + issues = append(issues, validateExample(idx, doc.Path, ex)...) + } + } + return issues +} + +// CheckFences asserts every GENERATED: fence embedded in docs matches a +// fresh render from the dump. A fence whose inner content has drifted, or a +// start marker with no matching end marker, yields a stale-fence issue. Docs +// with no generated fence for a group are silently fine. 
+func CheckFences(d Dump, docs []Doc) []Issue { + var issues []Issue + for _, group := range groups(d) { + fresh := GenerateFence(d, group) + start, end := FenceStart(group), FenceEnd(group) + for _, doc := range docs { + si := strings.Index(doc.Body, start) + if si < 0 { + continue // no fence for this group in this doc + } + ei := strings.Index(doc.Body[si:], end) + if ei < 0 { + issues = append(issues, Issue{ + Doc: doc.Path, + Line: lineOf(doc.Body, si), + Kind: "stale-fence", + Detail: "unterminated GENERATED:" + group + " fence", + }) + continue + } + block := doc.Body[si : si+ei+len(end)] + if block != fresh { + issues = append(issues, Issue{ + Doc: doc.Path, + Line: lineOf(doc.Body, si), + Kind: "stale-fence", + Detail: "GENERATED:" + group + " fence is out of date — run `make gen-cards`", + }) + } + } + } + return issues +} + +// groups returns the sorted, de-duplicated set of command groups in the dump. +func groups(d Dump) []string { + seen := map[string]bool{} + var out []string + for _, c := range d.Commands { + if c.Group != "" && !seen[c.Group] { + seen[c.Group] = true + out = append(out, c.Group) + } + } + sort.Strings(out) + return out +} + +// lineOf returns the 1-based line number of byte offset off within body. +func lineOf(body string, off int) int { + return strings.Count(body[:off], "\n") + 1 +} + +// commandIndex maps a command path to its set of declared flag names and to the +// set of flags cligen folded into required positionals, and carries the sorted +// list of paths for longest-prefix resolution. 
+type commandIndex struct { + flags map[string]map[string]bool + folded map[string]map[string]bool + paths []string +} + +func indexDump(d Dump) commandIndex { + idx := commandIndex{ + flags: make(map[string]map[string]bool), + folded: make(map[string]map[string]bool), + } + for _, c := range d.Commands { + set := make(map[string]bool, len(c.Flags)) + for _, f := range c.Flags { + set[f.Name] = true + } + idx.flags[c.Path] = set + idx.folded[c.Path] = foldedFlagNames(positionalsOf(c.Use)) + idx.paths = append(idx.paths, c.Path) + } + // Longest paths first so resolveCommand prefers the most specific match. + sort.Slice(idx.paths, func(i, j int) bool { + return len(idx.paths[i]) > len(idx.paths[j]) + }) + return idx +} + +func validateExample(idx commandIndex, docPath string, ex Example) []Issue { + path, flagSet, ok := resolveCommand(idx, ex.Tokens) + if !ok { + // An unresolved command that is empty (a bare `fduty` prose mention) or + // templated (a placeholder in the command-path position, e.g. + // `fduty `) is a documentation reference, not a runnable + // example — skip it, mirroring the placeholder tolerance applied to flag + // values below. A non-empty, non-placeholder path that still doesn't + // resolve is a genuine wrong command name (e.g. `statuspage`) and is + // reported. + words := leadingWords(ex.Tokens) + if len(words) == 0 || anyPlaceholder(words) { + return nil + } + return []Issue{{ + Doc: docPath, + Line: ex.Line, + Kind: "unknown-command", + Detail: strings.Join(words, " "), + }} + } + + folded := idx.folded[path] + var issues []Issue + for _, tok := range ex.Tokens { + name, isFlag := flagName(tok) + if !isFlag || HasPlaceholder(name) { + continue + } + // cligen folded this field into a required positional: the flag is still + // registered (so it is in flagSet) but passing it as a flag fails the + // binary's Args check. Catch it before the flagSet pass would wave it + // through — this is the exact misuse only a live run surfaced before. 
+ if folded[name] { + issues = append(issues, Issue{ + Doc: docPath, + Line: ex.Line, + Kind: "positional-as-flag", + Detail: "--" + name + " is folded into a required positional of `" + path + "` — pass it as a bare argument, not a flag", + }) + continue + } + if globalFlags[name] || flagSet[name] { + continue + } + issues = append(issues, Issue{ + Doc: docPath, + Line: ex.Line, + Kind: "unknown-flag", + Detail: "--" + name + " not a flag of `" + path + "`", + }) + } + return issues +} + +// resolveCommand finds the longest dump command path that is a prefix of the +// example's leading non-flag words. Returns the path, its flag set, and whether +// a match was found. +func resolveCommand(idx commandIndex, tokens []string) (string, map[string]bool, bool) { + words := leadingWords(tokens) + candidate := strings.Join(words, " ") + for _, p := range idx.paths { + if candidate == p || strings.HasPrefix(candidate+" ", p+" ") { + return p, idx.flags[p], true + } + } + return "", nil, false +} + +// leadingWords returns the run of non-flag tokens at the start of an example +// (the command path words, before any --flag). +func leadingWords(tokens []string) []string { + var words []string + for _, t := range tokens { + if strings.HasPrefix(t, "-") { + break + } + words = append(words, t) + } + return words +} + +// anyPlaceholder reports whether any of the command-path words is a +// documentation placeholder (e.g. ), meaning the example is a template +// rather than a concrete invocation. +func anyPlaceholder(words []string) bool { + for _, w := range words { + if HasPlaceholder(w) { + return true + } + } + return false +} + +// flagName extracts the bare flag name from a token like "--type" or +// "--type=x", returning ("type", true). Non-flag tokens return ("", false). 
+func flagName(tok string) (string, bool) { + if !strings.HasPrefix(tok, "--") { + return "", false + } + name := strings.TrimPrefix(tok, "--") + if i := strings.IndexByte(name, '='); i >= 0 { + name = name[:i] + } + if name == "" { + return "", false + } + return name, true +} diff --git a/internal/skilldoc/validate_test.go b/internal/skilldoc/validate_test.go new file mode 100644 index 0000000..4e4a5ed --- /dev/null +++ b/internal/skilldoc/validate_test.go @@ -0,0 +1,123 @@ +package skilldoc + +import "testing" + +// validatorDump is a minimal dump fixture: one status-page leaf with flags +// {type, title} plus the data flag. +func validatorDump() Dump { + return Dump{Commands: []Command{ + { + Path: "status-page change-create", + Group: "status-page", + Flags: []Flag{ + {Name: "type"}, {Name: "title"}, {Name: "data"}, + }, + }, + }} +} + +func TestValidate_UnknownCommandAndFlag(t *testing.T) { + d := validatorDump() + docs := []Doc{ + {Path: "A", Body: "```bash\nfduty status-page change-create --type incident --title x\n```\n"}, + {Path: "B", Body: "```bash\nfduty status-page change-create --bogus 1\n```\n"}, + {Path: "C", Body: "```bash\nfduty status-page bogus-verb\n```\n"}, + {Path: "D", Body: "```bash\nfduty status-page change-create --title \n```\n"}, + } + issues := Validate(d, docs) + + byDoc := map[string][]Issue{} + for _, is := range issues { + byDoc[is.Doc] = append(byDoc[is.Doc], is) + } + + if n := len(byDoc["A"]); n != 0 { + t.Errorf("doc A: want 0 issues, got %d: %+v", n, byDoc["A"]) + } + if n := len(byDoc["B"]); n != 1 || byDoc["B"][0].Kind != "unknown-flag" { + t.Errorf("doc B: want 1 unknown-flag, got %+v", byDoc["B"]) + } + if n := len(byDoc["C"]); n != 1 || byDoc["C"][0].Kind != "unknown-command" { + t.Errorf("doc C: want 1 unknown-command, got %+v", byDoc["C"]) + } + if n := len(byDoc["D"]); n != 0 { + t.Errorf("doc D: want 0 issues (placeholder value, known flag), got %+v", byDoc["D"]) + } +} + +// A field cligen folded into a required 
positional is still a registered flag, +// but passing it as `--flag` fails the binary's Args check. The validator must +// catch this misuse (kind "positional-as-flag") — the exact error that only a +// live run surfaced before Use was threaded into the oracle. Passing the field +// positionally must stay clean, and the same flag name on a command where it is +// NOT folded (two required ids) must remain valid. +func TestValidate_FoldedPositionalAsFlag(t *testing.T) { + d := Dump{Commands: []Command{ + { // single required id → cligen folds page-id into a positional + Path: "status-page change-active-list", + Group: "status-page", + Use: "change-active-list ", + Flags: []Flag{{Name: "page-id"}, {Name: "type"}, {Name: "data"}}, + }, + { // two required ids → no fold; page-id stays a real flag + Path: "status-page change-timeline-create", + Group: "status-page", + Use: "change-timeline-create", + Flags: []Flag{{Name: "page-id"}, {Name: "change-id"}, {Name: "data"}}, + }, + }} + docs := []Doc{ + {Path: "bad", Body: "```bash\nfduty status-page change-active-list --page-id 5\n```\n"}, + {Path: "good", Body: "```bash\nfduty status-page change-active-list 5 --type incident\n```\n"}, + {Path: "twoid", Body: "```bash\nfduty status-page change-timeline-create --page-id 5 --change-id 9\n```\n"}, + } + byDoc := map[string][]Issue{} + for _, is := range Validate(d, docs) { + byDoc[is.Doc] = append(byDoc[is.Doc], is) + } + if n := len(byDoc["bad"]); n != 1 || byDoc["bad"][0].Kind != "positional-as-flag" { + t.Errorf("bad: want 1 positional-as-flag, got %+v", byDoc["bad"]) + } + if n := len(byDoc["good"]); n != 0 { + t.Errorf("good: positional usage want 0 issues, got %+v", byDoc["good"]) + } + if n := len(byDoc["twoid"]); n != 0 { + t.Errorf("twoid: --page-id on non-folding command want 0 issues, got %+v", byDoc["twoid"]) + } +} + +func TestValidate_GlobalFlagsAllowed(t *testing.T) { + d := validatorDump() + docs := []Doc{ + {Path: "G", Body: "```bash\nfduty status-page 
change-create --type incident --title x --output-format toon\n```\n"}, + } + if issues := Validate(d, docs); len(issues) != 0 { + t.Errorf("global flag --output-format should be allowed: %+v", issues) + } +} + +// Prose mentions of the binary — a bare `fduty` word or a templated +// `fduty ` — are documentation references, not runnable examples, +// and must not be flagged. A genuine wrong command name (no placeholder) must +// STILL be caught, since catching command-name drift is the validator's job. +func TestValidate_SkipsBareAndTemplatedMentions(t *testing.T) { + d := validatorDump() + docs := []Doc{ + {Path: "bare", Body: "The `fduty` CLI is the interface. Each `fduty` subprocess gets auth.\n"}, + {Path: "tmpl", Body: "Derive it then run `fduty --help`.\n"}, + {Path: "drift", Body: "```bash\nfduty statuspage list\n```\n"}, + } + byDoc := map[string][]Issue{} + for _, is := range Validate(d, docs) { + byDoc[is.Doc] = append(byDoc[is.Doc], is) + } + if n := len(byDoc["bare"]); n != 0 { + t.Errorf("bare `fduty` prose mention: want 0 issues, got %+v", byDoc["bare"]) + } + if n := len(byDoc["tmpl"]); n != 0 { + t.Errorf("templated `fduty `: want 0 issues, got %+v", byDoc["tmpl"]) + } + if n := len(byDoc["drift"]); n != 1 || byDoc["drift"][0].Kind != "unknown-command" { + t.Errorf("drift `statuspage`: want 1 unknown-command, got %+v", byDoc["drift"]) + } +} diff --git a/skills/flashduty-admin/SKILL.md b/skills/flashduty-admin/SKILL.md deleted file mode 100644 index c1d0b8f..0000000 --- a/skills/flashduty-admin/SKILL.md +++ /dev/null @@ -1,223 +0,0 @@ ---- -name: flashduty-admin -version: 1.0.0 -description: "Flashduty administration: manage teams (list, get, create, update, delete), list members, and search audit logs for compliance and investigation. Commands: team list/get/create/update/delete, member list, audit search. 
Use when managing team structure, looking up person IDs or team IDs for other commands, finding contact information, searching who performed specific actions, or reviewing audit trails for compliance." -metadata: - requires: - bins: ["flashduty"] - cliHelp: "flashduty member --help" ---- - -# flashduty-admin - -**CRITICAL** -- Before using this skill, read [`../flashduty-shared/SKILL.md`](../flashduty-shared/SKILL.md) for authentication, the 3-layer noise reduction model, global flags, and safety rules. - -## Overview - -Manage teams, look up members, and search audit logs for compliance and investigation. - -## Commands - -### team list - -List teams with their members. - -```bash -flashduty team list [flags] -``` - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--name` | string | | Search by team name substring | -| `--page` | int | 1 | Page number | -| `--limit` | int | 20 | Page size, max 100 | -| `--orderby` | string | | Sort field: `created_at`, `updated_at`, `team_name` | -| `--asc` | bool | false | Sort in ascending order | -| `--person-id` | int | 0 | Filter teams by member ID | - -Output columns: ID, NAME, MEMBERS. - -### team get - -Get detailed information about a specific team. Specify the team by exactly one of `--id`, `--name`, or `--ref-id`. - -```bash -flashduty team get --id -flashduty team get --name "SRE Team" -flashduty team get --ref-id "hr-dept-42" -``` - -| Flag | Type | Description | -|------|------|-------------| -| `--id` | int | Team ID | -| `--name` | string | Team name (exact match) | -| `--ref-id` | string | External reference ID | - -Output fields: ID, Name, Description, Status, Ref ID, Members, Created, Updated, Created By, Updated By. - -### team create - -Create a new team. The `--name` flag is required and must be unique (1-39 characters). 
- -```bash -flashduty team create --name "SRE Team" [flags] -``` - -| Flag | Type | Description | -|------|------|-------------| -| `--name` | string | **Required.** Team name (1-39 characters) | -| `--description` | string | Team description (max 500 characters) | -| `--person-ids` | string | Comma-separated member person IDs | -| `--emails` | string | Comma-separated email addresses to invite | -| `--ref-id` | string | External reference ID for HR system integration | - -### team update - -Update an existing team. The `--id` flag is required. **WARNING:** `--person-ids` replaces the entire member list. Use `team get` to see current members before updating. - -```bash -flashduty team update --id [flags] -``` - -| Flag | Type | Description | -|------|------|-------------| -| `--id` | int | **Required.** Team ID | -| `--name` | string | New team name (1-39 characters) | -| `--description` | string | New description (max 500 characters) | -| `--person-ids` | string | Comma-separated member person IDs (replaces entire member list) | -| `--emails` | string | Comma-separated email addresses to invite | -| `--ref-id` | string | External reference ID | - -### team delete - -Permanently delete a team. Specify the team by exactly one of `--id`, `--name`, or `--ref-id`. This action is **irreversible**. You will be prompted for confirmation unless `--force` is set. - -```bash -flashduty team delete --id -flashduty team delete --name "Old Team" --force -``` - -| Flag | Type | Description | -|------|------|-------------| -| `--id` | int | Team ID | -| `--name` | string | Team name | -| `--ref-id` | string | External reference ID | -| `--force` | bool | Skip confirmation prompt | - -### member list - -List organization members with contact details and status. 
- -```bash -flashduty member list [flags] -``` - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--name` | string | | Search by name | -| `--email` | string | | Search by email | -| `--page` | int | 1 | Page number | - -Output columns: ID, NAME, EMAIL, STATUS, TIMEZONE. - -### audit search - -Search the audit log for system mutations. Uses cursor-based pagination internally. - -```bash -flashduty audit search [flags] -``` - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--since` | string | `7d` | Start time (relative like `24h`, `30d` or absolute) | -| `--until` | string | `now` | End time | -| `--person` | int | 0 | Filter by person ID | -| `--operation` | string | | Filter by operation type (comma-separated) | -| `--limit` | int | 20 | Max results | -| `--page` | int | 1 | Page number | - -Output columns: TIMESTAMP, PERSON, OPERATION, TARGET, DETAILS. - -## Workflows - -### Find a Person's ID for Other Commands - -Member IDs are required as inputs across many CLI commands (incident reassign, oncall who, insight responder, audit filters). Use member list to look them up. - -```bash -# Search by name -flashduty member list --name "John" - -# Search by email -flashduty member list --email "john@company.com" - -# Use the resulting ID in other commands -flashduty incident reassign --assignee -flashduty audit search --person --since 7d -``` - -### Audit Investigation - -```bash -# What did a specific person do in the past week? -flashduty audit search --person --since 7d - -# What operations of a specific type occurred in the past day? 
-flashduty audit search --operation "incident.close" --since 24h - -# Full audit trail for compliance review -flashduty audit search --since 30d --limit 100 -``` - -### Team Structure Review - -```bash -# List all teams and their members -flashduty team list - -# Search for a specific team by name -flashduty team list --name "Platform" - -# Get full detail for a specific team -flashduty team get --id 123 - -# Look up a team by external reference ID -flashduty team get --ref-id "hr-dept-42" -``` - -### Team Lifecycle Management - -```bash -# Create a new team with initial members -flashduty team create --name "SRE Team" --description "Site Reliability" --person-ids 1,2,3 - -# Create a team and invite members by email -flashduty team create --name "Backend Team" --emails alice@example.com,bob@example.com - -# Rename a team -flashduty team update --id 123 --name "Platform SRE" - -# Replace the entire member list (check current members first!) -flashduty team get --id 123 -flashduty team update --id 123 --person-ids 1,2,3,4,5 - -# Delete a team (prompts for confirmation) -flashduty team delete --id 123 - -# Delete without confirmation (for scripting) -flashduty team delete --id 123 --force -``` - -## Key Concepts - -- **Member IDs** (int64) are used across many commands: incident assign/reassign, audit filters, oncall schedules. -- **Team IDs** (int64) are used for filtering: oncall schedules, postmortem list, channels. -- **Team update replaces members** -- `--person-ids` is a full replacement, not an append. Always check current members with `team get` before updating. -- **Team delete is irreversible** -- requires confirmation in interactive mode; requires `--force` in non-interactive (CI/scripted) mode. -- **Audit logs** track all mutations in the system -- useful for compliance, incident investigation, and change tracking. 
- -## Cross-References - -- **Prerequisites:** flashduty-shared (authentication, output formatting) -- **Related:** flashduty-incident (reassign needs person IDs), flashduty-oncall (team-based schedule filtering), flashduty-insight (responder metrics) diff --git a/skills/flashduty-alert/SKILL.md b/skills/flashduty-alert/SKILL.md deleted file mode 100644 index a703e9b..0000000 --- a/skills/flashduty-alert/SKILL.md +++ /dev/null @@ -1,265 +0,0 @@ ---- -name: flashduty-alert -version: 1.0.0 -description: "Flashduty alert and alert event investigation: search, filter, and inspect alerts (Layer 1 deduplicated) and raw alert events (Layer 0 signals). Commands: alert list, get, events, timeline, merge; alert-event list. Use when drilling down from incidents to root cause alerts, tracing deduplication history, viewing alert state transitions, merging alerts into incidents, searching global alert events by severity or integration, or analyzing alert noise patterns." -metadata: - requires: - bins: ["flashduty"] - cliHelp: "flashduty alert --help" ---- - -# flashduty-alert - -**CRITICAL** — Before using this skill, read [`../flashduty-shared/SKILL.md`](../flashduty-shared/SKILL.md) for authentication, the 3-layer noise reduction model, global flags, and safety rules. - ---- - -## Overview - -This skill covers **Layer 0 (Alert Events)** and **Layer 1 (Alerts)** of the Flashduty 3-layer noise reduction model. - -- **Layer 0 -- Alert Events**: Raw signals pushed by monitoring systems (Prometheus, Zabbix, Datadog, etc.) via an `integration_key`. These are immutable records of every firing/recovery signal received. -- **Layer 1 -- Alerts**: Deduplicated from Alert Events using `alert_key`. Multiple raw events with the same alert_key collapse into a single alert, incrementing its `EventCnt`. - -Use this skill for **investigation** -- drilling down from incidents to their root alert signals. - ---- - -## Quick Decision - -| User wants to... 
| Command | -|---|---| -| Search alerts by severity/status | `alert list --severity Critical --active` | -| Inspect a specific alert | `alert get ` | -| See raw events behind an alert | `alert events ` | -| View alert state transitions | `alert timeline ` | -| Correlate alerts to an incident | `alert merge --incident ` | -| Search all raw alert events globally | `alert-event list --since 1h` | - ---- - -## Commands - -### alert list - -List alerts with filtering and pagination. - -```bash -flashduty alert list [flags] -``` - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--severity` | string | | Filter by severity: `Critical`, `Warning`, `Info` | -| `--active` | bool | false | Show active alerts only | -| `--recovered` | bool | false | Show recovered alerts only | -| `--channel` | string | | Comma-separated channel IDs | -| `--muted` | bool | false | Show ever-muted alerts only | -| `--title` | string | | Search by title keyword | -| `--since` | string | `24h` | Start time (duration or absolute) | -| `--until` | string | `now` | End time (duration or absolute) | -| `--limit` | int | 20 | Max results per page | -| `--page` | int | 1 | Page number | - -**Constraint**: `--active` and `--recovered` are mutually exclusive. Specifying both produces an error. - -Output columns: ID, TITLE, SEVERITY, STATUS, EVENTS, CHANNEL, STARTED. - -Examples: -```bash -# Active critical alerts in the last 24 hours -flashduty alert list --severity Critical --active - -# Warnings from a specific channel in the last 6 hours -flashduty alert list --severity Warning --channel 12345 --since 6h - -# Search by title keyword -flashduty alert list --title "disk usage" --active -``` - -### alert get - -Show full detail for a single alert. 
- -```bash -flashduty alert get -``` - -Displays a vertical detail view including: ID, Title, Severity, Status, Alert Key, Channel, Integration (name and type), Event Count, Start/Last/End times, Muted status, linked Incident (ID and progress), Labels, and Description. - -### alert events - -List all raw alert events (Layer 0) that were deduplicated into a specific alert. - -```bash -flashduty alert events -``` - -Output columns: EVENT_ID, SEVERITY, STATUS, TIME, TITLE. - -This shows the **dedup history** for one alert -- how many raw signals were collapsed into it. Use this to understand event volume and timing for a single alert. - -### alert timeline - -View the timeline/feed for a specific alert, showing state transitions and operator actions. - -```bash -flashduty alert timeline [flags] -``` - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--limit` | int | 20 | Max timeline events | -| `--page` | int | 1 | Page number | - -Output columns: TIME, TYPE, OPERATOR, DETAIL. Operator names are enriched (resolved to person names). - -### alert merge - -Merge one or more alerts into an existing incident. **This operation is IRREVERSIBLE.** - -```bash -flashduty alert merge [ ...] --incident [--comment ] -``` - -| Flag | Type | Required | Description | -|------|------|----------|-------------| -| `--incident` | string | Yes | Target incident ID | -| `--comment` | string | No | Merge comment | - -Example: -```bash -flashduty alert merge abc123 def456 --incident inc789 --comment "Related disk alerts" -``` - -### alert-event list (global) - -Search across ALL alert events globally (Layer 0). This is a separate top-level command, not a subcommand of `alert`. 
- -```bash -flashduty alert-event list [flags] -``` - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--severity` | string | | Filter by severity: `Critical`, `Warning`, `Info` (comma-separated) | -| `--channel` | string | | Comma-separated channel IDs | -| `--integration-type` | string | | Comma-separated integration types | -| `--since` | string | `1h` | Start time (duration or absolute) | -| `--until` | string | `now` | End time (duration or absolute) | -| `--limit` | int | 20 | Max results per page | -| `--page` | int | 1 | Page number | - -Output columns: EVENT_ID, ALERT_ID, SEVERITY, STATUS, TIME, TITLE. - -**Important**: The default time window is `1h`, which is shorter than `alert list`'s default of `24h`. This is intentional because raw event volume can be very high. - -Example: -```bash -# All critical events in the last hour -flashduty alert-event list --severity Critical - -# Events from a specific integration type in the last 30 minutes -flashduty alert-event list --integration-type Prometheus --since 30m - -# Events from multiple severity levels -flashduty alert-event list --severity Critical,Warning --since 2h -``` - ---- - -## Workflows - -### Workflow 1: Investigate an Incident's Root Cause - -Drill down from an incident through its contributing alerts to the raw signals. - -```bash -# 1. See all alerts contributing to this incident -flashduty incident alerts - -# 2. Pick a suspicious alert and view its full detail -flashduty alert get - -# 3. Trace the raw events that were deduplicated into this alert -flashduty alert events - -# 4. View the alert's state transition history -flashduty alert timeline -``` - -### Workflow 2: Find Noisy Alert Sources - -Identify which alerts or integrations are generating the most noise. - -```bash -# 1. Find active warnings in the last 24 hours -flashduty alert list --since 24h --active --severity Warning - -# 2. 
Check recent critical event volume (raw Layer 0 signals) -flashduty alert-event list --since 1h --severity Critical - -# 3. For aggregate analysis, use the insight command (see flashduty-insight skill) -flashduty insight top-alerts --label integration_name -``` - -### Workflow 3: Manually Correlate Alerts to an Incident - -Find related alerts and merge them into a single incident for unified response. - -```bash -# 1. Find alerts matching a pattern -flashduty alert list --title "disk" --active - -# 2. Merge selected alerts into an existing incident (IRREVERSIBLE) -flashduty alert merge --incident --comment "Related disk alerts" -``` - ---- - -## Key Concepts - -### alert events vs alert-event list - -These are different commands with different scopes: - -| | `alert events ` | `alert-event list` | -|---|---|---| -| Scope | Events for ONE specific alert | ALL events globally | -| Purpose | Dedup history of a single alert | Global raw signal search | -| Filters | None (alert_id is the filter) | Severity, channel, integration type, time | -| Default window | N/A (all events for the alert) | 1 hour | -| Use case | "How many raw events hit this alert?" | "What raw signals arrived recently?" | - -### Alert States - -- **Active**: The alert is currently firing. No recovery signal has been received. -- **Recovered**: A recovery signal was received (or the alert was manually resolved). -- The `--active` and `--recovered` flags on `alert list` are mutually exclusive boolean filters. Omitting both returns all alerts regardless of state. - -### Muted Status - -- `EverMuted` indicates whether the alert was muted at any point during its lifecycle (via noise reduction rules or manual muting). -- The `--muted` flag on `alert list` filters to alerts that have been muted at least once. - -### Deduplication via alert_key - -Multiple raw alert events with the same `alert_key` within a channel are deduplicated into a single alert. 
The alert's `EventCnt` reflects how many raw events were collapsed. Use `alert events ` to see the individual raw events. - ---- - -## Safety Notes - -- **`alert merge` is IRREVERSIBLE.** Once alerts are merged into an incident, they cannot be separated. Always confirm the target incident ID before merging. -- **`alert-event list` defaults to a 1-hour window**, which is shorter than other commands' 24-hour default. This is by design due to potentially high raw event volume. Widen the window explicitly with `--since` if needed, but be aware of large result sets. - ---- - -## Cross-References - -| Relation | Skill | Purpose | -|----------|-------|---------| -| Prerequisites | `flashduty-shared` | Authentication, configuration, shared flags | -| Parent layer | `flashduty-incident` | Incidents contain alerts (Layer 2) | -| Analytics | `flashduty-insight` | Alert noise analytics, top-alerts aggregation | -| Rules | `flashduty-channel` | Noise reduction rules, aggregation configuration | diff --git a/skills/flashduty-change/SKILL.md b/skills/flashduty-change/SKILL.md deleted file mode 100644 index 53be30f..0000000 --- a/skills/flashduty-change/SKILL.md +++ /dev/null @@ -1,100 +0,0 @@ ---- -name: flashduty-change -version: 1.0.0 -description: "Flashduty change event tracking: list recent deployments, config changes, and releases; query change volume trends over time. Commands: change list, change trend. Use when correlating incidents with recent deployments, investigating whether a change caused an outage, reviewing deployment frequency (DORA metrics), or auditing changes for a specific service or channel." -metadata: - requires: - bins: ["flashduty"] - cliHelp: "flashduty change --help" ---- - -# flashduty-change - -**CRITICAL** — Before using this skill, read [`../flashduty-shared/SKILL.md`](../flashduty-shared/SKILL.md) for authentication, the 3-layer noise reduction model, global flags, and safety rules. 
- -## Overview - -Change events run on a **parallel track** alongside the 3-layer noise reduction model (Alert Event -> Alert -> Incident). They are pushed by CI/CD systems via Flashduty integrations and are **read-only** in the CLI. - -Correlation with incidents is by **label matching + time proximity**, not by foreign key. Change trend data maps directly to the DORA deployment frequency metric. - -## Commands - -### change list - -List recent change events with title, channel, status, and timestamps. - -```bash -flashduty change list [flags] -``` - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--channel` | int | | Filter by channel ID | -| `--since` | string | `24h` | Start time (relative like `2h`, `7d` or absolute) | -| `--until` | string | `now` | End time | -| `--limit` | int | `20` | Max results per page | -| `--page` | int | `1` | Page number | - -Output columns: ID, TITLE, STATUS, CHANNEL, TIME. - -### change trend - -Show change volume over time. Useful for DORA deployment frequency metrics. - -```bash -flashduty change trend [flags] -``` - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--step` | string | `day` | Aggregation level: `day`, `week`, `month` | -| `--since` | string | `30d` | Start time | -| `--until` | string | `now` | End time | - -Output columns: DATE, CHANGES, EVENTS. - -## Workflows - -### Correlate an Incident with Recent Changes - -```bash -# 1. Get incident details — note the channel and timestamp -flashduty incident detail - -# 2. List changes in that channel around the incident time -flashduty change list --channel --since 2h - -# 3. 
Compare timestamps and labels to identify potential cause -``` - -### Review Deployment Frequency - -```bash -# Weekly deployment volume over the past month -flashduty change trend --step week --since 30d - -# Daily granularity for the past week -flashduty change trend --step day --since 7d -``` - -### Audit Changes for a Specific Service - -```bash -# 1. Find the channel ID -flashduty channel list --name "my-service" - -# 2. List all changes in the past week -flashduty change list --channel --since 7d -``` - -## Key Concepts - -- Change events are pushed by CI/CD systems via Flashduty integrations — they are **read-only** in the CLI. -- Correlation with incidents is by **label matching + time proximity**, not by foreign key. -- Change trend data maps directly to the DORA deployment frequency metric. - -## Cross-References - -- **Prerequisites:** flashduty-shared (authentication, output formatting) -- **Related:** flashduty-incident (incidents that changes may have caused), flashduty-insight (MTTA/MTTR context alongside deployment frequency) diff --git a/skills/flashduty-channel/SKILL.md b/skills/flashduty-channel/SKILL.md deleted file mode 100644 index a220596..0000000 --- a/skills/flashduty-channel/SKILL.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -name: flashduty-channel -version: 1.0.0 -description: "Flashduty channel and escalation rule management: list channels (collaboration spaces where alerts are routed), view escalation rules with notification layers. Commands: channel list, escalation-rule list. Use when looking up channel IDs for filtering other commands, reviewing escalation policies, or understanding how alerts are routed and escalated in the noise reduction pipeline." 
-metadata: - requires: - bins: ["flashduty"] - cliHelp: "flashduty channel --help" ---- - -# flashduty-channel - -**CRITICAL** — Before using this skill, read [`../flashduty-shared/SKILL.md`](../flashduty-shared/SKILL.md) for authentication, the 3-layer noise reduction model, global flags, and safety rules. - -## Overview - -Channels are the collaboration spaces where alerts are routed, noise reduction rules are applied, and escalation policies are configured. In the noise reduction pipeline, channels sit between Layer 1 (Alerts) and Layer 2 (Incidents) -- they are the primary container for alert routing and escalation. This skill covers read-only operations for channels and escalation rules. - -## Commands - -### channel list - -```bash -flashduty channel list [flags] -``` - -**Flags:** - -| Flag | Type | Description | -|----------|--------|--------------------| -| `--name` | string | Search by name | - -**Output columns:** ID, NAME, TEAM, CREATOR - -Lists channels with their ID, name, owning team, and creator. Channels are the collaboration spaces where alerts land after routing. - -### escalation-rule list - -```bash -flashduty escalation-rule list [flags] -``` - -**Flags:** - -| Flag | Type | Description | -|------------------|--------|------------------------------------| -| `--channel` | int | Channel ID | -| `--channel-name` | string | Channel name (resolved to ID internally) | - -One of `--channel` or `--channel-name` is required. When `--channel-name` matches multiple channels, the CLI prints all matches and asks the user to specify a `--channel` ID. - -**Output columns:** ID, NAME, CHANNEL, STATUS, PRIORITY, LAYERS - -Shows escalation rules for a channel: rule ID, name, channel name, status, priority, and the number of notification layers. - -## Workflows - -### Find a Channel and Its Escalation Policy - -```bash -# 1. Search for the channel by name -flashduty channel list --name "Production" - -# 2. 
List escalation rules for that channel -flashduty escalation-rule list --channel -``` - -### Look Up Channel ID for Other Commands - -Many commands (`incident list`, `alert list`, `change list`, etc.) accept a `--channel` flag. Use `channel list` to find the ID first: - -```bash -flashduty channel list --name "my-service" - -# Then use the ID in other commands: -flashduty incident list --channel -flashduty alert list --channel -flashduty change list --channel -``` - -### Review Escalation for Incident Response - -When investigating slow response times or checking that escalation is properly configured: - -```bash -flashduty channel list --name "critical-service" -flashduty escalation-rule list --channel -# Review the LAYERS column to confirm multi-layer escalation is in place -``` - -## Key Concepts - -- **Channels** are where the noise reduction pipeline runs (Layer 1 -> Layer 2). They are the primary container for alert routing and escalation. -- Each channel belongs to a **team**. -- **Escalation rules** define notification layers: who gets notified, when, and how. The LAYERS column shows how many escalation tiers exist. -- **Channel IDs** are used as filters across many other CLI commands (incidents, alerts, changes, etc.). -- The CLI currently provides **read-only** access to channels and escalation rules. -- Full channel CRUD and noise reduction config (silence rules, inhibit rules, drop rules, alert pipelines, routing, label enrichment) are planned for a future release. 
- -## Cross-References - -- **Prerequisites:** `flashduty-shared` -- authentication and global flags -- **Related skills:** - - `flashduty-incident` -- incidents are created in channels - - `flashduty-alert` -- alerts are routed to channels - - `flashduty-oncall` -- escalation may trigger on-call schedules diff --git a/skills/flashduty-incident/SKILL.md b/skills/flashduty-incident/SKILL.md deleted file mode 100644 index 1187ca5..0000000 --- a/skills/flashduty-incident/SKILL.md +++ /dev/null @@ -1,154 +0,0 @@ ---- -name: flashduty-incident -version: 1.0.0 -description: "Flashduty incident lifecycle management: list, filter, triage, investigate, and resolve incidents. Commands: incident list, get, detail (AI summary), create, update, ack, close, merge, snooze, reopen, reassign, feed, timeline, alerts, similar; postmortem list. Use when responding to pages, triaging alerts, investigating outages, acknowledging or closing incidents, merging duplicates, snoozing during maintenance, reassigning responders, reviewing incident timelines, or listing post-mortem reports." -metadata: - requires: - bins: ["flashduty"] - cliHelp: "flashduty incident --help" ---- - -# flashduty-incident - -**CRITICAL** -- Before using this skill, read [`../flashduty-shared/SKILL.md`](../flashduty-shared/SKILL.md) for authentication, the 3-layer noise reduction model, global flags, and safety rules. - -## Overview - -Layer 2 of Flashduty's 3-layer noise reduction model (Alert -> **Incident** -> Event). Incidents are the actionable items that humans respond to. Alerts are aggregated into incidents via noise reduction rules; responders triage, investigate, and resolve incidents. This skill covers the full incident lifecycle: listing, creating, triaging, investigating, resolving, merging, snoozing, reassigning, and post-mortem review. - -## Quick Decision - -| User wants to... 
| Command | -|---|---| -| See active incidents | `incident list --progress Triggered` | -| Investigate an incident | `incident detail ` | -| Respond to a page | `incident ack ` | -| Combine related incidents | `incident merge --source ` | -| Delay an incident | `incident snooze --duration 2h` | -| Re-route an incident | `incident reassign --person ` | -| Review past incidents | `postmortem list` | - -## Commands - -| Command | Description | -|---|---| -| [`incident list`](references/flashduty-incident-list.md) | List and filter incidents | -| [`incident get`](references/flashduty-incident-get.md) | Get incident(s) in table/detail view | -| [`incident detail`](references/flashduty-incident-detail.md) | Full detail with AI summary, root cause, impact | -| [`incident create`](references/flashduty-incident-create.md) | Create a new incident | -| [`incident update`](references/flashduty-incident-update.md) | Update incident fields | -| [`incident ack`](references/flashduty-incident-ack.md) | Acknowledge incidents (Triggered -> Processing) | -| [`incident close`](references/flashduty-incident-close.md) | Close incidents | -| [`incident merge`](references/flashduty-incident-merge.md) | Merge incidents (IRREVERSIBLE) | -| [`incident snooze`](references/flashduty-incident-snooze.md) | Snooze incidents for a duration | -| [`incident reopen`](references/flashduty-incident-reopen.md) | Reopen closed incidents | -| [`incident reassign`](references/flashduty-incident-reassign.md) | Reassign to new responders | -| [`incident feed`](references/flashduty-incident-feed.md) | Paginated event timeline | -| [`incident timeline`](references/flashduty-incident-timeline.md) | Full event history (non-paginated) | -| [`incident alerts`](references/flashduty-incident-alerts.md) | View contributing alerts | -| [`incident similar`](references/flashduty-incident-similar.md) | Find similar incidents | -| [`postmortem list`](references/flashduty-postmortem-list.md) | List post-mortem reports | - 
-## Workflows - -### Workflow 1: Triage an Active Incident - -Investigate and respond to a newly triggered incident. - -```bash -# 1. Find unacknowledged critical incidents -flashduty incident list --progress Triggered --severity Critical - -# 2. Investigate with AI summary, root cause, and impact -flashduty incident detail - -# 3. Acknowledge ownership -flashduty incident ack - -# 4. See contributing alerts for root cause analysis -flashduty incident alerts - -# 5. Check for related past incidents -flashduty incident similar - -# 6. Resolve when fixed -flashduty incident close -``` - -### Workflow 2: Merge Related Incidents - -Consolidate multiple incidents caused by the same underlying issue. - -```bash -# 1. Find related incidents by keyword -flashduty incident list --query "database" --progress Triggered - -# 2. Review the results and identify the primary incident - -# 3. Merge duplicates into the primary (IRREVERSIBLE) -flashduty incident merge --source , -``` - -### Workflow 3: Escalate and Reassign - -Hand off an incident to the right responder. - -```bash -# 1. Find the right person -flashduty member list --name "senior" - -# 2. Reassign -flashduty incident reassign --person -``` - -### Workflow 4: Snooze for Maintenance Window - -Temporarily silence an incident during planned maintenance. - -```bash -# 1. Snooze for the maintenance duration -flashduty incident snooze --duration 2h - -# 2. After maintenance, check if it re-triggered -flashduty incident list --progress Triggered -``` - -### Workflow 5: Post-Incident Review - -Review what happened after resolving an incident. - -```bash -# 1. Get full incident detail with AI analysis -flashduty incident detail - -# 2. Review the timeline of events -flashduty incident timeline - -# 3. Check the feed for all actions taken -flashduty incident feed - -# 4. 
Look at related post-mortems -flashduty postmortem list --status published --since 7d -``` - -## Key Concepts - -- **Incident states**: `Triggered` (new, unacknowledged) -> `Processing` (acknowledged, being worked) -> `Closed` (resolved). -- **Severity levels**: `Critical`, `Warning`, `Info`. -- **Noise reduction**: Multiple alerts are aggregated into a single incident via noise reduction rules, reducing responder fatigue. -- **AI analysis**: The `incident detail` command provides AI-generated summaries, root cause analysis, resolution suggestions, and impact assessments. - -## Safety Notes - -- `incident merge` is **IRREVERSIBLE** -- always double-check source and target IDs before running. -- `incident create` triggers notifications to responders -- confirm with the user before creating. -- Always `incident list` before bulk operations (`ack`, `close`, `reopen`) to verify the right incidents are targeted. -- `incident snooze` has a maximum duration of 24 hours and requires whole minutes. - -## Cross-References - -- **Prerequisites**: `flashduty-shared` -- authentication, 3-layer noise reduction model overview, global flags (`--json`, `--api-key`, `--api-host`). -- **Related skills**: - - `flashduty-alert` -- drill into contributing alerts (Layer 1). - - `flashduty-oncall` -- find who is on-call and should respond. - - `flashduty-insight` -- MTTA/MTTR metrics and operational analytics. diff --git a/skills/flashduty-incident/references/flashduty-incident-ack.md b/skills/flashduty-incident/references/flashduty-incident-ack.md deleted file mode 100644 index 3057066..0000000 --- a/skills/flashduty-incident/references/flashduty-incident-ack.md +++ /dev/null @@ -1,19 +0,0 @@ -# flashduty incident ack - -Acknowledge one or more incidents (moves state from Triggered to Processing). - -## Usage - -```bash -flashduty incident ack [ ...] -``` - -## Flags - -No command-specific flags. Supports global flags (`--json`, `--no-trunc`, etc.). 
- -## Examples - -```bash -flashduty incident ack abc123 def456 -``` diff --git a/skills/flashduty-incident/references/flashduty-incident-alerts.md b/skills/flashduty-incident/references/flashduty-incident-alerts.md deleted file mode 100644 index b00b99e..0000000 --- a/skills/flashduty-incident/references/flashduty-incident-alerts.md +++ /dev/null @@ -1,25 +0,0 @@ -# flashduty incident alerts - -View alerts contributing to an incident. - -## Usage - -```bash -flashduty incident alerts [flags] -``` - -## Flags - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--limit` | int | `10` | Max alerts to show | - -## Output Columns - -ALERT_ID, TITLE, SEVERITY, STATUS, STARTED. - -## Examples - -```bash -flashduty incident alerts abc123 --limit 20 -``` diff --git a/skills/flashduty-incident/references/flashduty-incident-close.md b/skills/flashduty-incident/references/flashduty-incident-close.md deleted file mode 100644 index 0f22d4d..0000000 --- a/skills/flashduty-incident/references/flashduty-incident-close.md +++ /dev/null @@ -1,19 +0,0 @@ -# flashduty incident close - -Close one or more incidents. - -## Usage - -```bash -flashduty incident close [ ...] -``` - -## Flags - -No command-specific flags. Supports global flags (`--json`, `--no-trunc`, etc.). - -## Examples - -```bash -flashduty incident close abc123 def456 -``` diff --git a/skills/flashduty-incident/references/flashduty-incident-create.md b/skills/flashduty-incident/references/flashduty-incident-create.md deleted file mode 100644 index 2a11cff..0000000 --- a/skills/flashduty-incident/references/flashduty-incident-create.md +++ /dev/null @@ -1,34 +0,0 @@ -# flashduty incident create - -Create a new incident. In interactive terminals, title and severity prompt interactively if not provided via flags. 
- -## Usage - -```bash -flashduty incident create [flags] -``` - -## Flags - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--title` | string | | Incident title (**required**, 3-200 chars) | -| `--severity` | string | | `Critical`, `Warning`, or `Info` (**required**) | -| `--channel` | int | | Channel ID | -| `--description` | string | | Description (max 6144 chars) | -| `--assign` | int slice | | Person IDs to assign (repeatable flag) | - -## Examples - -```bash -# Create with all flags -flashduty incident create --title "Payment gateway timeout" --severity Critical --channel 100 --description "Stripe API returning 504s" --assign 1 --assign 2 - -# Minimal - will prompt interactively for title and severity in a terminal -flashduty incident create -``` - -## Notes - -- Creating an incident triggers notifications to responders -- confirm with the user before running. -- In non-interactive environments (pipes, scripts), `--title` and `--severity` must be provided via flags. diff --git a/skills/flashduty-incident/references/flashduty-incident-detail.md b/skills/flashduty-incident/references/flashduty-incident-detail.md deleted file mode 100644 index feb83ae..0000000 --- a/skills/flashduty-incident/references/flashduty-incident-detail.md +++ /dev/null @@ -1,27 +0,0 @@ -# flashduty incident detail - -Full incident detail with AI-powered analysis fields: AI summary, root cause, resolution, and impact analysis. - -## Usage - -```bash -flashduty incident detail -``` - -## Flags - -No command-specific flags. Supports global flags (`--json`, `--no-trunc`, etc.). - -## Output Fields - -ID, Title, Severity, Progress, Channel, Created, Acknowledged, Closed, Alerts count, Events count, Frequency, AI Summary, Root Cause, Resolution, Impact, Description, Labels, Custom Fields, Responders. 
- -## Examples - -```bash -flashduty incident detail abc123 -``` - -## Notes - -This command provides richer output than `incident get` by including AI-generated analysis fields. Use this for deep investigation; use `incident get` for quick lookups. diff --git a/skills/flashduty-incident/references/flashduty-incident-feed.md b/skills/flashduty-incident/references/flashduty-incident-feed.md deleted file mode 100644 index a9c7022..0000000 --- a/skills/flashduty-incident/references/flashduty-incident-feed.md +++ /dev/null @@ -1,26 +0,0 @@ -# flashduty incident feed - -Paginated timeline of incident events. - -## Usage - -```bash -flashduty incident feed [flags] -``` - -## Flags - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--limit` | int | `20` | Max events | -| `--page` | int | `1` | Page number | - -## Output Columns - -TIME, TYPE, OPERATOR, DETAIL. - -## Examples - -```bash -flashduty incident feed abc123 --limit 50 --page 2 -``` diff --git a/skills/flashduty-incident/references/flashduty-incident-get.md b/skills/flashduty-incident/references/flashduty-incident-get.md deleted file mode 100644 index 30a9fdc..0000000 --- a/skills/flashduty-incident/references/flashduty-incident-get.md +++ /dev/null @@ -1,28 +0,0 @@ -# flashduty incident get - -Get incident details. Single ID produces a vertical detail view; multiple IDs produce a table. - -## Usage - -```bash -flashduty incident get [ ...] -``` - -## Flags - -No command-specific flags. Supports global flags (`--json`, `--no-trunc`, etc.). - -## Output Columns - -Single ID: vertical key-value detail view. -Multiple IDs: table with columns ID, TITLE, SEVERITY, PROGRESS, CHANNEL, CREATED. 
- -## Examples - -```bash -# Single incident - vertical detail view -flashduty incident get abc123 - -# Multiple incidents - table format -flashduty incident get abc123 def456 ghi789 -``` diff --git a/skills/flashduty-incident/references/flashduty-incident-list.md b/skills/flashduty-incident/references/flashduty-incident-list.md deleted file mode 100644 index 37cbf65..0000000 --- a/skills/flashduty-incident/references/flashduty-incident-list.md +++ /dev/null @@ -1,39 +0,0 @@ -# flashduty incident list - -List incidents with filtering and pagination. - -## Usage - -```bash -flashduty incident list [flags] -``` - -## Flags - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--progress` | string | | Filter by state: `Triggered`, `Processing`, `Closed` | -| `--severity` | string | | Filter by severity: `Critical`, `Warning`, `Info` | -| `--channel` | int | | Filter by channel ID | -| `--query` | string | | Free-text search across title, labels, and content | -| `--since` | string | `24h` | Start time (duration like `24h`, date, datetime, or unix timestamp) | -| `--until` | string | `now` | End time | -| `--limit` | int | `20` | Max results (max 100) | -| `--page` | int | `1` | Page number | - -## Output Columns - -ID, TITLE, SEVERITY, PROGRESS, CHANNEL, CREATED. 
- -## Examples - -```bash -# Critical incidents in the last hour -flashduty incident list --severity Critical --since 1h - -# Triggered incidents matching "database" (title, labels, or content) -flashduty incident list --progress Triggered --query "database" - -# Closed incidents from a specific channel, page 2 -flashduty incident list --progress Closed --channel 12345 --page 2 -``` diff --git a/skills/flashduty-incident/references/flashduty-incident-merge.md b/skills/flashduty-incident/references/flashduty-incident-merge.md deleted file mode 100644 index e85c977..0000000 --- a/skills/flashduty-incident/references/flashduty-incident-merge.md +++ /dev/null @@ -1,26 +0,0 @@ -# flashduty incident merge - -Merge source incidents into a target incident. **This operation is IRREVERSIBLE.** - -## Usage - -```bash -flashduty incident merge --source -``` - -## Flags - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--source` | string | | Comma-separated source incident IDs (**required**, max 100) | - -## Examples - -```bash -flashduty incident merge abc123 --source def456,ghi789,jkl012 -``` - -## Notes - -- This operation is **IRREVERSIBLE**. Always double-check source and target IDs before running. -- Source incidents are absorbed into the target; they cannot be un-merged. diff --git a/skills/flashduty-incident/references/flashduty-incident-reassign.md b/skills/flashduty-incident/references/flashduty-incident-reassign.md deleted file mode 100644 index 5f1a29d..0000000 --- a/skills/flashduty-incident/references/flashduty-incident-reassign.md +++ /dev/null @@ -1,25 +0,0 @@ -# flashduty incident reassign - -Reassign an incident to new responders. 
- -## Usage - -```bash -flashduty incident reassign --person -``` - -## Flags - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--person` | string | | Comma-separated person IDs (**required**) | - -## Examples - -```bash -flashduty incident reassign abc123 --person 101,102 -``` - -## Notes - -Use `flashduty member list` to find person IDs. diff --git a/skills/flashduty-incident/references/flashduty-incident-reopen.md b/skills/flashduty-incident/references/flashduty-incident-reopen.md deleted file mode 100644 index 5ca7124..0000000 --- a/skills/flashduty-incident/references/flashduty-incident-reopen.md +++ /dev/null @@ -1,19 +0,0 @@ -# flashduty incident reopen - -Reopen one or more closed incidents. - -## Usage - -```bash -flashduty incident reopen [ ...] -``` - -## Flags - -No command-specific flags. Supports global flags (`--json`, `--no-trunc`, etc.). - -## Examples - -```bash -flashduty incident reopen abc123 -``` diff --git a/skills/flashduty-incident/references/flashduty-incident-similar.md b/skills/flashduty-incident/references/flashduty-incident-similar.md deleted file mode 100644 index 628e93f..0000000 --- a/skills/flashduty-incident/references/flashduty-incident-similar.md +++ /dev/null @@ -1,25 +0,0 @@ -# flashduty incident similar - -Find similar incidents (useful for pattern recognition and investigation). - -## Usage - -```bash -flashduty incident similar [flags] -``` - -## Flags - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--limit` | int | `5` | Max results | - -## Output Columns - -ID, TITLE, SEVERITY, PROGRESS, CHANNEL, CREATED. 
- -## Examples - -```bash -flashduty incident similar abc123 --limit 10 -``` diff --git a/skills/flashduty-incident/references/flashduty-incident-snooze.md b/skills/flashduty-incident/references/flashduty-incident-snooze.md deleted file mode 100644 index 0a0b5a4..0000000 --- a/skills/flashduty-incident/references/flashduty-incident-snooze.md +++ /dev/null @@ -1,26 +0,0 @@ -# flashduty incident snooze - -Snooze one or more incidents for a specified duration. - -## Usage - -```bash -flashduty incident snooze [ ...] --duration -``` - -## Flags - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--duration` | string | | Duration, e.g. `2h`, `30m` (**required**, max `24h`, must be whole minutes) | - -## Examples - -```bash -flashduty incident snooze abc123 --duration 2h -``` - -## Notes - -- Maximum snooze duration is 24 hours. -- Duration must be specified in whole minutes (e.g., `30m`, `1h`, `2h`). diff --git a/skills/flashduty-incident/references/flashduty-incident-timeline.md b/skills/flashduty-incident/references/flashduty-incident-timeline.md deleted file mode 100644 index b9dc05d..0000000 --- a/skills/flashduty-incident/references/flashduty-incident-timeline.md +++ /dev/null @@ -1,27 +0,0 @@ -# flashduty incident timeline - -View the incident timeline/event history (non-paginated). - -## Usage - -```bash -flashduty incident timeline -``` - -## Flags - -No command-specific flags. Supports global flags (`--json`, `--no-trunc`, etc.). - -## Output Columns - -TIME, TYPE, OPERATOR, DETAIL. - -## Examples - -```bash -flashduty incident timeline abc123 -``` - -## Notes - -Unlike `incident feed`, this command returns the full timeline without pagination. Use `incident feed` for large timelines where you need paginated access. 
diff --git a/skills/flashduty-incident/references/flashduty-incident-update.md b/skills/flashduty-incident/references/flashduty-incident-update.md deleted file mode 100644 index f6d1348..0000000 --- a/skills/flashduty-incident/references/flashduty-incident-update.md +++ /dev/null @@ -1,28 +0,0 @@ -# flashduty incident update - -Update an existing incident's fields. - -## Usage - -```bash -flashduty incident update [flags] -``` - -## Flags - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--title` | string | | New title | -| `--description` | string | | New description | -| `--severity` | string | | New severity: `Critical`, `Warning`, `Info` | -| `--field` | string array | | Custom field `key=value` (repeatable) | - -## Examples - -```bash -# Update severity and add custom fields -flashduty incident update abc123 --severity Warning --field "team=platform" --field "region=us-east-1" - -# Update title -flashduty incident update abc123 --title "Resolved: Payment gateway timeout" -``` diff --git a/skills/flashduty-incident/references/flashduty-postmortem-list.md b/skills/flashduty-incident/references/flashduty-postmortem-list.md deleted file mode 100644 index 2ac0c40..0000000 --- a/skills/flashduty-incident/references/flashduty-postmortem-list.md +++ /dev/null @@ -1,35 +0,0 @@ -# flashduty postmortem list - -List post-mortem reports with filtering. - -## Usage - -```bash -flashduty postmortem list [flags] -``` - -## Flags - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--status` | string | | Filter: `drafting` or `published` | -| `--channel` | string | | Comma-separated channel IDs | -| `--team` | string | | Comma-separated team IDs | -| `--since` | string | | Created after (time filter) | -| `--until` | string | | Created before (time filter) | -| `--limit` | int | `20` | Max results | -| `--page` | int | `1` | Page number | - -## Output Columns - -ID, TITLE, STATUS, CHANNEL, CREATED. 
- -## Examples - -```bash -# Published post-mortems from the last month -flashduty postmortem list --status published --since 30d - -# Post-mortems for specific teams -flashduty postmortem list --team 1,2,3 -``` diff --git a/skills/flashduty-insight/SKILL.md b/skills/flashduty-insight/SKILL.md deleted file mode 100644 index 020d57e..0000000 --- a/skills/flashduty-insight/SKILL.md +++ /dev/null @@ -1,195 +0,0 @@ ---- -name: flashduty-insight -version: 1.0.0 -description: "Flashduty analytics and SRE reporting: query MTTA/MTTR metrics by team, channel, or responder; identify noisy alert sources; review incident performance with response times; analyze notification volume and cost trends. Commands: insight team, channel, responder, top-alerts, incidents, notifications. Use when reviewing SRE performance, generating weekly or monthly incident reports, identifying noise reduction opportunities, analyzing alert fatigue, or tracking notification costs." -metadata: - requires: - bins: ["flashduty"] - cliHelp: "flashduty insight --help" ---- - -# flashduty-insight - -**CRITICAL** -- Before using this skill, read [`../flashduty-shared/SKILL.md`](../flashduty-shared/SKILL.md) for authentication, the 3-layer noise reduction model, global flags, and safety rules. - -## Overview - -Analytics, metrics, and reporting for Flashduty. All commands are read-only and query historical data. Use this skill when the user needs SRE performance metrics (MTTA, MTTR), noise reduction effectiveness, responder workload analysis, or notification cost trends. - -## Quick Decision - -| User wants to... 
| Command | -|---|---| -| Team-level performance overview | `insight team --since 7d` | -| Channel-level metrics | `insight channel --since 7d` | -| Individual responder stats | `insight responder --since 7d` | -| Find noisiest alert sources | `insight top-alerts --label integration_name` | -| List incidents with response metrics | `insight incidents --since 7d` | -| Notification volume/cost analysis | `insight notifications --step day` | - -## Commands - -### insight team - -Query incident response metrics aggregated by team. - -```bash -flashduty insight team [flags] -``` - -| Flag | Default | Description | -|------|---------|-------------| -| `--since` | `7d` | Start time (relative like `7d`, `24h`, or absolute) | -| `--until` | `now` | End time | - -Output columns: TEAM, INCIDENTS, ACK%, MTTA, MTTR, NOISE_REDUCTION, ALERTS, EVENTS. - -### insight channel - -Query incident response metrics aggregated by channel. - -```bash -flashduty insight channel [flags] -``` - -| Flag | Default | Description | -|------|---------|-------------| -| `--since` | `7d` | Start time | -| `--until` | `now` | End time | - -Output columns: CHANNEL, INCIDENTS, ACK%, MTTA, MTTR, NOISE_REDUCTION, ALERTS, EVENTS. - -### insight responder - -Query incident response metrics aggregated by individual responder. - -```bash -flashduty insight responder [flags] -``` - -| Flag | Default | Description | -|------|---------|-------------| -| `--since` | `7d` | Start time | -| `--until` | `now` | End time | - -Output columns: RESPONDER, EMAIL, INCIDENTS, ACK%, MTTA, INTERRUPTIONS, ENGAGED. - -### insight top-alerts - -Show the noisiest alert sources grouped by a label. Essential for noise reduction work. 
- -```bash -flashduty insight top-alerts [flags] -``` - -| Flag | Default | Description | -|------|---------|-------------| -| `--label` | *(required)* | Label key to group by (e.g., `integration_name`, `alert_key`) | -| `--since` | `7d` | Start time | -| `--until` | `now` | End time | -| `--limit` | `10` | Top K results to return | - -Output columns: LABEL, ALERTS, EVENTS. - -### insight incidents - -List incidents with individual performance metrics. - -```bash -flashduty insight incidents [flags] -``` - -| Flag | Default | Description | -|------|---------|-------------| -| `--since` | `7d` | Start time | -| `--until` | `now` | End time | -| `--limit` | `20` | Max results per page | -| `--page` | `1` | Page number | - -Output columns: ID, TITLE, SEVERITY, CHANNEL, MTTA, MTTR, NOTIFICATIONS. - -### insight notifications - -Show notification volume over time. Useful for cost analysis. - -```bash -flashduty insight notifications [flags] -``` - -| Flag | Default | Description | -|------|---------|-------------| -| `--step` | `day` | Aggregation period: `day`, `week`, `month` | -| `--since` | `30d` | Start time | -| `--until` | `now` | End time | - -Output columns: DATE, SMS, VOICE, EMAIL. 
- -## Workflows - -### Weekly SRE Performance Review - -```bash -# Overall team performance for the past week -flashduty insight team --since 7d - -# Drill into channel-level metrics -flashduty insight channel --since 7d - -# Individual responder metrics -flashduty insight responder --since 7d -``` - -### Identify and Reduce Alert Noise - -```bash -# Find noisiest integration sources -flashduty insight top-alerts --label integration_name --since 7d --limit 5 - -# Find noisiest alert keys -flashduty insight top-alerts --label alert_key --since 7d --limit 10 - -# Check noise reduction effectiveness by channel -flashduty insight channel --since 7d -``` - -### Notification Cost Analysis - -```bash -# Weekly notification trends over the past month -flashduty insight notifications --step week --since 30d - -# Identify which incidents generated the most notifications -flashduty insight incidents --since 7d -``` - -### Monthly Incident Report - -```bash -# Team-level summary for the month -flashduty insight team --since 30d - -# Full incident list with metrics -flashduty insight incidents --since 30d --limit 50 - -# Top noisy sources over the month -flashduty insight top-alerts --label integration_name --since 30d -``` - -## Key Concepts - -- **MTTA** -- Mean Time To Acknowledge: duration from incident trigger to first acknowledgement. -- **MTTR** -- Mean Time To Resolve: duration from incident trigger to close. -- **Noise reduction%** -- percentage of alert events that were suppressed (deduplicated/aggregated) before becoming incidents. -- **ACK%** -- percentage of incidents that were acknowledged by a responder. -- **Interruptions** -- number of times a responder was paged (notifications received). -- **Engaged time** -- total time a responder spent working on incidents. -- Times are displayed as human-readable durations (e.g., `2m 30s`, `1h 15m`). -- All insight commands are read-only -- they query historical metrics and never modify data. 
- -## Cross-References - -- **Prerequisites**: `flashduty-shared` (authentication, configuration, output formats) -- **Related skills**: - - `flashduty-incident` -- drill into specific incidents surfaced by `insight incidents` - - `flashduty-alert` -- investigate noisy alert sources identified by `insight top-alerts` - - `flashduty-change` -- correlate deployment frequency via `change trend` with incident metrics diff --git a/skills/flashduty-oncall/SKILL.md b/skills/flashduty-oncall/SKILL.md deleted file mode 100644 index bea44d8..0000000 --- a/skills/flashduty-oncall/SKILL.md +++ /dev/null @@ -1,122 +0,0 @@ ---- -name: flashduty-oncall -version: 1.0.0 -description: "Flashduty on-call schedule management: find who is currently on call, list schedules, view schedule details with rotation layers and shift slots. Commands: oncall who, oncall schedule list, oncall schedule get. Use when finding the current on-call responder, checking who covers a future time window, reviewing upcoming shifts, or looking up schedule IDs and rotation configurations." -metadata: - requires: - bins: ["flashduty"] - cliHelp: "flashduty oncall --help" ---- - -# flashduty-oncall - -**CRITICAL** — Before using this skill, read [`../flashduty-shared/SKILL.md`](../flashduty-shared/SKILL.md) for authentication, the 3-layer noise reduction model, global flags, and safety rules. - -## Overview - -On-call schedule queries for Flashduty. Find who is currently on call, list available schedules, and inspect schedule details including rotation layers and computed shift slots. All commands are read-only; schedule creation and editing are done in the Flashduty web UI. - -## Commands - -### oncall who - -Show who is currently on call across all schedules. 
- -```bash -flashduty oncall who [flags] -``` - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--query` | string | | Search by schedule name | -| `--team` | string | | Comma-separated team IDs | -| `--since` | string | `now` | Start of time range | -| `--until` | string | `+24h` | End of time range | -| `--limit` | int | `20` | Max results per page | -| `--page` | int | `1` | Page number | - -Output columns: SCHEDULE, ON_CALL, UNTIL, NEXT - -### oncall schedule list - -List all schedules with their ID, status, and layer count. - -```bash -flashduty oncall schedule list [flags] -``` - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--query` | string | | Search by schedule name | -| `--team` | string | | Comma-separated team IDs | -| `--since` | string | `now` | Start of time range | -| `--until` | string | `+24h` | End of time range | -| `--limit` | int | `20` | Max results per page | -| `--page` | int | `1` | Page number | - -Output columns: ID, NAME, STATUS, LAYERS - -Use this command to discover schedule IDs needed by `oncall schedule get`. - -### oncall schedule get - -Get detailed view of a single schedule including rotation layers, current/next on-call, and computed shift slots. - -```bash -flashduty oncall schedule get [flags] -``` - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--since` | string | `now` | Start of time range | -| `--until` | string | `+7d` | End of time range | - -Output: Key-value header (ID, Name, Status, Layers, Current, Next) followed by a slots table (START, END, GROUP) showing the computed final schedule within the requested time range. - -## Workflows - -### Find Who Is On Call Right Now - -```bash -flashduty oncall who -``` - -Shows all schedules with the current on-call person and when their shift ends. - -### Find On-Call for a Specific Team - -```bash -# 1. 
Look up the team ID -flashduty team list --name "Platform" - -# 2. Filter on-call by team -flashduty oncall who --team -``` - -### Check Next Week's Schedule - -```bash -flashduty oncall schedule get --since now --until +7d -``` - -This is the default time range for `schedule get`, so `--since` and `--until` can be omitted. - -### Find Who Will Be On Call at a Specific Time - -```bash -flashduty oncall who --since "2024-01-20T00:00:00Z" --until "2024-01-21T00:00:00Z" -``` - -Provide an explicit time range to see who covers a future window. - -## Key Concepts - -- **Future duration syntax**: The `+24h` and `+7d` values in `--since`/`--until` are relative durations added to the current time. This is unique to on-call commands. -- **`oncall who` vs `oncall schedule list`**: `oncall who` is optimized for "who is on call NOW" (shows person and shift times). `oncall schedule list` is for browsing schedules (shows ID, name, status, layer count). -- **`oncall schedule get`**: Shows full rotation detail for a single schedule, including the computed final schedule as a slots table. Requires a `schedule_id` positional argument. -- **Read-only**: Schedules are read-only in the CLI. Creation and editing are done in the Flashduty web UI. 
- -## Cross-References - -- **Prerequisites:** flashduty-shared (authentication, global flags, output formatting) -- **Related:** flashduty-incident (reassign incidents to the current on-call person), flashduty-admin (look up team IDs and member details) diff --git a/skills/flashduty-shared/SKILL.md b/skills/flashduty-shared/SKILL.md deleted file mode 100644 index d648e2c..0000000 --- a/skills/flashduty-shared/SKILL.md +++ /dev/null @@ -1,236 +0,0 @@ ---- -name: flashduty-shared -version: 1.0.0 -description: "Flashduty CLI foundation: authentication (login, app_key, config), the 3-layer noise reduction model (Alert Event to Alert to Incident), global flags (--output-format, --json, --no-trunc), output modes (table, JSON, TOON, vertical detail), pagination (--limit, --page), time parsing (relative, absolute, unix, future durations), reference data lookups (member, team, channel, field, escalation-rule), and safety rules. Prerequisite for all other flashduty-* skills. Use when setting up flashduty-cli, encountering auth errors, looking up IDs, or needing to understand the Flashduty data model." -metadata: - requires: - bins: ["flashduty"] - cliHelp: "flashduty --help" ---- - -# flashduty-shared - -## Overview - -Flashduty is an incident management platform. The CLI (`flashduty`) wraps the Flashduty Open API and is built with Go + Cobra. This skill is the foundation that all other `flashduty-*` skills build on. It covers the core data model, authentication, global flags, output formatting, pagination, time parsing, reference data lookups, and safety rules. - ---- - -## The 3-Layer Noise Reduction Model - -This is the **core mental model** for all Flashduty operations. Every command maps to one of these layers. 
- -``` -Monitoring / CI / External Systems - | - | push via integration_key - v -┌──────────────────────────────────────┐ -│ Layer 0: Alert Events (raw signals) │ -│ enrichment -> pipeline -> routing │ -└──────────────┬───────────────────────┘ - | dedup by alert_key -┌──────────────v───────────────────────┐ -│ Layer 1: Alerts (deduplicated) │ -│ drop -> grouping -> flapping │ -└──────────────┬───────────────────────┘ - | aggregate by grouping rules -┌──────────────v───────────────────────┐ -│ Layer 2: Incidents (actionable) │ -│ silence -> inhibit -> escalate │ -│ -> notify │ -└──────────────────────────────────────┘ - -Parallel: Change Events --(label correlation)--> Incidents -``` - -### Key Relationships - -- One **Alert Event** creates or merges into one **Alert** (via `alert_key`). -- One **Alert** belongs to one **Incident** (via aggregation rules). -- One **Incident** contains 1 to 5000 **Alerts**. -- **Change Events** correlate with Incidents via shared label values and time proximity (no foreign key). - -### CLI Command Mapping - -| Layer | Entity | CLI command group | -|-------|--------|-------------------| -| 0 | Alert Event | `flashduty alert-event` | -| 1 | Alert | `flashduty alert` | -| 2 | Incident | `flashduty incident` | -| -- | Change Event | `flashduty change` | - ---- - -## Authentication - -### First-Time Setup - -```bash -# Interactive login -- prompts for App Key (input is hidden) -flashduty login -``` - -The login command validates the key by making a test API call, then stores it in `~/.flashduty/config.yaml` with `0600` permissions. 
- -### Environment Variables - -| Variable | Purpose | -|----------|---------| -| `FLASHDUTY_APP_KEY` | Override app key (takes precedence over config file) | -| `FLASHDUTY_BASE_URL` | Override base URL (default: `https://api.flashcat.cloud`) | - -### Config Management - -```bash -# View current config (key is masked) -flashduty config show - -# Set a config value -flashduty config set app_key -flashduty config set base_url https://api.flashcat.cloud -``` - -### Resolution Order - -1. `--app-key` flag (hidden, for scripting) -2. `FLASHDUTY_APP_KEY` environment variable -3. `~/.flashduty/config.yaml` file - -If no key is found, the CLI returns: `no app key configured. Run 'flashduty login' or set FLASHDUTY_APP_KEY`. - ---- - -## Global Flags - -These flags are available on **every** command via Cobra `PersistentFlags`: - -| Flag | Type | Default | Effect | -|------|------|---------|--------| -| `--output-format` | string | `table` | Output format: `table`, `json`, or `toon` (compact, fewer tokens) | -| `--json` | bool | `false` | Output as JSON (alias for `--output-format json`) | -| `--no-trunc` | bool | `false` | Do not truncate long values in table output | -| `--app-key` | string | `""` | Override app key (hidden flag) | -| `--base-url` | string | `""` | Override base URL | - ---- - -## Output Modes - -### Table (default) - -Human-readable aligned columns. Long values are truncated with `...` unless `--no-trunc` is set. List commands append a pagination footer: - -``` -Showing 20 results (page 1, total 142). -``` - -### JSON (`--json` / `--output-format json`) - -Machine-readable full output. No truncation. Suitable for piping to `jq`. Success messages are wrapped as `{"message": "..."}`. - -### TOON (`--output-format toon`) - -Token-Oriented Object Notation — machine-readable full output, no truncation, encoded via the same path as the Flashduty MCP server. 
For uniform arrays (list commands) it emits the field keys once as a header instead of repeating them on every row, so list output costs materially fewer tokens than JSON. **Preferred when an LLM/agent reads the output.** Not directly `jq`-able — use `--json` when you need `jq` field selection. - -### Vertical Detail - -Used automatically for single-item lookups (e.g., `flashduty incident get ` with one ID). Displays key-value pairs vertically instead of a table row. - ---- - -## Pagination - -Most list commands support offset-based pagination: - -| Flag | Default | Description | -|------|---------|-------------| -| `--limit` | `20` | Max results per page (max 100) | -| `--page` | `1` | Page number (1-based) | - -**Constraint**: `page * limit <= 10000`. For data beyond this window, narrow the query using time filters or other criteria. - ---- - -## Time Parsing - -Time flags (`--since`, `--until`) accept multiple formats. The parser lives in `internal/timeutil/parse.go`. - -| Format | Example | Meaning | -|--------|---------|---------| -| `now` | `now` | Current time | -| Relative duration | `24h`, `30m`, `7d` | Subtract from now | -| Future duration | `+24h`, `+7d` | Add to now (used with `--until` in oncall) | -| Date | `2024-01-15` | Midnight in local timezone | -| Datetime | `2024-01-15 10:00:00` | Exact time in local timezone | -| Unix timestamp | `1705312200` | Exact time (must be > 1000000000) | - -**Notes**: -- The `d` suffix is shorthand for days (e.g., `7d` becomes `168h` internally). -- Negative durations are rejected. -- Default for `--since` is typically `24h`; default for `--until` is typically `now`. 
- ---- - -## Reference Data Commands - -These lookup commands find IDs required by other commands: - -```bash -# Find a person's ID -flashduty member list --name "John" -flashduty member list --email "john@example.com" - -# Find a team ID -flashduty team list --name "SRE" - -# Get full team detail by ID, name, or ref-id -flashduty team get --id 123 - -# Find a channel (collaboration space) ID -flashduty channel list --name "Production" - -# Find custom field definitions -flashduty field list --name "priority" - -# View escalation rules for a channel -flashduty escalation-rule list --channel - -# List status pages -flashduty status-page list -``` - ---- - -## Safety Rules - -**Hard constraints for AI agents operating the CLI:** - -1. **NEVER** create or close incidents without explicit user confirmation. -2. **NEVER** merge incidents or alerts without user confirmation -- merges are **irreversible**. -3. **NEVER** snooze incidents unless the user specifies a duration. -4. **NEVER** reassign or reopen incidents without user confirmation. -5. **NEVER** delete a team without explicit user confirmation -- deletion is **irreversible**. -6. **NEVER** update a team's member list without showing the current members first -- `--person-ids` replaces the entire list. -7. **Always** show what will be affected before executing destructive or mutating operations. -8. When in doubt about severity or scope, **list first, then act**. -9. Prefer **read-only** operations (`list`, `get`, `detail`) unless the user explicitly requests a mutation. -10. For bulk operations (multiple IDs), enumerate the targets and confirm before proceeding. 
- ---- - -## Related Skills - -All `flashduty-*` skills depend on this foundation: - -| Skill | Purpose | -|-------|---------| -| `flashduty-shared` | Foundation (this skill) | -| `flashduty-incident` | Incident lifecycle -- list, create, ack, close, merge, snooze, reassign (Layer 2) | -| `flashduty-alert` | Alert events and alerts -- list, detail, merge to incident (Layer 0 + 1) | -| `flashduty-change` | Change event tracking and trend analysis | -| `flashduty-oncall` | On-call schedules and coverage queries | -| `flashduty-channel` | Collaboration spaces and escalation rules | -| `flashduty-insight` | Analytics -- MTTA, MTTR, top-K alerts, notification trends | -| `flashduty-admin` | Teams, members, custom fields, audit logs | -| `flashduty-template` | Notification template preview and validation | diff --git a/skills/flashduty-statuspage/SKILL.md b/skills/flashduty-statuspage/SKILL.md deleted file mode 100644 index 69c4782..0000000 --- a/skills/flashduty-statuspage/SKILL.md +++ /dev/null @@ -1,267 +0,0 @@ ---- -name: flashduty-statuspage -version: 1.0.0 -description: "Flashduty status page management and one-time migration from external providers. Commands: statuspage list, changes, create-incident, create-timeline; statuspage migrate structure, email-subscribers, status, cancel. Use when publishing an incident or maintenance to a public status page, posting a timeline update, importing an existing Atlassian Statuspage (structure, history, or email subscribers) into Flashduty, or polling a migration job. Does not cover incident response workflows inside Flashduty — see flashduty-incident for that." -metadata: - requires: - bins: ["flashduty"] - cliHelp: "flashduty statuspage --help" ---- - -# flashduty-statuspage - -**CRITICAL** -- Before using this skill, read [`../flashduty-shared/SKILL.md`](../flashduty-shared/SKILL.md) for authentication, the 3-layer noise reduction model, global flags, and safety rules. 
- -## Overview - -Status pages are the public-facing communication layer: they let customers and internal stakeholders see current service health, ongoing incidents, and scheduled maintenance. This skill covers two distinct use cases: - -1. **Day-to-day operations** on existing Flashduty status pages: listing pages, posting incidents and maintenance windows, updating timelines. -2. **One-time migration** from an external provider (currently Atlassian Statuspage) into Flashduty: structure + history in one job, email subscribers in a separate job to control when verification emails go out. - -Migration jobs are **asynchronous**. Start a job, poll its status, cancel if needed. The CLI never blocks waiting on a long-running migration. - -## Quick Decision - -| User wants to... | Command | -|---|---| -| See existing status pages | `statuspage list` | -| See open incidents or maintenance on a page | `statuspage changes --page-id --type incident` | -| Publish a new incident | `statuspage create-incident --page-id --title "..."` | -| Update an ongoing incident | `statuspage create-timeline --page-id --change --message "..."` | -| Migrate a page from Atlassian (contents first) | `statuspage migrate structure --from atlassian ...` | -| Migrate subscribers (after structure) | `statuspage migrate email-subscribers --from atlassian ...` | -| Check migration progress | `statuspage migrate status --job-id ` | -| Stop a runaway migration | `statuspage migrate cancel --job-id ` | - -## Commands - -### statuspage list - -List all status pages visible to the account. - -```bash -flashduty statuspage list [flags] -``` - -| Flag | Type | Description | -|------|------|-------------| -| `--id` | string | Filter by page IDs (comma-separated) | - -Output columns: ID, NAME, SLUG, STATUS, COMPONENTS. - -Use this to look up page IDs for the other commands in this skill. - -### statuspage changes - -List active incidents or maintenance windows on a page. 
- -```bash -flashduty statuspage changes --page-id --type -``` - -| Flag | Type | Description | -|------|------|-------------| -| `--page-id` | int | Page ID (**required**) | -| `--type` | string | `incident` or `maintenance` (**required**) | - -Output columns: ID, TITLE, TYPE, STATUS, CREATED, UPDATED. - -"Active" means not yet resolved / not yet completed. Returns both incident and maintenance changes depending on `--type`. - -### statuspage create-incident - -Publish a new incident on a status page. The incident appears to subscribers and on the public page immediately. - -```bash -flashduty statuspage create-incident --page-id --title [flags] -``` - -| Flag | Type | Description | -|------|------|-------------| -| `--page-id` | int | Page ID (**required**) | -| `--title` | string | Incident title, max 255 chars (**required**) | -| `--message` | string | Initial update message | -| `--components` | string | `id1:status,id2:status` — statuses: `operational`, `degraded`, `partial_outage`, `full_outage` | -| `--notify` | bool | Notify page subscribers (default: false) | - -Use `--notify` deliberately — it sends email + push to every subscriber on the page. - -### statuspage create-timeline - -Add a timeline update to an existing incident or maintenance. Use this to move a change through its lifecycle (`investigating` → `identified` → `monitoring` → `resolved`). 
- -```bash -flashduty statuspage create-timeline --page-id <id> --change <id> --message <msg> [flags] -``` - -| Flag | Type | Description | -|------|------|-------------| -| `--page-id` | int | Page ID (**required**) | -| `--change` | int | Change ID (**required**) — get it from `statuspage changes` | -| `--message` | string | Timeline message (**required**) | -| `--status` | string | Incident: `investigating`, `identified`, `monitoring`, `resolved`; maintenance: `scheduled`, `ongoing`, `completed` | - -The `--status` transition determines when a change is considered resolved / completed and stops appearing in `statuspage changes`. - -### statuspage migrate structure - -Start an asynchronous migration of status page **structure and history** from an external provider into a new Flashduty status page. Components, sections, past incidents, past maintenance windows, and notification templates are imported. **No emails are sent to subscribers.** - -```bash -flashduty statuspage migrate structure --from atlassian --source-page-id <id> --api-key <key> -``` - -| Flag | Type | Description | -|------|------|-------------| -| `--from` | string | Source provider, currently only `atlassian` (**required**) | -| `--source-page-id` | string | Page ID in the source provider (**required**) | -| `--api-key` | string | Source provider API key (**required**) | -| `--url-name` | string | Optional URL name for the newly created Flashduty public page. It is normalized with the same slug rules as page creation and only applies when the source page is not already mapped. If the source page already maps to a different Flashduty URL name, the command fails instead of changing the existing page. | - -Returns a job ID plus the command to poll its status. Human output shows the new Flashduty `target_page_id` once the job reaches the `completed` phase — capture that for the subscriber migration. 
- -### statuspage migrate email-subscribers - -Start a **separate** migration of email subscribers from the external provider into an existing Flashduty status page. Split from `migrate structure` so that operators can verify imported content before waking up the subscriber list. - -```bash -flashduty statuspage migrate email-subscribers --from atlassian --source-page-id <id> --target-page-id <id> --api-key <key> -``` - -| Flag | Type | Description | -|------|------|-------------| -| `--from` | string | Source provider, currently only `atlassian` (**required**) | -| `--source-page-id` | string | Page ID in the source provider (**required**) | -| `--target-page-id` | int | Flashduty page ID from `migrate structure` (**required**) | -| `--api-key` | string | Source provider API key (**required**) | - -### statuspage migrate status - -Poll the progress of an async migration job. - -```bash -flashduty statuspage migrate status --job-id <id> -``` - -| Flag | Type | Description | -|------|------|-------------| -| `--job-id` | string | Migration job ID (**required**) | - -Human output prints the current phase, status, progress counters (components, sections, incidents, maintenances, subscribers, templates), and any accumulated warnings or a fatal error. Poll until `Status: completed`, `failed`, or `cancelled`. - -### statuspage migrate cancel - -Request cancellation of an in-flight migration job. Best-effort: jobs in their final phase may still complete. - -```bash -flashduty statuspage migrate cancel --job-id <id> -``` - -| Flag | Type | Description | -|------|------|-------------| -| `--job-id` | string | Migration job ID (**required**) | - -Returns a confirmation and the command to poll the final state. - -## Workflows - -### Workflow 1: Publish and Manage an Incident - -Post a new incident, move it through investigation, resolve it. - -```bash -# 1. Find the page ID -flashduty statuspage list - -# 2. 
Create the incident (no notifications yet) -flashduty statuspage create-incident \ - --page-id 42 \ - --title "Database latency elevated" \ - --message "We're investigating reports of slow database queries" \ - --components "comp_1:degraded" - -# 3. See the change ID for follow-up updates -flashduty statuspage changes --page-id 42 --type incident - -# 4. Post updates as the incident progresses -flashduty statuspage create-timeline \ - --page-id 42 --change 101 \ - --status identified \ - --message "Root cause: a runaway query. Rolling back now." - -flashduty statuspage create-timeline \ - --page-id 42 --change 101 \ - --status monitoring \ - --message "Rollback complete. Monitoring for recurrence." - -# 5. Resolve -flashduty statuspage create-timeline \ - --page-id 42 --change 101 \ - --status resolved \ - --message "Latency back to baseline. Closing." -``` - -### Workflow 2: Full Atlassian → Flashduty Migration - -Import structure first, verify, then import subscribers. - -```bash -# 1. Start the structure + history migration -flashduty statuspage migrate structure \ - --from atlassian \ - --source-page-id page_atl_123 \ - --url-name customer-facing-status \ - --api-key "$ATLASSIAN_STATUSPAGE_API_KEY" -# → captures Job ID: str_abc - -# 2. Poll until completed -flashduty statuspage migrate status --job-id str_abc -# Repeat until Status: completed. -# Capture Target page ID from the completed job's output. - -# 3. Sanity-check the imported page -flashduty statuspage list --id <new_page_id> -flashduty statuspage changes --page-id <new_page_id> --type incident - -# 4. ONLY AFTER VERIFYING: start subscriber migration -flashduty statuspage migrate email-subscribers \ - --from atlassian \ - --source-page-id page_atl_123 \ - --target-page-id <new_page_id> \ - --api-key "$ATLASSIAN_STATUSPAGE_API_KEY" -# → captures Job ID: sub_xyz - -# 5. 
Poll until completed -flashduty statuspage migrate status --job-id sub_xyz -``` - -### Workflow 3: Stop a Runaway Migration - -```bash -# Saw unexpected warnings or want to retry with a different config -flashduty statuspage migrate cancel --job-id str_abc - -# Confirm it reached a terminal state -flashduty statuspage migrate status --job-id str_abc -# Status should become cancelled. -``` - -## Key Concepts - -- **Page ID** (int) is the Flashduty status page primary key. **Change ID** (int) is the ID of an incident/maintenance within a page. Don't confuse them. -- **Migration is async.** `migrate structure` and `migrate email-subscribers` return immediately with a job ID; the actual work happens on the backend. -- **Two migration jobs, not one.** Structure + history run separately from subscribers. This is deliberate — subscriber import triggers verification emails, so operators verify content first. -- **`--url-name` is create-only.** Use it to choose the public URL slug for a newly created Flashduty page. It does not rename an existing mapped target page; if the Atlassian page has already been migrated to another URL name, retry without `--url-name` or use the mapped page. -- **Migration phases** for the structure job progress in order: `components` → `sections` → `history` (incidents + maintenances) → `templates`. The subscribers job has a single `subscribers` phase. -- **Terminal statuses:** `completed`, `failed`, `cancelled`. Stop polling once any of these appears. -- **`--notify` is subscriber-visible.** In `create-incident`, omit or set `--notify=false` for silent incidents; set `--notify` when you want an announcement. -- **Component statuses vary by change type.** Incident statuses: `operational`, `degraded`, `partial_outage`, `full_outage`. Maintenance statuses: `operational`, `under_maintenance`. -- **Source provider support** is currently Atlassian Statuspage only. Other providers will require SDK + CLI updates. 
- -## Cross-References - -- **Prerequisites:** `flashduty-shared` — authentication, global flags (`--json`, `--no-trunc`), and safety rules. -- **Related skills:** - - `flashduty-incident` — internal Flashduty incident response (distinct from public status page incidents). - - `flashduty-channel` — channels are how alerts are routed internally; status pages publish to customers. They can be wired together but are independent concepts. diff --git a/skills/flashduty-template/SKILL.md b/skills/flashduty-template/SKILL.md deleted file mode 100644 index 6702708..0000000 --- a/skills/flashduty-template/SKILL.md +++ /dev/null @@ -1,182 +0,0 @@ ---- -name: flashduty-template -version: 1.0.0 -description: "Flashduty notification template management: get preset templates, validate and preview custom templates, explore available variables and functions. Commands: template get-preset, validate, variables, functions. Use when customizing incident notification rendering, testing template changes, debugging template errors, or exploring available template variables and Sprig/custom functions." -metadata: - requires: - bins: ["flashduty"] - cliHelp: "flashduty template --help" ---- - -# flashduty-template - -**CRITICAL** -- Before using this skill, read [`../flashduty-shared/SKILL.md`](../flashduty-shared/SKILL.md) for authentication, the 3-layer noise reduction model, global flags, and safety rules. - -## Overview - -Templates control how incident notifications are rendered across different channels (Slack, email, SMS, etc.). This skill covers retrieving preset templates, validating custom templates, and exploring the available template variables and functions. - -## Commands - -### template get-preset - -Get the default/preset template for a specific notification channel. Useful as a starting point for customization. 
- -```bash -flashduty template get-preset --channel <notification_channel> -``` - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--channel` | string | | Notification channel (**required**). Values include various Flashduty notification channels (e.g. Slack, email, SMS). | - -Example: -```bash -# Get the preset Slack notification template -flashduty template get-preset --channel slack -``` - -### template validate - -Validate a template file and preview the rendered output. Reports validation status, errors, warnings, rendered size vs channel limit, and a preview of the rendered notification. - -```bash -flashduty template validate --channel <channel> --file <path> [--incident <id>] -``` - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--channel` | string | | Notification channel (**required**) | -| `--file` | string | | Path to template file on local filesystem (**required**) | -| `--incident` | string | | Real incident ID for preview (optional; uses mock data if omitted) | - -Examples: -```bash -# Validate with mock data -flashduty template validate --channel slack --file ./my-template.tpl - -# Validate with a real incident for realistic preview -flashduty template validate --channel slack --file ./my-template.tpl --incident abc123 -``` - -### template variables - -List all available template variables with name, type, description, and example values. 
- -```bash -flashduty template variables [--category <category>] -``` - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--category` | string | | Filter by category: `core`, `time`, `people`, `alerts`, `labels`, `context`, `notification`, `post_incident` | - -Examples: -```bash -# List all variables -flashduty template variables - -# List only core incident variables -flashduty template variables --category core - -# List alert-related variables -flashduty template variables --category alerts -``` - -### template functions - -List available template functions with name, syntax, and description. Includes both Flashduty custom functions and Sprig template functions. - -```bash -flashduty template functions [--type <type>] -``` - -| Flag | Type | Default | Description | -|------|------|---------|-------------| -| `--type` | string | `all` | Filter by type: `custom`, `sprig`, or `all` | - -Examples: -```bash -# List all functions -flashduty template functions - -# List only Flashduty custom functions -flashduty template functions --type custom - -# List only Sprig functions -flashduty template functions --type sprig -``` - -## Workflows - -### Workflow 1: Customize a Notification Template - -Start from a preset, modify it, and validate the result. - -```bash -# 1. Get the preset template as a starting point -flashduty template get-preset --channel slack - -# 2. Save the output to a file and edit it -# (redirect output or copy-paste into ./my-template.tpl) - -# 3. Validate your changes with mock data -flashduty template validate --channel slack --file ./my-template.tpl - -# 4. Test with a real incident for a realistic preview -flashduty template validate --channel slack --file ./my-template.tpl --incident <incident_id> -``` - -### Workflow 2: Explore Available Template Data - -Discover what variables and functions are available for use in templates. - -```bash -# 1. See all variable categories -flashduty template variables - -# 2. 
Focus on specific data areas -flashduty template variables --category core -flashduty template variables --category alerts -flashduty template variables --category time - -# 3. See available functions -flashduty template functions --type custom -flashduty template functions --type sprig -``` - -### Workflow 3: Debug a Template Rendering Issue - -Diagnose why a template is not rendering as expected. - -```bash -# 1. Check for syntax errors and validation issues -flashduty template validate --channel email --file ./broken-template.tpl - -# 2. Review errors and warnings in the output -# - Syntax errors point to the exact location of the problem -# - Warnings highlight deprecated variables or risky patterns - -# 3. Check rendered size vs channel limit -# - Each channel enforces a maximum rendered size -# - The validate command reports the current size and the limit - -# 4. Look up variables used in the template -flashduty template variables --category core -``` - -## Key Concepts - -- Templates use **Go template syntax** (similar to Jinja2/Mustache but with `{{ }}` delimiters and pipeline operators). -- Each notification channel has its own template format and **size limits** -- validate always reports size vs limit. -- **Sprig functions** (string manipulation, date formatting, math, etc.) are available out of the box. -- Flashduty adds **custom functions** on top of Sprig for domain-specific operations. -- Validation checks **syntax correctness**, **size limits**, and **variable existence**. -- Template CRUD (create, update, delete) is planned for a future CLI release -- currently templates are managed via the Flashduty web UI. - -## Cross-References - -- **Prerequisites:** `flashduty-shared` -- authentication, global flags (`--json`, `--api-key`, `--api-host`). -- **Related skills:** - - `flashduty-incident` -- templates render incident data; use to find real incident IDs for preview. 
- - `flashduty-channel` -- templates are used within escalation notification channels. diff --git a/skills/flashduty/SKILL.md b/skills/flashduty/SKILL.md new file mode 100644 index 0000000..093b655 --- /dev/null +++ b/skills/flashduty/SKILL.md @@ -0,0 +1,73 @@ +--- +name: flashduty +version: "3.0" +description: "USE FIRST for Flashduty tasks — status pages, incidents, alerts, on-call, monitors, RUM, members. `fduty` CLI = the whole API. ALWAYS load this skill + read reference/<domain>.md for exact verbs & flags BEFORE running fduty. Don't guess or --help-dance." +allowed-tools: bash, read +hidden: true # internal-only: withheld from skills.sh public discovery (Safari embeds this skill directly). +--- + +# Flashduty CLI + +`fduty` is your interface to Flashduty — invoke it from `bash`. This SKILL.md is a **router**: shared model + conventions below, then a domain index. For a real task, **read the one `reference/<domain>.md` card first** — it carries every command, flag, enum, and the worked flow for that domain, so you operate without `--help` trial-and-error or guessing command names. + +## Auth & availability + +- **Auth.** Set your Flashduty app key once — `export FLASHDUTY_APP_KEY=<key>` — or pass `--app-key <key>` per call. Then just call the verb. +- **No curl for the API.** The CLI is the only supported path to Flashduty — never hand-roll an HTTP call. +- **If `fduty: command not found`** (rare — it is normally on PATH at startup): install from the Flashduty CDN into a user-writable dir (no sudo, no hang), then tell the user — don't work around it: `curl -sSL https://static.flashcat.cloud/flashduty-cli/install.sh | FLASHDUTY_INSTALL_DIR="$HOME/.local/bin" INSTALLED_NAME=fduty sh && export PATH="$HOME/.local/bin:$PATH"`. + +## Data model — 3 layers + +`Alert Event` (raw signal) → `Alert` (deduplicated by `alert_key`) → `Incident` (actionable; 1–5000 alerts). `Change` events correlate to incidents by shared labels + time proximity (no foreign key). 
Command groups map to these layers: `alert-event`, `alert`, `incident`, `change`. + +## Output — prefer `toon` + +Append `--output-format toon` to read commands: it drops the per-row repeated keys that JSON emits, so lists cost far fewer tokens. Use `--json` only to pipe into `jq`. Bare output is a human table — don't parse it. + +**Empty result = authoritative not-found.** A filter returning `[]` means no such entity in scope — report it (optionally the 1–2 closest names) and stop. Do **not** brute-force (no shifted-keyword re-queries, no widening past caps, no full-dump grep). Never infer "feature not enabled" from an empty list, and never fabricate data absent from tool output. + +**A result you did not fetch is "unknown", never "empty".** You may report a command's result — including "returned empty" or any count/list/finding — **only if that exact command appears in your tool-call history this turn**. If you did not run it, the honest answer is "未查询 / not queried", followed by the command to run. Writing "`incident similar` 返回空" or "无变更" for a command you never executed is fabrication, not a summary. + +## Command names — don't guess, read the card + +The hot path: **read the domain card** (index below) for the exact verb + flags. Command groups are hyphenated (`status-page`, `alert-event`), not concatenated (`statuspage`) — guessing the wrong form costs a failed call. For a command outside the cards, derive it from its API path: **group = first path segment, verb = the rest joined by `-`** (`POST /status-page/change/create` → `fduty status-page change-create`), then confirm with `fduty <group> <verb> --help`. Pass nested-object / array fields as JSON via `--data '{...}'`; typed scalar flags override matching `--data` keys. + +**Positional arguments.** A card heading like `### change-create <page-id>` means that id is **positional** — pass it as the first bare argument (`change-create 5759… --type incident`), not as `--page-id`. 
A heading with no `<…>` takes all inputs as flags. Cards mark each positional with a `(positional, required)` row; trust the heading over your instinct to use a flag. + +## fduty answers directly — don't grep or browse + +Configuration, permission-model, enrichment, monitor, and on-call questions are answered by `fduty` itself (the cards + the live commands). Do **not** grep external documentation or browse the web for something the CLI covers — that usually returns staler information than the live API. Read the card, run the verb. + +## Safety — confirm before mutating + +Read verbs (`list`, `get`, `info`, `detail`, `timeline`) are free. Mutating verbs (`create`, `update`, `delete`, `merge`, `ack`, `close`, `assign`, `move`, …) change state — recommend the action and get explicit per-target confirmation first. `merge` / `delete` are **irreversible** — double-check IDs. `create` notifies responders/subscribers. `list` before any bulk mutate to confirm the IDs. + +## Compound flows — bundled scripts + +Some asks span several commands. For those the skill ships a script that fetches everything in one call — run it as your **first action** for that ask, rather than hand-picking commands and writing the rest from memory: + +- **Full incident fault analysis** (详情 + 关联告警 + 变更 + 时间线 + 相似故障 + 复盘 / detail + alerts + changes + timeline + similar + post-mortems): `bash scripts/incident-summary.sh <incident-id>` — runs all six reads and prints them in one block, so the summary is written from real output. See `reference/incident.md`. 
+ +## Domain index — read the card for the task + +| intent / 意图 (terms route in either language) | card | +|---|---| +| incident / fault / 故障 / 事件 / triage 分诊 / acknowledge 认领 / merge 合并 / escalate 升级 / postmortem 复盘 / **summarize or analyze an incident 故障汇总分析** | **`reference/incident.md`** | +| alert / 告警 / dedup 去重 / alert fields 告警字段 / alert pipeline 告警管道 | **`reference/alert.md`** | +| change / 变更 / deployment 部署 / release 发布 / correlated change 变更关联 / what changed | **`reference/change.md`** | +| monitor / 监控 / alert rule 告警规则 / datasource 数据源 / inspection 巡检 / rule config 规则配置 | **`reference/monit.md`** | +| metric/log query / 指标查询 / 日志查询 / PromQL / LogsQL / SQL / trend 趋势 / log clustering 日志聚类 / datasource RCA 数据源排查 | **`reference/monit-query.md`** | +| host diagnostics / 主机诊断 / on-box / process 进程 / load 负载 / lock 锁 / slow query 慢查询 / mysql / reachability 可达性 | **`reference/monit-agent.md`** | +| channel / 协作空间 / collaboration space / 频道 / integration 集成 / dispatch rule 分派规则 / escalation 升级规则 / noise reduction 降噪 / silence 静默 / inhibit 抑制 | **`reference/channel.md`** | +| enrichment / 数据加工 / 富化 / label mapping 字段映射 / extraction 提取 / mapping schema 集成 schema | **`reference/enrichment.md`** | +| insight / 洞察 / stats 统计 / trend 趋势 / MTTA / MTTR / top alerts Top 告警 / incident export 故障导出 | **`reference/insight.md`** | +| schedule / on-call / 值班 / 排班 / rotation 轮值 / who is on call 谁在值班 / shift 班次 / next responder 下一班 | **`reference/schedule.md`** | +| calendar / 日历 / on-call calendar 值班日历 / calendar event 日历事件 / holiday 休假 | **`reference/calendar.md`** | +| template / 通知模板 / message template 消息模板 / card template 卡片模板 | **`reference/template.md`** | +| role / 角色 / permission 权限 / RBAC / permission factor 权限因子 | **`reference/role.md`** | +| team / 团队 / team membership 团队成员归属 / HR sync HR 同步 | **`reference/team.md`** | +| member / 成员 / person 人员 / invite 邀请 / grant role 角色授予 | **`reference/member.md`** | +| custom field / 自定义字段 / field option 字段选项 | 
**`reference/field.md`** | +| route / 分派路由 / alert routing 告警路由 / integration routing 集成路由 / routing case 路由用例 | **`reference/route.md`** | +| RUM / real user monitoring / 真实用户监控 / frontend 前端 / application 应用 / issue | **`reference/rum.md`** | +| status page / 状态页 / public incident 公开事件 / public timeline 公开时间线 / maintenance window 维护窗口 / subscriber 订阅者 | **`reference/status-page.md`** | diff --git a/skills/flashduty/reference/alert.md b/skills/flashduty/reference/alert.md new file mode 100644 index 0000000..569573b --- /dev/null +++ b/skills/flashduty/reference/alert.md @@ -0,0 +1,143 @@ +# fduty alert — command card + +Prereq: `SKILL.md` read. Read verbs are free. `merge` is **irreversible** (alerts cannot be un-merged). `pipeline-upsert` **replaces** the full pipeline config. Confirm IDs before either. + +## Route here when + +"告警 / 告警事件 / 去重 / 合并到故障 / 告警流水线 / alert noise / dedup / severity filter / alert pipeline" → **alert**, NOT `incident` (incident = the actionable layer above alerts). Key ID: **`alert_id` (ObjectID hex string)** — get it from `alert list` output or from `incident alerts <incident-id>` (incident domain). Pipeline verbs need an **`integration_id` (int)** from the channel/integration domain. 
+ +## Intent → verb + +| want | verb | +|---|---| +| active / recovered / muted alerts in a time window | `list` | +| full detail of one alert | `get` | +| full detail of one alert (alternate path) | `info` | +| raw events deduplicated into one alert | `events` | +| raw events for one alert (alternate) | `event-list` | +| alert state-transition history | `feed` | +| alert state-transition history (alternate) | `timeline` | +| fetch multiple alerts by ID list | `list-by-ids` | +| reassign alerts to a different incident | `merge` | +| get processing pipeline for an integration | `pipeline-info` | +| get pipelines for multiple integrations | `pipeline-list` | +| create or replace a processing pipeline | `pipeline-upsert` | + +## Hot flow — investigate an incident's root alerts + +```bash +# 1. list contributing alerts (from the incident domain) +fduty incident alerts <incident-id> --output-format toon +# 2. inspect the worst alert +fduty alert get <alert-id> --output-format toon +# 3. trace raw events deduplicated into that alert +fduty alert events <alert-id> --output-format toon +# 4. view state transitions (mute/severity changes/operator actions) +fduty alert feed <alert-id> --output-format toon +``` + +## Hot flow — merge noisy alerts into an existing incident + +```bash +# 1. find active critical alerts in the last 4 hours +fduty alert list --severity Critical --active --since 4h --output-format toon +# 2. merge (IRREVERSIBLE) — alert IDs are POSITIONAL; --incident-id is a flag +fduty alert merge <alert-id1> <alert-id2> --incident-id <incident-id> --comment "Related disk alerts" +``` + +<!-- GENERATED:alert START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### event-list <alert-id> +List events for an alert +- `<alert-id>` (positional, required) string — Alert ID (ObjectID hex string). + +### events <alert_id> +List alert events + +### feed <alert-id> +List alert activity feed +- `<alert-id>` (positional, required) string — Alert ID. 
+- `--asc` bool — Sort ascending. +- `--limit` int64 — Page size, max 100, default 20. +- `--page` int64 — Page number, starting at 1. +- `--search-after-ctx` string +- `--types` stringSlice — Filter by feed types. + +### get <alert_id> +Get alert detail + +### info <alert-id> +Get alert detail +- `<alert-id>` (positional, required) string — Alert ID (ObjectID hex string). + +### list +List alerts +- `--active` bool +- `--channel` string +- `--limit` int +- `--muted` bool +- `--page` int +- `--recovered` bool +- `--severity` string +- `--since` string +- `--until` string + +### list-by-ids <alert-id> [<id2>...] +List alerts by IDs +- `<alert-ids>` (positional, required) stringSlice — List of alert IDs (ObjectID hex strings). + +### merge <alert-id> [<id2>...] +Merge alerts into an incident +- `<alert-ids>` (positional, required) stringSlice — Alert IDs to merge. +- `--comment` string — Optional comment on the merge action. +- `--incident-id` string (required) — Target incident ID. +- `--owner-id` int64 — Optional new owner for the target incident. +- `--title` string — Optional new title for the target incident. + +### pipeline-info <integration-id> +Get alert pipeline +- `<integration-id>` (positional, required) int64 — Integration ID. + +### pipeline-list <integration-id> [<id2>...] +List alert pipelines +- `<integration-ids>` (positional, required) intSlice — Integration IDs. + +### pipeline-upsert <integration-id> +Create or update alert pipeline +- `<integration-id>` (positional, required) int64 — Integration ID to configure. +- body-only (`--data`): rules (array<object>) (required) + +### timeline <alert_id> +View alert timeline +- `--limit` int +- `--page` int + +<!-- GENERATED:alert END --> + +## Alert status values + +- **`alert_severity`** / **`alert_status`**: `Critical` · `Warning` · `Info` · `Ok` +- An alert is **active** if no recovery signal has been received; **recovered** once a recovery fires or it is manually resolved. 
+- `--active` and `--recovered` on `list` are mutually exclusive — passing both returns an error. + +## Pipeline rule kinds + +`pipeline-upsert` replaces the whole pipeline; `rules[].kind` values: `title_reset` · `description_reset` · `severity_reset` · `alert_drop` · `alert_inhibit`. The `rules` array has no typed flag — pass it via `--data '{"rules":[...]}'`. The call is idempotent (upsert), so re-running with the same body is safe. + +## Gotchas + +- **All alert verbs are positional except `list` and the two-ID `merge` flag.** Every verb with `<alert-id>` in its `use` form takes that ID as the first bare argument — do NOT pass `--alert-id`. The single exception: `merge` takes its alert IDs positionally AND requires `--incident-id` as a flag (two different kinds of ID, different roles). +- **`alert get` vs `alert info`, `alert events` vs `alert event-list`:** both pairs exist; prefer `get`/`events` (shorter, no extra flag); `info`/`event-list` accept `--alert-id` as a flag override for scripting. +- **No server-side title filter on `list`.** To search by title, use `--json` and pipe to `jq`: `fduty alert list --json | jq '.[] | select(.title | test("disk";"i"))'` +- **`list` time window cap is 31 days**; `--limit` max is 100. For broader queries use the `insight` domain. +- **`pipeline-upsert` fully replaces** the existing pipeline — always fetch current config with `pipeline-info` first and include unchanged rules in the new body. +- **Empty `list` result is authoritative** — report "no alerts match" and stop; do not widen filters or retry with alternate keywords. 
+ +## Worked example + +```bash +# Find active Critical alerts in a specific channel and view the noisiest one +fduty alert list --severity Critical --active --channel 98765 --since 2h --output-format toon +fduty alert get <alert-id> --output-format toon +fduty alert events <alert-id> --output-format toon +``` diff --git a/skills/flashduty/reference/calendar.md b/skills/flashduty/reference/calendar.md new file mode 100644 index 0000000..1492516 --- /dev/null +++ b/skills/flashduty/reference/calendar.md @@ -0,0 +1,144 @@ +# fduty calendar — command card + +Prereq: `SKILL.md` read. **`delete` is irreversible** (calendar + all its events gone). `event-delete` is irreversible per event. Reads are free; confirm IDs before any delete. + +## Route here when + +"服务日历 / 工作日 / 非工作日 / 节假日 / 补班 / 值班日历" → **calendar**, NOT `oncall` (oncall = who is on call in a schedule; calendar = custom working/non-working day definitions used by oncall rules). + +You need a **`cal_id`** (string, format `cal.<uuid>`). Get it from `calendar list`. For events you also need **`event_id`** (returned by `event-upsert`; list with `event-list`). + +Public holiday calendars (e.g. `zh-cn.china.official`) are read-only — list them with `--kind region.official.holiday`, then inherit them into a personal calendar via `--extra-cal-ids`. + +## Intent → verb + +| want | verb | +|---|---| +| list personal calendars | `list` | +| browse public-holiday calendars | `list --kind region.official.holiday` | +| calendar details (config) | `info <cal-id>` | +| create a new personal calendar | `create` | +| rename / change workdays / inherit holidays | `update <cal-id>` | +| delete a calendar (irreversible) | `delete <cal-id>` | +| list events in a calendar | `event-list <cal-id>` | +| add or edit a working/non-working day override | `event-upsert <cal-id>` | +| remove a calendar event (irreversible) | `event-delete` | + +## Hot flow — create a calendar with holiday inheritance + +```bash +# 1. 
Find available public-holiday cal IDs for your locale +fduty calendar list --kind region.official.holiday --output-format toon + +# 2. Create a personal calendar that inherits CN public holidays, Mon–Fri workdays +fduty calendar create --cal-name "Ops Workdays" \ + --timezone Asia/Shanghai \ + --workdays 1,2,3,4,5 \ + --extra-cal-ids zh-cn.china.official +# → returns cal_id; save it + +# 3. Mark a make-up workday (補班) on a Saturday +fduty calendar event-upsert <cal-id> --summary "補班 (New Year)" \ + --start-at 2026-01-17 --end-at 2026-01-18 --is-off false + +# 4. Mark a custom holiday (non-working) +fduty calendar event-upsert <cal-id> --summary "Team offsite" \ + --start-at 2026-03-20 --end-at 2026-03-22 --is-off true +``` + +## Hot flow — audit & clean up events for a month + +```bash +# List all events in January 2026 +fduty calendar event-list <cal-id> --year 2026 --month 1 --output-format toon + +# Delete a specific event (get event_id from the list above) +fduty calendar event-delete --cal-id <cal-id> --event-id <event-id> +``` + +<!-- GENERATED:calendar START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### create +Create calendar +- `--cal-name` string (required) — Calendar display name. (1-39 chars) +- `--description` string — Calendar description. (≤499 chars) +- `--extra-cal-ids` stringSlice — Additional public-holiday calendar IDs to inherit events from (for example zh-cn.china.official). +- `--team-id` int64 — Owning team ID. 0 means no team. +- `--timezone` string — IANA timezone. Defaults to Asia/Shanghai when empty. +- `--workdays` intSlice — Workday numbers (0 = Sunday, 6 = Saturday). + +### delete <cal-id> +Delete calendar +- `<cal-id>` (positional, required) string — Calendar ID. + +### event-delete +Delete calendar event +- `--cal-id` string (required) — Calendar ID. +- `--event-id` string (required) — Event ID. + +### event-list <cal-id> +List calendar events +- `<cal-id>` (positional, required) string — Calendar ID. 
+- `--day` int64 — Day (1-31). 0 means no day filter. (0-31) +- `--month` int64 — Month (1-12). 0 means no month filter. (0-12) +- `--year` int64 — Year. Defaults to the current year when omitted. (min 2023) + +### event-upsert <cal-id> +Upsert calendar event +- `<cal-id>` (positional, required) string — Calendar ID. +- `--description` string — Event description. (≤499 chars) +- `--end-at` string (required) — Event end date in YYYY-MM-DD (exclusive). +- `--event-id` string — Event ID. Omit when creating. (≤63 chars) +- `--is-off` bool (required) — Whether the event marks a non-working day. true = day off, false = working day override. +- `--start-at` string (required) — Event start date in YYYY-MM-DD. +- `--summary` string (required) — Event summary. (1-39 chars) + +### info <cal-id> +Get calendar info +- `<cal-id>` (positional, required) string — Calendar ID. + +### list +List calendars +- `--kind` string — Calendar kind filter. Defaults to personal when empty. · enum: region.official.holiday | personal +- `--no-locale` bool — Disable locale filtering when listing public-holiday calendars. + +### update <cal-id> +Update calendar +- `<cal-id>` (positional, required) string — Calendar ID. +- `--cal-name` string — New calendar name. (1-39 chars) +- `--description` string — New description. (≤499 chars) +- `--extra-cal-ids` stringSlice — Additional public-holiday calendar IDs to inherit events from. +- `--team-id` int64 — New owning team ID. +- `--timezone` string — New IANA timezone. +- `--workdays` intSlice — Workday numbers (0 = Sunday, 6 = Saturday). + +<!-- GENERATED:calendar END --> + +## Key concepts + +- **`is-off` (bool, required on event-upsert):** `true` = mark as non-working day (holiday/closure); `false` = override to working day (make-up workday / 補班). This is the only enum-like field — it must be explicit; the server rejects a missing value. +- **`end-at` is exclusive:** a single-day event on 2026-01-17 needs `--start-at 2026-01-17 --end-at 2026-01-18`. 
+- **`workdays` integers:** 0 = Sunday, 1 = Monday … 6 = Saturday. Standard Mon–Fri = `1,2,3,4,5`. +- **Calendar kinds:** `personal` (editable, default filter) vs `region.official.holiday` (read-only, browsable). The returned `kind` field can also be `religion.holiday`. +- **Account cap:** max 5 personal calendars per account by default. + +## Gotchas + +- **`cal-id` is POSITIONAL on `info`, `update`, `delete`, `event-list`, `event-upsert`** — pass it as the first bare argument: `fduty calendar info <cal-id>`. On `event-delete` both `--cal-id` and `--event-id` are flags (no positional — `use` is bare `event-delete`). +- **`event-upsert` creates OR updates** — omit `--event-id` to create; supply it to edit an existing event. The returned `event_id` is what to save for future edits or deletes. +- **`list` defaults to `--kind personal`** — you will NOT see public-holiday calendars unless you pass `--kind region.official.holiday`. Add `--no-locale` to see all locales, not just yours. +- **`delete` removes the calendar and ALL its events** — confirm `cal_id` with `list` first; irreversible. +- **`extra-cal-ids` on update is a full replacement list** — pass ALL desired public-holiday IDs, not just the new one; omitting an ID removes it. + +## Worked example + +Mark the Spring Festival week (2026) as non-working in a personal calendar: + +```bash +fduty calendar event-upsert cal.abc123 \ + --summary "Spring Festival" \ + --start-at 2026-02-15 --end-at 2026-02-24 \ + --is-off true \ + --description "Golden Week — office closed." +# Returns event_id for future edits. +``` diff --git a/skills/flashduty/reference/change.md b/skills/flashduty/reference/change.md new file mode 100644 index 0000000..0051909 --- /dev/null +++ b/skills/flashduty/reference/change.md @@ -0,0 +1,54 @@ +# fduty change — command card + +Prereq: `SKILL.md` read. One read-only verb. Change events are the "what changed" signal you correlate against an incident during fault analysis. 
+ +## Route here when + +"变更 / 变更事件 / 变更关联 / 发布 / 部署 / change / change event / deployment / release / what changed / correlated change" → **change**. Change events carry `labels`; they correlate to incidents by **shared labels + time proximity** (there is no foreign key). For *status-page* maintenance/incident events use `status-page` instead — that is a different "change". + +## Intent → verb + +| want | verb | +|---|---| +| list recent change events (filter by window / channel / integration / keyword) | `list` | + +## Hot flow — find changes around an incident + +```bash +# Pull recent changes in the incident's window, then eyeball label/time overlap with the incident. +fduty change list --since 24h --output-format toon + +# Narrow by the integration or channel that emitted them, or by a keyword: +fduty change list --since 48h --integration <integration-id> --query "deploy" --output-format toon +``` + +<!-- GENERATED:change START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### list +List changes +- `--channel` string +- `--integration` string +- `--limit` int +- `--page` int +- `--query` string +- `--since` string +- `--until` string + +<!-- GENERATED:change END --> + +## Key concepts + +- **Correlation is heuristic, not relational.** A change is "related" to an incident when their `labels` overlap and their timestamps are close — there is no `incident_id` on a change. Judge the overlap yourself; do not claim a causal link the data doesn't support. +- **`--integration` / `--channel`** scope to the source that emitted the change; **`--since` / `--until`** bound the window (relative like `24h`, `-1h`, `now`, or Unix seconds). + +## Gotchas + +- **List-only domain.** There is no `change get` / `change detail` verb — `list` (with filters) is the whole surface. Don't guess a detail verb. +- **Empty result is authoritative** — no changes in that window/scope. Report it; don't widen blindly or invent a change to explain the incident. 
+ +## Worked example + +```bash +# Changes in the last 6h on a specific integration, newest first +fduty change list --since 6h --integration 5759613685214 --output-format toon +``` diff --git a/skills/flashduty/reference/channel.md b/skills/flashduty/reference/channel.md new file mode 100644 index 0000000..716bc84 --- /dev/null +++ b/skills/flashduty/reference/channel.md @@ -0,0 +1,306 @@ +# fduty channel — command card + +Prereq: `SKILL.md` read. **SKILL.md + this card = full competence on channels — no `--help` needed.** Read verbs are free; `create`, `update`, `delete`, `escalate-rule-create/update/delete`, `inhibit-rule-*`, `silence-rule-*`, `unsubscribe-rule-*` all mutate state — confirm before acting. `delete` is **irreversible**. + +## Route here when + +"协作空间 / 频道 / 渠道 / 告警分组 / 降噪 / 静默 / 抑制 / 丢弃 / 升级策略 / 告警收敛 / channel / collaboration space / escalation rule / silence / inhibit / drop rule" → **channel**, NOT `incident` (incidents live _inside_ a channel) or `alert` (alerts are routed _into_ a channel). **`协作空间` (collaboration space) IS the `channel` API noun** — a naive translation would be "频道", but Flashduty's product surfaces it as 协作空间. Key IDs: **`channel-id` (int)** from `channel list`; **`rule-id` (MongoDB ObjectID string)** from `escalate-rule-list`, `inhibit-rule-list`, `silence-rule-list`, `unsubscribe-rule-list`. 
+ +## Intent → verb + +| want | verb | +|---|---| +| list all channels (with team/name filter) | `list` | +| channel detail | `info <channel-id>` | +| batch fetch channels | `infos <channel-id> [id2 ...]` | +| create a channel | `create` | +| rename / reconfigure a channel | `update <channel-id>` | +| disable / re-enable a channel | `disable <channel-id>` / `enable <channel-id>` | +| delete a channel | `delete <channel-id>` | +| list escalation rules | `escalate-rule-list <channel-id>` | +| escalation rule detail | `escalate-rule-info` | +| add escalation rule | `escalate-rule-create` | +| edit escalation rule | `escalate-rule-update` | +| toggle escalation rule | `escalate-rule-enable` / `escalate-rule-disable` | +| remove escalation rule | `escalate-rule-delete` | +| list / create / update / toggle / delete inhibit rules | `inhibit-rule-list <channel-id>` / `inhibit-rule-create <channel-id>` / `inhibit-rule-update` / `inhibit-rule-enable` / `inhibit-rule-disable` / `inhibit-rule-delete` | +| list / create / update / toggle / delete silence rules | `silence-rule-list <channel-id>` / `silence-rule-create <channel-id>` / `silence-rule-update` / `silence-rule-enable` / `silence-rule-disable` / `silence-rule-delete` | +| list / create / update / toggle / delete drop (unsubscribe) rules | `unsubscribe-rule-list <channel-id>` / `unsubscribe-rule-create <channel-id>` / `unsubscribe-rule-update` / `unsubscribe-rule-enable` / `unsubscribe-rule-disable` / `unsubscribe-rule-delete` | + +## Hot flow — create channel + add escalation rule + +```bash +# 1. find the owning team-id +fduty team list --output-format toon +# 2. create the channel (no positional; --channel-name and --team-id are required) +fduty channel create --channel-name "production-api" --team-id <team-id> \ + --auto-resolve-timeout 3600 --auto-resolve-mode trigger +# → returns channel_id; use it below + +# 3. 
add an escalation rule (all flags; layers is required via --data) +fduty channel escalate-rule-create \ + --channel-id <channel-id> --rule-name "P1 on-call" --template-id <template-id> \ + --data '{"layers":[{"target":{"person_ids":[<member-id>],"by":{"critical":["voice","sms"],"warning":["feishu"]}},"notify_step":5,"max_times":3,"escalate_window":30}]}' +``` + +## Hot flow — add a silence rule during maintenance + +```bash +# channel-id is POSITIONAL on silence-rule-create (see use: "silence-rule-create <channel-id>") +fduty channel silence-rule-create <channel-id> \ + --rule-name "planned-maintenance-2026-07-01" \ + --is-auto-delete \ + --data '{"time_filter":{"start_time":1782864000,"end_time":1782907200}}' +# verify +fduty channel silence-rule-list <channel-id> --output-format toon +``` + +<!-- GENERATED:channel START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### create +Create channel +- `--auto-resolve-mode` string — Auto-resolve timer reset mode. · enum: trigger | update +- `--auto-resolve-timeout` int64 — Auto-resolve timeout in seconds. 0 disables auto-resolve. Max 30 days. (0-2592000) +- `--channel-name` string (required) — Channel name. 1 to 59 characters. (1-59 chars) +- `--description` string — Free-form description. Up to 500 characters. (≤500 chars) +- `--disable-auto-close` bool — Disable automatic incident closing. +- `--disable-outlier-detection` bool — Disable outlier incident detection. +- `--is-external-report-enabled` bool — Allow external reporters to file incidents into this channel. +- `--is-private` bool — When true, the channel is visible only to its managing teams. +- `--managing-team-ids` intSlice — Additional teams that can manage the channel. Up to 3 entries. +- `--plugin-ids` intSlice — IDs of plugins (integrations) subscribed to this channel. +- `--team-id` int64 (required) — Owning team ID. 
+- body-only (`--data`): escalate_rule (object); flapping (object); group (object) + +### delete <channel-id> +Delete channel +- `<channel-id>` (positional, required) int64 — Channel ID. + +### disable <channel-id> +Disable channel +- `<channel-id>` (positional, required) int64 — Channel ID. + +### enable <channel-id> +Enable channel +- `<channel-id>` (positional, required) int64 — Channel ID. + +### escalate-rule-create +Create escalation rule +- `--aggr-window` int64 — Aggregation window in seconds. 0 disables aggregation. (0-3600) +- `--channel-id` int64 (required) — Channel the rule belongs to. +- `--description` string — Rule description, up to 500 characters. (≤500 chars) +- `--priority` int64 — Evaluation priority. Lower runs first. (0-200) +- `--rule-name` string (required) — Rule name, 1 to 39 characters. (1-39 chars) +- `--template-id` string (required) — Notification template ID (MongoDB ObjectID). +- body-only (`--data`): filters (array<array>); layers (array<object>) (required); time_filters (array<object>) + +### escalate-rule-delete +Delete escalation rule +- `--channel-id` int64 (required) — Channel the rule belongs to. +- `--rule-id` string (required) — Rule ID (MongoDB ObjectID). + +### escalate-rule-disable +Disable escalation rule +- `--channel-id` int64 (required) — Channel the rule belongs to. +- `--rule-id` string (required) — Rule ID (MongoDB ObjectID). + +### escalate-rule-enable +Enable escalation rule +- `--channel-id` int64 (required) — Channel the rule belongs to. +- `--rule-id` string (required) — Rule ID (MongoDB ObjectID). + +### escalate-rule-info +Get escalation rule detail +- `--channel-id` int64 (required) — Channel the rule belongs to. +- `--rule-id` string (required) — Rule ID (MongoDB ObjectID). + +### escalate-rule-list <channel-id> +List escalation rules +- `<channel-id>` (positional, required) int64 — Channel to list rules for. 
+ +### escalate-rule-update +Update escalation rule +- `--aggr-window` int64 — Aggregation window in seconds. 0 disables aggregation. +- `--channel-id` int64 (required) — Channel the rule belongs to. +- `--description` string — Rule description, up to 500 characters. (≤500 chars) +- `--priority` int64 — Evaluation priority. Lower runs first. +- `--rule-id` string (required) — Escalation rule ID (MongoDB ObjectID). +- `--rule-name` string (required) — Rule name, 1 to 39 characters. (1-39 chars) +- `--template-id` string (required) — Notification template ID (MongoDB ObjectID). +- body-only (`--data`): filters (object); layers (array<object>) (required); time_filters (array<object>) + +### info <channel-id> +Get channel detail +- `<channel-id>` (positional, required) int64 — Channel ID to fetch. + +### infos <channel-id> [<id2>...] +Batch get channels +- `<channel-ids>` (positional, required) intSlice — Channel IDs to look up. Up to 1000. + +### inhibit-rule-create <channel-id> +Create inhibit rule +- `<channel-id>` (positional, required) int64 — Channel the rule belongs to. +- `--description` string — Rule description, up to 500 characters. (≤500 chars) +- `--equals` stringSlice (required) — Label keys used to pair source and target alerts. +- `--is-directly-discard` bool — When true, suppressed target alerts are dropped instead of merged. +- `--priority` int64 — Evaluation priority. Lower runs first. +- `--rule-name` string (required) — Rule name, 1 to 39 characters. (1-39 chars) +- body-only (`--data`): source_filters (array<array>); target_filters (array<array>) + +### inhibit-rule-delete +Delete inhibit rule +- `--channel-id` int64 (required) — Channel the rule belongs to. +- `--rule-id` string (required) — Rule ID (MongoDB ObjectID). + +### inhibit-rule-disable +Disable inhibit rule +- `--channel-id` int64 (required) — Channel the rule belongs to. +- `--rule-id` string (required) — Rule ID (MongoDB ObjectID). 
+ +### inhibit-rule-enable +Enable inhibit rule +- `--channel-id` int64 (required) — Channel the rule belongs to. +- `--rule-id` string (required) — Rule ID (MongoDB ObjectID). + +### inhibit-rule-list <channel-id> +List inhibit rules +- `<channel-id>` (positional, required) int64 — Channel to list rules for. + +### inhibit-rule-update +Update inhibit rule +- `--channel-id` int64 (required) — Channel the rule belongs to. +- `--description` string — Rule description, up to 500 characters. (≤500 chars) +- `--equals` stringSlice (required) — Label keys used to pair source and target alerts. +- `--is-directly-discard` bool — When true, suppressed target alerts are dropped instead of merged. +- `--priority` int64 — Evaluation priority. Lower runs first. +- `--rule-id` string (required) — Inhibit rule ID (MongoDB ObjectID). +- `--rule-name` string (required) — Rule name, 1 to 39 characters. (1-39 chars) +- body-only (`--data`): source_filters (object); target_filters (object) + +### list +List channels +- `--name` string +- `--team-ids` int64Slice + +### silence-rule-create <channel-id> +Create silence rule +- `<channel-id>` (positional, required) int64 — Channel the rule belongs to. +- `--description` string — Rule description, up to 500 characters. (≤500 chars) +- `--from-incident-id` string — Source incident ID when the silence was created from an incident. +- `--is-auto-delete` bool — When true, the silence rule is automatically deleted after its time window expires. Defaults to false. +- `--is-directly-discard` bool — When true, silenced alerts are dropped instead of suppressed into incidents. +- `--priority` int64 — Evaluation priority. Lower runs first. +- `--rule-name` string (required) — Rule name, 1 to 39 characters. (1-39 chars) +- body-only (`--data`): filters (array<array>); time_filter (object); time_filters (array<object>) + +### silence-rule-delete +Delete silence rule +- `--channel-id` int64 (required) — Channel the rule belongs to. 
+- `--rule-id` string (required) — Rule ID (MongoDB ObjectID). + +### silence-rule-disable +Disable silence rule +- `--channel-id` int64 (required) — Channel the rule belongs to. +- `--rule-id` string (required) — Rule ID (MongoDB ObjectID). + +### silence-rule-enable +Enable silence rule +- `--channel-id` int64 (required) — Channel the rule belongs to. +- `--rule-id` string (required) — Rule ID (MongoDB ObjectID). + +### silence-rule-list <channel-id> +List silence rules +- `<channel-id>` (positional, required) int64 — Channel to list rules for. + +### silence-rule-update +Update silence rule +- `--channel-id` int64 (required) — Channel the rule belongs to. +- `--description` string — Rule description, up to 500 characters. (≤500 chars) +- `--is-auto-delete` bool — When true, the silence rule is automatically deleted after its time window expires. Defaults to false. +- `--is-directly-discard` bool — When true, silenced alerts are dropped instead of suppressed into incidents. +- `--priority` int64 — Evaluation priority. Lower runs first. +- `--rule-id` string (required) — Silence rule ID (MongoDB ObjectID). +- `--rule-name` string (required) — Rule name, 1 to 39 characters. (1-39 chars) +- body-only (`--data`): filters (object); time_filter (object); time_filters (array<object>) + +### unsubscribe-rule-create <channel-id> +Create drop rule +- `<channel-id>` (positional, required) int64 — Channel the rule belongs to. +- `--description` string — Rule description, up to 500 characters. (≤500 chars) +- `--priority` int64 — Evaluation priority. Lower runs first. +- `--rule-name` string (required) — Rule name, 1 to 39 characters. (1-39 chars) +- body-only (`--data`): filters (array<array>) + +### unsubscribe-rule-delete +Delete drop rule +- `--channel-id` int64 (required) — Channel the rule belongs to. +- `--rule-id` string (required) — Rule ID (MongoDB ObjectID). 
+ +### unsubscribe-rule-disable +Disable drop rule +- `--channel-id` int64 (required) — Channel the rule belongs to. +- `--rule-id` string (required) — Rule ID (MongoDB ObjectID). + +### unsubscribe-rule-enable +Enable drop rule +- `--channel-id` int64 (required) — Channel the rule belongs to. +- `--rule-id` string (required) — Rule ID (MongoDB ObjectID). + +### unsubscribe-rule-list <channel-id> +List drop rules +- `<channel-id>` (positional, required) int64 — Channel to list rules for. + +### unsubscribe-rule-update +Update drop rule +- `--channel-id` int64 (required) — Channel the rule belongs to. +- `--description` string — Rule description, up to 500 characters. (≤500 chars) +- `--priority` int64 — Evaluation priority. Lower runs first. +- `--rule-id` string (required) — Drop rule ID (MongoDB ObjectID). +- `--rule-name` string (required) — Rule name, 1 to 39 characters. (1-39 chars) +- body-only (`--data`): filters (object) + +### update <channel-id> +Update channel +- `--auto-resolve-mode` string — Auto-resolve timer reset mode. · enum: trigger | update +- `--auto-resolve-timeout` int64 — Auto-resolve timeout in seconds. 0 disables auto-resolve. Max 30 days. (0-2592000) +- `<channel-id>` (positional, required) int64 — Channel ID to update. +- `--channel-name` string — New channel name. 1 to 59 characters. (1-59 chars) +- `--description` string — New description. Up to 500 characters. (≤500 chars) +- `--disable-auto-close` bool — Disable automatic incident closing. +- `--disable-outlier-detection` bool — Disable outlier incident detection. +- `--is-external-report-enabled` bool — Allow external reporters to file incidents into this channel. +- `--is-private` bool — When true, the channel is visible only to its managing teams. +- `--managing-team-ids` intSlice — Additional teams that can manage the channel. Up to 3 entries. +- `--team-id` int64 — New owning team ID. 
+- body-only (`--data`): flapping (object); group (object) + +<!-- GENERATED:channel END --> + +## Key concepts + +- **`--auto-resolve-mode`** enum: `trigger` (timer resets on each new alert trigger) | `update` (timer resets on any alert update). +- **Alert grouping `group.method`**: `i` = intelligent (embedding similarity), `p` = pattern (label equality), `n` = none. Set via `--data '{"group":{"method":"p","equals":[["service","env"]],"time_window":300}}'` on `create`/`update`. +- **Rule status**: `enabled` | `disabled` — applies to escalation, inhibit, silence, and drop rules alike. +- **Inhibit `--equals`**: label keys that must be **equal** between the source (high-priority) and target (suppressed) alert to form a pair (e.g. `--equals service,env`). +- **Silence time windows**: `time_filter` (one-off, unix seconds) vs `time_filters` (recurring weekly HH:MM windows) — the two are mutually exclusive. Pass via `--data`. +- **Escalation `layers`** (required via `--data` on create/update): each layer needs `target` (with `person_ids`/`team_ids`/`schedule_to_role_ids`/`emails` + `by` OR `webhooks`) and optionally `notify_step`, `max_times`, `escalate_window`, `force_escalate`. + +## Gotchas + +- **Positional trap**: `channel-id` is **positional** on `info`, `infos`, `update`, `delete`, `disable`, `enable`, `escalate-rule-list`, `inhibit-rule-create`, `inhibit-rule-list`, `silence-rule-create`, `silence-rule-list`, `unsubscribe-rule-create`, `unsubscribe-rule-list`. It is a **flag** (`--channel-id`) on all other `escalate-rule-*` verbs (`create`/`info`/`update`/`delete`/`enable`/`disable`), `inhibit-rule-update/delete/enable/disable`, `silence-rule-update/delete/enable/disable`, `unsubscribe-rule-update/delete/enable/disable`. When in doubt, the fence heading `### verb <channel-id>` = positional; heading without `<…>` = flag. +- **`escalate-rule-create` needs `layers` via `--data`** — it is required and cannot be expressed as a flat flag. Omitting it returns a validation error. +- **`rule-id` is a MongoDB ObjectID string**, not an integer. 
Retrieve it from `escalate-rule-list`, `inhibit-rule-list`, `silence-rule-list`, or `unsubscribe-rule-list` before any update/delete/enable/disable. +- **`channel create` requires `--channel-name` and `--team-id`** — both are marked `required` in the flag list, and the server rejects the request without them. +- **`delete` on a channel is irreversible** — all rules within it are also removed. Confirm the `channel-id` against `list` before proceeding. +- **Empty rule list is authoritative** — if `escalate-rule-list` / `silence-rule-list` / etc. returns no rows, no rules exist; do not widen the query. +- **`list` response is a top-level array** (pipe `jq '.[]'`); rule-list responses nest under `items[]` (pipe `jq '.items[]'`). + +## Worked example — look up a channel and inspect its escalation policy + +```bash +fduty channel list --name "payments" --output-format toon +# → find channel_id (e.g. 4201) +fduty channel escalate-rule-list 4201 --output-format toon +# → find rule_id (MongoDB ObjectID string, e.g. "6643abc123def456789012aa") +fduty channel escalate-rule-info --channel-id 4201 --rule-id "6643abc123def456789012aa" --output-format toon +``` diff --git a/skills/flashduty/reference/enrichment.md b/skills/flashduty/reference/enrichment.md new file mode 100644 index 0000000..6688bcf --- /dev/null +++ b/skills/flashduty/reference/enrichment.md @@ -0,0 +1,212 @@ +# fduty enrichment — command card + +Prereq: `SKILL.md` read. Read verbs are free. **`upsert` fully replaces all rules for an integration** (atomic, irreversible in the sense that the previous ruleset is gone); `mapping-schema-delete`, `mapping-api-delete`, and `mapping-data-truncate` are irreversible — confirm IDs before running. 
+ +## Route here when + +"告警丰富 / 字段提取 / 标签映射 / 标签组合 / 标签删除 / 映射表 / 映射 API / enrichment rules / label extraction / label composition / mapping schema / lookup table / alert enrichment" → **enrichment**, NOT `route` (routing = which channel an alert goes to) or `template` (notification rendering). You need two kinds of IDs: +- **`integration-id`** (int64) — the integration that produces alerts. Get a real one from **`fduty alert list`** (every alert carries `integration_id` + `integration_name`). It is **NOT** a `channel_id` — `channel list` does not surface integration IDs, so never feed channel IDs here. +- **`schema-id`** / **`api-id`** (MongoDB ObjectID hex string) — from `mapping-schema-list` / `mapping-api-list`. + +If no specific integration is in scope, ask which one — do **not** enumerate every channel/integration ID and probe each (most return 400 `Integration ... not found`; that is not a discovery strategy). + +## Intent → verb + +| want | verb | +|---|---| +| view current enrichment rules for an integration | `info` | +| view rules for multiple integrations at once | `list` | +| create or fully replace enrichment rules | `upsert` | +| see all mapping schemas | `mapping-schema-list` | +| create a mapping schema (define lookup keys + output labels) | `mapping-schema-create` | +| get one schema's detail | `mapping-schema-info` | +| rename / redescribe a schema | `mapping-schema-update` | +| delete a schema | `mapping-schema-delete` | +| browse rows in a schema | `mapping-data-list` | +| add / update rows (up to 1000 at a time) | `mapping-data-upsert` | +| bulk-load rows from a CSV file | `mapping-data-upload` | +| export schema data to CSV | `mapping-data-download` | +| delete specific rows by key | `mapping-data-delete` | +| wipe all rows in a schema | `mapping-data-truncate` | +| see all external HTTP lookup APIs | `mapping-api-list` | +| register an HTTP lookup endpoint | `mapping-api-create` | +| get one API's detail | `mapping-api-info` | +| update an API's 
URL / name / timeout | `mapping-api-update` | +| remove an HTTP lookup API | `mapping-api-delete` | + +## Hot flow — create a mapping schema and populate it + +```bash +# 1. Create schema: define which alert labels are the lookup keys and what labels will be added +fduty enrichment mapping-schema-create \ + --schema-name "service-owner-map" \ + --source-labels service \ + --result-labels owner_team,oncall_email \ + --description "Maps service name to owning team and oncall email" +# → returns schema_id (hex string); save it + +# 2. Populate rows (up to 1000 per call; docs array via --data) +fduty enrichment mapping-data-upsert <schema-id> \ + --data '{"docs":[{"service":"payments","owner_team":"platform","oncall_email":"platform@example.com"},{"service":"auth","owner_team":"identity","oncall_email":"identity@example.com"}]}' + +# 3. Verify rows landed +fduty enrichment mapping-data-list <schema-id> --output-format toon +``` + +## Hot flow — attach enrichment rules to an integration + +```bash +# 1. Find a real integration ID — alerts carry integration_id + integration_name +fduty alert list --limit 20 --output-format toon + +# 2. Check existing rules before replacing +fduty enrichment info <integration-id> --output-format toon + +# 3. Upsert rules (full replacement; rules array via --data) +fduty enrichment upsert <integration-id> \ + --data '{"rules":[{"kind":"mapping","settings":{"schema_id":"<schema-id>","source_labels":["service"],"result_labels":["owner_team","oncall_email"]}},{"kind":"composition","settings":{"target":"summary","template":"[{{.owner_team}}] {{.title}}"}}]}' + +# 4. Confirm the new ruleset +fduty enrichment info <integration-id> --output-format toon +``` + +<!-- GENERATED:enrichment START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### info <integration-id> +Get enrichment rules +- `<integration-id>` (positional, required) int64 — Integration ID to query enrichment rules for. Must be greater than 0. 
(min 1) + +### list <integration-id> [<id2>...] +List enrichment rules +- `<integration-ids>` (positional, required) intSlice — List of integration IDs to query. + +### mapping-api-create +Create mapping API +- `--api-name` string (required) — Unique API name (max 199 chars). (≤199 chars) +- `--description` string — Optional description. +- `--insecure-skip-verify` bool — Skip TLS certificate verification. Default 'false'. +- `--retry-count` int64 — Number of retries on failure (0–1). Default 0. +- `--team-id` int64 — Owning team ID. +- `--timeout` int64 — Request timeout in seconds (1–3). Default 2. +- `--url` string (required) — HTTP/HTTPS endpoint URL (max 500 chars). (≤500 chars) +- body-only (`--data`): headers (object) + +### mapping-api-delete <api-id> +Delete mapping API +- `<api-id>` (positional, required) string — Mapping API ID (MongoDB ObjectID hex). + +### mapping-api-info <api-id> +Get mapping API detail +- `<api-id>` (positional, required) string — Mapping API ID (MongoDB ObjectID hex). + +### mapping-api-list +List mapping APIs + +### mapping-api-update <api-id> +Update mapping API +- `<api-id>` (positional, required) string — Mapping API ID (MongoDB ObjectID hex). +- `--api-name` string — New API name (max 199 chars). (≤199 chars) +- `--description` string — New description. +- `--insecure-skip-verify` bool — New TLS skip-verify setting. +- `--retry-count` int64 — New retry count. +- `--team-id` int64 — New owning team ID. +- `--timeout` int64 — New timeout in seconds. +- `--url` string — New endpoint URL (max 500 chars). (≤500 chars) +- body-only (`--data`): headers (object) + +### mapping-data-delete <schema-id> +Delete mapping data rows +- `--keys` stringSlice (required) — Keys of rows to delete. +- `<schema-id>` (positional, required) string — Mapping schema ID (MongoDB ObjectID hex). + +### mapping-data-download <schema-id> +Download mapping data as CSV +- `<schema-id>` (positional, required) string — Mapping schema ID (MongoDB ObjectID hex). 
+ +### mapping-data-list <schema-id> +List mapping data +- `--asc` bool — Sort ascending when 'true'. +- `--limit` int64 — Page size (1–100, default 20). +- `--orderby` string — Sort field. · enum: created_at | updated_at +- `--page` int64 — Page number (1-based). Used for offset-based pagination. +- `<schema-id>` (positional, required) string — Mapping schema ID (MongoDB ObjectID hex). +- `--search-after-ctx` string — Opaque cursor token for cursor-based pagination. +- body-only (`--data`): query (object) + +### mapping-data-truncate <schema-id> +Truncate mapping data +- `<schema-id>` (positional, required) string — Mapping schema ID (MongoDB ObjectID hex). + +### mapping-data-upload +Upload mapping data via CSV +- `--file` string — CSV file to upload. +- `--schema-id` string — Mapping schema ID (query parameter). + +### mapping-data-upsert <schema-id> +Upsert mapping data rows +- `<schema-id>` (positional, required) string — Mapping schema ID (MongoDB ObjectID hex). +- body-only (`--data`): docs (array<object>) (required) + +### mapping-schema-create +Create mapping schema +- `--description` string — Optional description (max 500 chars). (≤500 chars) +- `--result-labels` stringSlice (required) — Output label names (1–10). Must not overlap with 'source_labels'. +- `--schema-name` string (required) — Unique schema name (max 39 chars). (≤39 chars) +- `--source-labels` stringSlice (required) — Lookup key label names (1–3). Must not overlap with 'result_labels'. +- `--team-id` int64 — Owning team ID. '0' means no team. + +### mapping-schema-delete <schema-id> +Delete mapping schema +- `<schema-id>` (positional, required) string — Mapping schema ID (MongoDB ObjectID hex). + +### mapping-schema-info <schema-id> +Get mapping schema detail +- `<schema-id>` (positional, required) string — Mapping schema ID (MongoDB ObjectID hex). 
+ +### mapping-schema-list +List mapping schemas + +### mapping-schema-update <schema-id> +Update mapping schema +- `--description` string — New description (max 500 chars). (≤500 chars) +- `<schema-id>` (positional, required) string — Schema ID (MongoDB ObjectID hex). +- `--schema-name` string — New schema name (max 39 chars). (≤39 chars) +- `--team-id` int64 — New owning team ID. '0' removes the team association. + +### upsert <integration-id> +Upsert enrichment rules +- `<integration-id>` (positional, required) int64 — Integration ID to configure enrichment rules for. +- body-only (`--data`): rules (array<object>) (required) + +<!-- GENERATED:enrichment END --> + +## Rule kinds (load-bearing — wrong `kind` or `settings` shape 400s) + +| kind | what it does | key `settings` fields | +|---|---|---| +| `extraction` | extracts a new label via regex or GJson path | `source`, `target`, `method` (`regex`/`gjson`), `pattern` | +| `composition` | builds a label from a Go template over existing labels | `target`, `template` | +| `mapping` | looks up result labels from a schema or API by source label values | `schema_id` OR `api_id`, `source_labels`, `result_labels` | +| `drop` | removes labels matching a list | `labels` | + +Each rule may have an optional `if` AND-filter: `[{"key":"env","oper":"IN","vals":["prod"]}]` — rule is skipped when the filter does not match. `oper` must be `IN` or `NOTIN`. + +## Gotchas + +- **`upsert` replaces the entire ruleset atomically.** There is no "add one rule" verb. Always read with `info` first, then reconstruct the full `rules` array before calling `upsert`. Omitting a rule deletes it silently. +- **`integration-id` is POSITIONAL on `info`, `list`, and `upsert`** — pass it as the first bare argument (e.g. `fduty enrichment upsert 12345 --data '...'`), not as `--integration-id`. Similarly, `schema-id` and `api-id` are POSITIONAL on all verbs where the `use` shows `<schema-id>` or `<api-id>`. 
+- **`list` accepts multiple integration IDs as positional args** (`use: list <integration-id> [<id2>...]`) — pass them space-separated: `fduty enrichment list 101 102 103`. +- **`mapping-data-upsert` requires `docs` via `--data`** — this array cannot be expressed as flat flags. Each doc must include all `source_labels` AND all `result_labels` fields for the schema, or the row is rejected. +- **`mapping-schema-create` requires Pro plan** — creating a schema on a free account returns a plan-gate error, not a 404. +- **`mapping-data-truncate` wipes all rows immediately** — there is no undo. Use `mapping-data-download` to export a backup CSV first if the data matters. +- **`source-labels` and `result-labels` must not overlap** on `mapping-schema-create`; max 3 source labels, max 10 result labels. Violating either constraint 400s. + +## Worked example — inspect and extend enrichment rules + +```bash +# Read current rules for integration 42 (positional), then extend them +fduty enrichment info 42 --output-format toon +# → copy the existing rules[] array, append the new rule, then upsert the full set: +fduty enrichment upsert 42 \ + --data '{"rules":[<existing_rules...>,{"kind":"drop","settings":{"labels":["raw_body","_meta"]}}]}' +``` diff --git a/skills/flashduty/reference/field.md b/skills/flashduty/reference/field.md new file mode 100644 index 0000000..29c0d2c --- /dev/null +++ b/skills/flashduty/reference/field.md @@ -0,0 +1,107 @@ +# fduty field — command card + +Prereq: `SKILL.md` read. Read verbs (`list`, `info`) are free. `delete` is **irreversible** — double-check the field-id before running it. + +## Route here when + +"自定义字段 / 事件字段 / 字段选项 / incident field / custom field / field schema" → **field**. +NOT `enrichment` (enrichment = rules that auto-populate field values; field = the schema that defines those fields). +You need a **`field_id`** (24-char hex ObjectID) — get it from `field list`. 
+ +## Intent → verb + +| want | verb | +|---|---| +| see all custom fields | `list` | +| filter fields by name | `list --name <keyword>` | +| full detail for one field | `info <field-id>` | +| create a new custom field | `create` | +| rename, re-describe, or change options | `update <field-id>` | +| permanently remove a field | `delete <field-id>` | + +## Hot flow — create a select field and update its options + +```bash +# 1. Check what already exists (avoid duplicate display-name) +fduty field list --output-format toon + +# 2. Create a single-select field (field-type + value-type + options are all required here) +fduty field create \ + --display-name "Root Cause" \ + --field-name "root_cause" \ + --field-type single_select \ + --value-type string \ + --options "hardware failure" --options "software bug" --options "human error" +# → returns field_id; save it. + +# 3. Later: add an option (pass the full replacement list) +fduty field update <field-id> \ + --options "hardware failure" --options "software bug" --options "human error" --options "network issue" +``` + +<!-- GENERATED:field START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### create +Create field +- `--description` string — Optional free-text description. (≤499 chars) +- `--display-name` string (required) — Human-readable name. Must be unique within the account. (≤39 chars) +- `--field-name` string (required) — Machine name. Must start with a letter or underscore; 1–40 chars of ''. Immutable after creation. (≤39 chars) · enum: a-zA-Z0-9_ +- `--field-type` string (required) — Field input type. Immutable after creation. · enum: checkbox | multi_select | single_select | text +- `--options` stringSlice — Required and non-empty for 'single_select'/'multi_select' (unique strings, each 1–200 chars). Must be omitted or empty for 'checkbox'/'text'. +- `--value-type` string (required) — Stored value type. 'checkbox' requires 'bool'; 'single_select'/'multi_select'/'text' require 'string'. 
Immutable after creation. · enum: string | bool | float +- body-only (`--data`): default_value (any) + +### delete <field-id> +Delete field +- `<field-id>` (positional, required) string — Field ID — 24-character hex ObjectID. + +### info <field-id> +Get field detail +- `<field-id>` (positional, required) string — Field ID — 24-character hex ObjectID. + +### list +List custom fields +- `--name` string + +### update <field-id> +Update field +- `--description` string — New description. +- `--display-name` string — New display name. Must remain unique within the account. (≤39 chars) +- `<field-id>` (positional, required) string — Field ID — 24-character hex ObjectID. +- `--options` stringSlice — Replacement options list. Must obey the same per-type rules as create. +- body-only (`--data`): default_value (any) + +<!-- GENERATED:field END --> + +## Type constraints (immutable triad — wrong values 400) + +`--field-type`, `--field-name`, and `--value-type` are **permanently fixed at creation** and cannot be changed via `update`. + +| `--field-type` | `--value-type` | `--options` | +|---|---|---| +| `single_select` | `string` | required, ≥1 unique string | +| `multi_select` | `string` | required, ≥1 unique string | +| `text` | `string` | must be omitted | +| `checkbox` | `bool` | must be omitted | + +`default_value` (optional) can be set or changed; pass it via `--data '{"default_value": ...}'` because it has no typed flag. + +## Gotchas + +- **`delete`, `info`, `update` take `<field-id>` as a POSITIONAL first argument**, not `--field-id`. Example: `fduty field delete <field-id>`, not `--field-id <field-id>`. +- **`--options` replaces the whole list on `update`** — omitting it leaves options unchanged, but a partial list silently drops the missing values. Always pass the full desired set. +- **`--field-name` is the machine key** (`[a-zA-Z0-9_]`, starts with letter/underscore, ≤40 chars). 
It is the stable identifier for downstream enrichment rules — choose it carefully; it cannot be renamed. +- **`delete` is permanent and cascades** — any enrichment rules that reference the field by `field_name` will lose their target. Confirm the name against `field list` before deleting. +- **Empty `field list` is authoritative** — if the field isn't listed, it doesn't exist for this account. Do not retry with widened queries. + +## Worked example + +```bash +# Create a checkbox field (value-type must be bool; options must be omitted) +fduty field create \ + --display-name "Needs Postmortem" \ + --field-name "needs_postmortem" \ + --field-type checkbox \ + --value-type bool \ + --description "Flag incidents that require a postmortem write-up." +``` diff --git a/skills/flashduty/reference/incident.md b/skills/flashduty/reference/incident.md new file mode 100644 index 0000000..3913105 --- /dev/null +++ b/skills/flashduty/reference/incident.md @@ -0,0 +1,383 @@ +# fduty incident — command card + +Prereq: `SKILL.md` read. Read verbs are free. **Mutating verbs notify responders or alter state** — confirm scope first. `merge` and `remove` are **irreversible**; `remove` permanently deletes. + +## Route here when + +"告警 / 故障 / 事件 / 响应 / 值班 / incident / page / outage / triage / acknowledge / resolve / snooze / escalate / post-mortem" → **incident**, NOT `alert` (alert = deduplicated signal; incident = actionable item responders work). NOT `insight` (metrics/MTTA/MTTR). You need **`incident_id` (24-char MongoDB ObjectID)** for most verbs — not the 6-char `num` shown in the UI. If you only have a num, use `incident info --num <num>` first. 
+ +## Intent → verb + +| want | verb | +|---|---| +| list / search active incidents | `list` | +| look up by 6-char UI num | `info --num <num>` | +| full detail + AI summary for a 24-char id | `detail <id>` (narrative) or `info --incident-id <id>` (same endpoint) | +| get structured data for one or more ids | `get <id> [<id2>...]` | +| contributing alerts | `alerts <id>` | +| full event history (short) | `timeline <id>` | +| paginated event history | `feed <id>` | +| past similar incidents | `similar <id>` | +| historical incidents related to this one | `past-list <incident-id>` | +| create a manual incident | `create` | +| edit title/description/severity | `update <id>` | +| edit title/description/severity/impact/root-cause/resolution | `reset <incident-id>` | +| set one custom field | `field-reset <incident-id>` | +| acknowledge (Triggered → Processing) | `ack <incident-id> [<id2>...]` | +| un-acknowledge | `unack <incident-id> [<id2>...]` | +| close | `close <id> [<id2>...]` | +| reopen | `reopen <incident-id> [<id2>...]` | +| resolve with optional note | `resolve <incident-id> [<id2>...]` | +| snooze / un-snooze | `snooze <id> [<id2>...]` / `wake <incident-id> [<id2>...]` | +| add comment | `comment <id> [<id2>...]` | +| add responder by person ID | `add-responder <id>` | +| replace responder list | `reassign <id>` | +| merge duplicates (IRREVERSIBLE) | `merge <target_id>` | +| stop auto-merging alerts in | `disable-merge <incident-id> [<id2>...]` | +| permanently delete (IRREVERSIBLE) | `remove <id> [<id2>...]` | +| post-mortem reports | `post-mortem-list` / `post-mortem-info <post-mortem-id>` / `post-mortem-delete <post-mortem-id>` | +| war room (IM chat) | `war-room-list <incident-id>` → `war-room-create <incident-id>` | + +## Hot flow — triage an active incident + +```bash +# 1. Find unacknowledged critical incidents (last 4h) +fduty incident list --severity Critical --progress Triggered --since 4h --output-format toon + +# 2. 
Get AI summary + full detail (use the 24-char incident_id from step 1) +fduty incident detail <incident-id> --output-format toon + +# 3. See contributing alerts +fduty incident alerts <incident-id> --output-format toon + +# 4. Check for prior similar incidents (channel-backed only; see Gotchas) +fduty incident similar <incident-id> --limit 5 --output-format toon + +# 5. Acknowledge ownership +fduty incident ack <incident-id> + +# 6. Post a status comment +fduty incident comment <incident-id> --comment "Root cause identified: DB failover. Fix deploying." + +# 7. Resolve with root-cause note +fduty incident resolve <incident-id> --root-cause "DB primary failover delay" --resolution "Failover completed; latency normal." +``` + +## Hot flow — full fault analysis (read-only summary) + +When asked to **summarize / analyze** an incident — 详情 + 关联告警 + 变更 + 时间线 + 相似故障 + 复盘 — `incident detail` does **not** contain the alerts / timeline / similar / post-mortem / change data; each is its own command. **Your first action must be the bundled script** — do not hand-pick one or two commands and write the rest from memory. One call fetches all six aspects: + +```bash +bash <skill-dir>/scripts/incident-summary.sh <incident-id> +``` + +`<skill-dir>` is this skill's base directory — you were given it when the skill loaded (it is also the folder you read this card from). The script runs every command below and prints the results in one block, so each section of your summary is backed by real output and there is nothing to guess. (To tie post-mortems to *this* incident, re-run `incident post-mortem-list --channel-ids <channel-id>` with the `channel_id` from `detail`.) 
+ +If you fetch the pieces by hand instead, run **all six** — they are cheap reads: + +```bash +ID=<incident-id> # 24-char id from `incident list` +fduty incident detail "$ID" --output-format toon # ① 详情 + AI summary + alert counts + channel_id +fduty incident alerts "$ID" --output-format toon # ② contributing alerts (detail's embedded alerts are empty here) +fduty incident timeline "$ID" --output-format toon # ④ timeline (or `incident feed "$ID"` for the paginated view) +fduty incident similar "$ID" --limit 5 --output-format toon # ⑤ similar past incidents (channel-backed; see Gotchas) +fduty incident post-mortem-list --channel-ids <channel-id> --output-format toon # ⑥ post-mortems for this incident's channel +fduty change list --since 24h --output-format toon # ③ correlated changes — by shared labels + time; see reference/change.md +``` + +> **Never report a result you didn't fetch.** Do not write "返回空" / "无" / a count for any aspect whose command is **absent from your tool-call history this turn** — write `未查询 — 可运行 <command>` instead. "Empty" is a claim only a command you actually ran can make; inventing it is the worst failure mode of a fault summary. + +## Hot flow — resolve, document, and merge duplicates + +```bash +# Merge two duplicate incidents into a primary (IRREVERSIBLE — confirm first) +fduty incident merge <primary-incident-id> --source <dup1-id>,<dup2-id> + +# Record post-incident narrative on the primary +fduty incident reset <primary-incident-id> \ + --root-cause "Redis OOM on shard-3" \ + --impact "Checkout latency P99 >5s for 12 min" \ + --resolution "Increased memory limit; deployed hot patch" + +# Review the event timeline +fduty incident timeline <primary-incident-id> --output-format toon +``` + +<!-- GENERATED:incident START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### ack <incident-id> [<id2>...] +Acknowledge incident +- `<incident-ids>` (positional, required) stringSlice — Incident IDs to acknowledge. At most 100 per call. 
+ +### add-responder <id> +Add responders to an incident +- `--follow-preference` bool +- `--notify-channel` string +- `--person` string +- `--template-id` string + +### alert-list <incident-id> +List alerts of incident +- `<incident-id>` (positional, required) string — Incident ID (MongoDB ObjectID). +- `--include-events` bool — When true, include raw alert events in each alert item. +- `--is-active` bool — When true return only active alerts (Critical/Warning/Info); when false return only recovered alerts (Ok). Omit to include all. +- `--limit` int64 — Page size, at most 1000. (0-1000) +- `--page` int64 — Page number starting at 1. (min 0) +- `--search-after-ctx` string + +### alerts <id> +View incident alerts +- `--limit` int + +### assign +Assign incident +- `--incident-id` string — Single incident ID. Ignored when 'incident_ids' is also provided. +- `--incident-ids` stringSlice — Batch incident IDs. +- body-only (`--data`): assigned_to (object) (required) + +### close <id> [<id2> ...] +Close incidents + +### comment <id> [<id2> ...] +Add a comment to incident timelines +- `--comment` string +- `--mute-reply` bool + +### create +Create a new incident +- `--assign` intSlice +- `--channel` int64 +- `--description` string +- `--severity` string +- `--title` string + +### custom-action-do +Execute custom action +- `--incident-id` string (required) — Incident ID (MongoDB ObjectID). +- `--integration-id` int64 (required) — Custom action integration ID. Must be enabled and associated with the incident's channel. + +### detail <id> +View full incident detail with AI summary + +### disable-merge <incident-id> [<id2>...] +Disable incident merge +- `<incident-ids>` (positional, required) stringSlice — Incident IDs whose automatic merge should be disabled. 
+ +### feed <id> +View incident feed (paginated timeline) +- `--limit` int +- `--page` int + +### field-reset <incident-id> +Update incident custom field +- `--field-name` string (required) — Custom field name; must match a field defined on the account. +- `<incident-id>` (positional, required) string — Incident ID (MongoDB ObjectID). +- body-only (`--data`): field_value (any) + +### get <id> [<id2> ...] +Get incident details + +### info [<incident-id>] +Get incident detail +- `--incident-id` string — Incident ID (MongoDB ObjectID). +- `--num` string — Short incident ID (the 6-character uppercased id shown in the UI). Not unique — resolves to the most recent match. Supply either incident_id or num. + +### list +List incidents +- `--channel-id` int64 +- `--limit` int +- `--nums` string +- `--page` int +- `--progress` string +- `--query` string +- `--severity` string +- `--since` string +- `--until` string + +### list-by-ids <incident-id> [<id2>...] +List incidents by IDs +- `<incident-ids>` (positional, required) stringSlice — Incident IDs to fetch. + +### merge <target_id> +Merge incidents into a target incident +- `--source` string + +### past-list <incident-id> +List past incidents +- `<incident-id>` (positional, required) string — Reference incident ID (MongoDB ObjectID). +- `--limit` int64 — Maximum number of similar incidents to return. (0-100) + +### post-mortem-delete <post-mortem-id> +Delete post-mortem +- `<post-mortem-id>` (positional, required) string — Post-mortem ID. + +### post-mortem-info <post-mortem-id> +Get post-mortem +- `<post-mortem-id>` (positional, required) string — Post-mortem ID. Deterministic hash derived from account ID and the set of linked incident IDs. + +### post-mortem-list +List post-mortems +- `--asc` bool — Ascending order when true. +- `--channel-ids` intSlice — Channel IDs to restrict the query to. +- `--created-at-end-seconds` string — Filter by creation time: upper bound in seconds. 
(min 0) Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--created-at-start-seconds` string — Filter by creation time: lower bound in seconds. (min 0) Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--limit` int64 — Page size, at most 100. (0-100) +- `--order-by` string — Field used to order results. · enum: created_at_seconds | updated_at_seconds +- `--page` int64 — Page number starting at 1. (min 0) +- `--search-after-ctx` string — Cursor from a previous response for forward pagination. +- `--status` string — Report status. Defaults to 'published' on the server when omitted. · enum: drafting | published +- `--team-ids` intSlice — Team IDs to restrict the query to. + +### reassign <id> +Reassign an incident to new responders +- `--person` string + +### remove <id> [<id2> ...] +Permanently remove incidents +- `--force` bool + +### reopen <incident-id> [<id2>...] +Reopen incident +- `<incident-ids>` (positional, required) stringSlice — Incident IDs to reopen. At most 100 per call. +- `--reason` string — Optional reason recorded on the timeline. (≤1024 chars) + +### reset <incident-id> +Update incident fields +- `--description` string — New description. (3-6144 chars) +- `--impact` string — New impact description. (3-6144 chars) +- `<incident-id>` (positional, required) string — Incident ID (MongoDB ObjectID). +- `--incident-severity` string — New severity. · enum: Info | Warning | Critical +- `--resolution` string — New resolution notes. (3-6144 chars) +- `--root-cause` string — New root cause analysis. (3-6144 chars) +- `--title` string — New incident title. (3-200 chars) + +### resolve <incident-id> [<id2>...] +Resolve incident +- `<incident-ids>` (positional, required) stringSlice — Incident IDs to resolve. At most 100 per call. +- `--resolution` string — Optional resolution note applied to every resolved incident. 
(≤1024 chars) +- `--root-cause` string — Optional root cause note applied to every resolved incident. (≤1024 chars) + +### responder-add <person-id> [<id2>...] +Add incident responder +- `--incident-id` string (required) — Incident ID (MongoDB ObjectID). +- `<person-ids>` (positional, required) intSlice — Member IDs to add as responders. +- body-only (`--data`): notify (object) + +### similar <id> +Find similar incidents +- `--limit` int + +### snooze <id> [<id2> ...] +Snooze incidents +- `--duration` string + +### timeline <id> +View incident timeline + +### unack <incident-id> [<id2>...] +Unacknowledge incident +- `<incident-ids>` (positional, required) stringSlice — Incident IDs to unacknowledge. At most 100 per call. + +### update <id> +Update an incident +- `--description` string +- `--field` stringArray +- `--severity` string +- `--title` string + +### wake <incident-id> [<id2>...] +Wake incident +- `<incident-ids>` (positional, required) stringSlice — Incident IDs to wake. At most 100 per call. + +### add-member <chat_id> +Add members to an incident war room +- `--integration` int64 +- `--member` string + +### create <incident_id> +Create an incident war room +- `--add-observers` bool +- `--integration` int64 +- `--member` string + +### default-observers <incident_id> +Preview historical responders for war-room observer invitation + +### delete <incident_id> +Delete an incident war room +- `--force` bool +- `--integration` int64 + +### get <chat_id> +Get incident war room details +- `--integration` int64 + +### list <incident_id> +List incident war rooms +- `--integration` int64 + +### war-room-add-member <chat-id> +Add war-room member +- `<chat-id>` (positional, required) string — Chat ID of the war room within the IM platform. +- `--integration-id` int64 (required) — IM integration that hosts the war room. +- `--member-ids` intSlice (required) — Person IDs to add to the war room. 
+ +### war-room-create +Create war room +- `--add-observers` bool — When true, also add historical responders of the incident as observers. +- `--incident-id` string (required) — Incident ID (MongoDB ObjectID). +- `--integration-id` int64 (required) — IM integration ID. Must have war room enabled; Feishu, DingTalk, WeCom (self-built), Slack and Teams are supported. +- `--member-ids` intSlice — Additional member IDs to add to the war room. + +### war-room-default-observers <incident-id> +Get war-room default observers +- `<incident-id>` (positional, required) string — Incident ID, a MongoDB ObjectID hex string. + +### war-room-delete +Delete war room +- `--incident-id` string (required) — Incident ID (MongoDB ObjectID). +- `--integration-id` int64 (required) — IM integration ID. + +### war-room-detail <chat-id> +Get war room detail +- `<chat-id>` (positional, required) string — Chat/group ID on the IM side. +- `--integration-id` int64 (required) — IM integration ID that hosts the war room. + +### war-room-list <incident-id> +List war rooms +- `<incident-id>` (positional, required) string — Incident ID (MongoDB ObjectID). +- `--integration-id` int64 — Optional filter: only return war rooms for this IM integration. + +<!-- GENERATED:incident END --> + +## Status / severity values + +- **progress** (`--progress` filter): `Triggered` → `Processing` → `Closed` +- **severity** (`--severity` filter / `--severity` on create/update/reset): `Critical` · `Warning` · `Info` +- `ack` moves Triggered → Processing. `close`/`resolve` move any state → Closed. `reopen` moves Closed → Triggered. + +## Gotchas + +- **24-char `incident_id` vs 6-char `num`**: positional-id verbs (`ack`, `close`, `resolve`, `detail`, `alerts`, `timeline`, `merge`, `reassign`, `comment`, `reset`, …) require the full ObjectID. Passing a 6-char num 400s. Use `incident info --num <num>` to resolve, or `incident list --query <num>` and read `incident_id`. 
+- **`similar` only works on channel-backed incidents** (those with a real `channel_id`). Manually created incidents with no channel return HTTP 400 "Channel not found" — this is expected, not transient. Fall back to `incident list --query "<keywords>"` for text search. +- **`update` vs `reset`**: `update <id>` edits title/description/severity/custom fields. `reset <incident-id>` additionally supports `--impact`, `--root-cause`, `--resolution` (the AI narrative fields). Use `reset` for post-incident write-back. +- **`list` window cap**: the `--since`/`--until` window must be < 31 days; `--limit` max 100. Empty result is authoritative — do not widen filters or retry. +- **`merge` is irreversible**: source incidents are absorbed into target permanently. Always list and confirm both IDs before running. +- **`remove --force`** bypasses the interactive confirmation prompt — never pass `--force` unless the user has explicitly said so. +- **`assign` needs `--data` for the nested `assigned_to` object** (either `person_ids` or `escalate_rule_id`). Pass via `--data '{"incident_ids":["<id>"],"assigned_to":{"person_ids":[101]}}'`. `reassign <id> --person <ids>` is simpler for direct-person assignment. + +## Worked example + +```bash +# Start: a prod alert paged out; you have the 6-char num "A3F9B1" from Slack. +# Step 1: resolve the num to full id and get AI summary in one call. +fduty incident info --num A3F9B1 --output-format toon + +# Step 2: acknowledge so teammates see it's being handled. +fduty incident ack <incident-id> + +# Step 3: after fix, resolve with context. +fduty incident resolve <incident-id> \ + --root-cause "Misconfigured health-check threshold after deploy" \ + --resolution "Reverted threshold; all pods healthy." 
+``` diff --git a/skills/flashduty/reference/insight.md b/skills/flashduty/reference/insight.md new file mode 100644 index 0000000..346cbbb --- /dev/null +++ b/skills/flashduty/reference/insight.md @@ -0,0 +1,354 @@ +# fduty insight — command card + +Prereq: `SKILL.md` read. All `insight` verbs are **read-only** — no mutations, no confirmations needed. + +## Route here when + +"噪声治理 / 高频告警 / MTTA / MTTR / 绩效复盘 / 月报 / SRE review / noise reduction / alert fatigue / who responds fastest / channel performance / team metrics / incident export / CSV export" → **insight**. + +Do **not** hand-aggregate from `alert list` / `incident list` — `insight` does server-side aggregation and gives authoritative numbers. Key IDs you may need: `--team-ids` from `fduty team list`; `--channel-ids` from `fduty channel list`; `--responder-ids` from `fduty member list`. + +## Intent → verb + +| want | verb | +|---|---| +| top noisy alert sources by check/resource | `top-alerts` | +| finer noise drill-down (sort, severity, team/channel filter, time buckets) | `alert-topk-by-label` | +| account-wide MTTA/MTTR/ack-rate roll-up | `account` | +| per-channel breakdown | `channel` | +| per-team breakdown | `team` | +| per-responder MTTA/workload breakdown | `responder` | +| per-incident list with response time columns | `incidents` | +| per-incident list with rich filters (severity, responder, cursor) | `incident-list` | +| CSV export of incidents | `incident-export` | +| CSV export of responder metrics | `responder-export` | +| CSV export of channel metrics | `channel-export` | +| CSV export of team metrics | `team-export` | + +## Hot flow — weekly SRE review + +```bash +# account-level roll-up for past 30 days +fduty insight account --start-time 30d --end-time now --output-format toon + +# per-team and per-channel breakdowns (same flags) +fduty insight team --start-time 30d --end-time now --output-format toon +fduty insight channel --start-time 30d --end-time now --output-format toon + +# who 
responded slowest (per-responder MTTA) +fduty insight responder --start-time 30d --end-time now --output-format toon + +# top-10 noisiest check sources this week +fduty insight top-alerts --label check --since 7d --output-format toon + +# per-incident list with MTTA/MTTR (uses --since not --start-time) +fduty insight incidents --since 30d --limit 50 --output-format toon +``` + +## Hot flow — 月报 CSV export + +```bash +# incident-export takes epoch seconds ONLY (int64, not relative strings); `date -v-30d` is BSD/macOS syntax — on GNU/Linux use `date -d '30 days ago' +%s` +S=$(date -v-30d +%s); E=$(date +%s) +fduty insight incident-export --start-time $S --end-time $E > incidents.csv + +# responder/channel/team-export accept relative strings +fduty insight responder-export --start-time 30d --end-time now > responders.csv +fduty insight channel-export --start-time 30d --end-time now > channels.csv +fduty insight team-export --start-time 30d --end-time now > teams.csv +``` + +<!-- GENERATED:insight START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### account +Get account-level insight +- `--aggregate-unit` string — Aggregate metrics into time buckets. When set, the time range must cover at least 24 hours; 'day' additionally caps the range at 31 days. · enum: day | week | month +- `--asc` bool — Sort ascending when 'true', descending otherwise. +- `--channel-ids` intSlice — Filter by channel IDs. At most 100 entries. +- `--description-html-to-text` bool — Strip HTML markup from the description column when exporting. +- `--end-time` string (required) — End time, Unix seconds. Must be greater than 'start_time'. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--export-fields` stringSlice — Subset of CSV column keys to include in the export. At most 50 entries. Only used by the export endpoints. 
· enum: incident_id | title | severity | progress | channel_id | channel_name | team_id | team_name | created_at | seconds_to_ack | seconds_to_close | closed_by | engaged_seconds | hours | notifications | interruptions | acknowledgements | assignments | reassignments | escalations | manual_escalations | timeout_escalations | assigned_to | responders | description | labels | fields | creator_id | creator_name +- `--incident-ids` stringSlice — Filter by incident IDs (MongoDB ObjectIDs). At most 100 entries. +- `--is-my-team` bool — Restrict results to teams the caller belongs to. When true and the caller has no teams, the result set is empty. +- `--orderby` string — Field to sort the underlying incident set by. · enum: created_at +- `--query` string — Full-text query applied to incident title and description. +- `--responder-ids` intSlice — Filter by responder person IDs. At most 100 entries. +- `--seconds-to-ack-from` int64 — Lower bound (inclusive) on time-to-acknowledge, in seconds. +- `--seconds-to-ack-to` int64 — Upper bound (exclusive) on time-to-acknowledge, in seconds. Must be greater than 'seconds_to_ack_from' when both are set. +- `--seconds-to-close-from` int64 — Lower bound (inclusive) on time-to-close, in seconds. +- `--seconds-to-close-to` int64 — Upper bound (exclusive) on time-to-close, in seconds. Must be greater than 'seconds_to_close_from' when both are set. +- `--severities` stringSlice — Filter by severity. At most 3 entries. · enum: Critical | Warning | Info | Ok +- `--split-hours` bool — When true, metrics are split into 'work'/'sleep'/'off' hour buckets. +- `--start-time` string (required) — Start time, Unix seconds. Must be greater than 0. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--team-ids` intSlice — Filter by team IDs. At most 100 entries. +- `--time-zone` string — IANA time zone name used to interpret the time range (e.g. 'Asia/Shanghai'). Defaults to the account time zone. 
+- body-only (`--data`): fields (object); labels (object) + +### alert-topk-by-label +Get top-K alerts grouped by check or resource +- `--aggregate-unit` string — Aggregate metrics into time buckets. When set, the time range must cover at least 24 hours; 'day' additionally caps the range at 31 days. · enum: day | week | month +- `--asc` bool — Sort ascending when 'true', descending otherwise. +- `--channel-ids` intSlice — Filter by channel IDs. At most 100 entries. +- `--description-html-to-text` bool — Strip HTML markup from the description column when exporting. +- `--end-time` string (required) — End time, Unix seconds. Must be greater than 'start_time'. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--export-fields` stringSlice — Subset of CSV column keys to include in the export. At most 50 entries. Only used by the export endpoints. · enum: incident_id | title | severity | progress | channel_id | channel_name | team_id | team_name | created_at | seconds_to_ack | seconds_to_close | closed_by | engaged_seconds | hours | notifications | interruptions | acknowledgements | assignments | reassignments | escalations | manual_escalations | timeout_escalations | assigned_to | responders | description | labels | fields | creator_id | creator_name +- `--incident-ids` stringSlice — Filter by incident IDs (MongoDB ObjectIDs). At most 100 entries. +- `--is-my-team` bool — Restrict results to teams the caller belongs to. When true and the caller has no teams, the result set is empty. +- `--k` int64 — Number of top entries to return, between 1 and 100. +- `--label` string (required) — Dimension to aggregate by. · enum: check | resource +- `--orderby` string — Field to sort results by. · enum: total_alert_cnt | total_alert_event_cnt +- `--query` string — Full-text query applied to incident title and description. +- `--responder-ids` intSlice — Filter by responder person IDs. At most 100 entries. 
+- `--seconds-to-ack-from` int64 — Lower bound (inclusive) on time-to-acknowledge, in seconds. +- `--seconds-to-ack-to` int64 — Upper bound (exclusive) on time-to-acknowledge, in seconds. Must be greater than 'seconds_to_ack_from' when both are set. +- `--seconds-to-close-from` int64 — Lower bound (inclusive) on time-to-close, in seconds. +- `--seconds-to-close-to` int64 — Upper bound (exclusive) on time-to-close, in seconds. Must be greater than 'seconds_to_close_from' when both are set. +- `--severities` stringSlice — Filter by severity. At most 3 entries. · enum: Critical | Warning | Info | Ok +- `--split-hours` bool — When true, metrics are split into 'work'/'sleep'/'off' hour buckets. +- `--start-time` string (required) — Start time, Unix seconds. Must be greater than 0. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--team-ids` intSlice — Filter by team IDs. At most 100 entries. +- `--time-zone` string — IANA time zone name used to interpret the time range (e.g. 'Asia/Shanghai'). Defaults to the account time zone. +- body-only (`--data`): fields (object); labels (object) + +### channel +Get channel insight +- `--aggregate-unit` string — Aggregate metrics into time buckets. When set, the time range must cover at least 24 hours; 'day' additionally caps the range at 31 days. · enum: day | week | month +- `--asc` bool — Sort ascending when 'true', descending otherwise. +- `--channel-ids` intSlice — Filter by channel IDs. At most 100 entries. +- `--description-html-to-text` bool — Strip HTML markup from the description column when exporting. +- `--end-time` string (required) — End time, Unix seconds. Must be greater than 'start_time'. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--export-fields` stringSlice — Subset of CSV column keys to include in the export. At most 50 entries. Only used by the export endpoints. 
· enum: incident_id | title | severity | progress | channel_id | channel_name | team_id | team_name | created_at | seconds_to_ack | seconds_to_close | closed_by | engaged_seconds | hours | notifications | interruptions | acknowledgements | assignments | reassignments | escalations | manual_escalations | timeout_escalations | assigned_to | responders | description | labels | fields | creator_id | creator_name +- `--incident-ids` stringSlice — Filter by incident IDs (MongoDB ObjectIDs). At most 100 entries. +- `--is-my-team` bool — Restrict results to teams the caller belongs to. When true and the caller has no teams, the result set is empty. +- `--orderby` string — Field to sort the underlying incident set by. · enum: created_at +- `--query` string — Full-text query applied to incident title and description. +- `--responder-ids` intSlice — Filter by responder person IDs. At most 100 entries. +- `--seconds-to-ack-from` int64 — Lower bound (inclusive) on time-to-acknowledge, in seconds. +- `--seconds-to-ack-to` int64 — Upper bound (exclusive) on time-to-acknowledge, in seconds. Must be greater than 'seconds_to_ack_from' when both are set. +- `--seconds-to-close-from` int64 — Lower bound (inclusive) on time-to-close, in seconds. +- `--seconds-to-close-to` int64 — Upper bound (exclusive) on time-to-close, in seconds. Must be greater than 'seconds_to_close_from' when both are set. +- `--severities` stringSlice — Filter by severity. At most 3 entries. · enum: Critical | Warning | Info | Ok +- `--split-hours` bool — When true, metrics are split into 'work'/'sleep'/'off' hour buckets. +- `--start-time` string (required) — Start time, Unix seconds. Must be greater than 0. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--team-ids` intSlice — Filter by team IDs. At most 100 entries. +- `--time-zone` string — IANA time zone name used to interpret the time range (e.g. 'Asia/Shanghai'). Defaults to the account time zone. 
+- body-only (`--data`): fields (object); labels (object) + +### channel-export +Export channel insight +- `--aggregate-unit` string — Aggregate metrics into time buckets. When set, the time range must cover at least 24 hours; 'day' additionally caps the range at 31 days. · enum: day | week | month +- `--asc` bool — Sort ascending when 'true', descending otherwise. +- `--channel-ids` intSlice — Filter by channel IDs. At most 100 entries. +- `--description-html-to-text` bool — Strip HTML markup from the description column when exporting. +- `--end-time` string (required) — End time, Unix seconds. Must be greater than 'start_time'. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--export-fields` stringSlice — Subset of CSV column keys to include in the export. At most 50 entries. Only used by the export endpoints. · enum: incident_id | title | severity | progress | channel_id | channel_name | team_id | team_name | created_at | seconds_to_ack | seconds_to_close | closed_by | engaged_seconds | hours | notifications | interruptions | acknowledgements | assignments | reassignments | escalations | manual_escalations | timeout_escalations | assigned_to | responders | description | labels | fields | creator_id | creator_name +- `--incident-ids` stringSlice — Filter by incident IDs (MongoDB ObjectIDs). At most 100 entries. +- `--is-my-team` bool — Restrict results to teams the caller belongs to. When true and the caller has no teams, the result set is empty. +- `--orderby` string — Field to sort the underlying incident set by. · enum: created_at +- `--query` string — Full-text query applied to incident title and description. +- `--responder-ids` intSlice — Filter by responder person IDs. At most 100 entries. +- `--seconds-to-ack-from` int64 — Lower bound (inclusive) on time-to-acknowledge, in seconds. +- `--seconds-to-ack-to` int64 — Upper bound (exclusive) on time-to-acknowledge, in seconds. 
Must be greater than 'seconds_to_ack_from' when both are set. +- `--seconds-to-close-from` int64 — Lower bound (inclusive) on time-to-close, in seconds. +- `--seconds-to-close-to` int64 — Upper bound (exclusive) on time-to-close, in seconds. Must be greater than 'seconds_to_close_from' when both are set. +- `--severities` stringSlice — Filter by severity. At most 3 entries. · enum: Critical | Warning | Info | Ok +- `--split-hours` bool — When true, metrics are split into 'work'/'sleep'/'off' hour buckets. +- `--start-time` string (required) — Start time, Unix seconds. Must be greater than 0. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--team-ids` intSlice — Filter by team IDs. At most 100 entries. +- `--time-zone` string — IANA time zone name used to interpret the time range (e.g. 'Asia/Shanghai'). Defaults to the account time zone. +- body-only (`--data`): fields (object); labels (object) + +### incident-export +Export insight incidents +- `--asc` bool +- `--channel-ids` intSlice +- `--description-html-to-text` bool +- `--end-time` int64 +- `--export-fields` stringSlice +- `--incident-ids` stringSlice +- `--is-my-team` bool +- `--orderby` string +- `--query` string +- `--responder-ids` intSlice +- `--seconds-to-ack-from` int64 +- `--seconds-to-ack-to` int64 +- `--seconds-to-close-from` int64 +- `--seconds-to-close-to` int64 +- `--severities` stringSlice +- `--start-time` int64 +- `--team-ids` intSlice +- `--time-zone` string +- body-only (`--data`): fields (JSON); labels (JSON) + +### incident-list +List insight incidents +- `--asc` bool — Sort ascending when 'true', descending otherwise. +- `--channel-ids` intSlice — Filter by channel IDs. At most 100 entries. +- `--description-html-to-text` bool — Strip HTML markup from the description column when exporting. +- `--end-time` string (required) — End time, Unix seconds. Must be greater than 'start_time'. 
Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--export-fields` stringSlice — Subset of CSV column keys to include in the export. At most 50 entries. Only used by the export endpoints. · enum: incident_id | title | severity | progress | channel_id | channel_name | team_id | team_name | created_at | seconds_to_ack | seconds_to_close | closed_by | engaged_seconds | hours | notifications | interruptions | acknowledgements | assignments | reassignments | escalations | manual_escalations | timeout_escalations | assigned_to | responders | description | labels | fields | creator_id | creator_name +- `--incident-ids` stringSlice — Filter by incident IDs (MongoDB ObjectIDs). At most 100 entries. +- `--is-my-team` bool — Restrict results to teams the caller belongs to. When true and the caller has no teams, the result set is empty. +- `--limit` int64 — Page size, between 1 and 100. Defaults to 20. (1-100) +- `--orderby` string — Field to sort the underlying incident set by. · enum: created_at +- `--page` int64 — Page number, starting at 1. Defaults to 1. (min 1) +- `--query` string — Full-text query applied to incident title and description. +- `--responder-ids` intSlice — Filter by responder person IDs. At most 100 entries. +- `--search-after-ctx` string — Cursor token returned by a previous page. Pass it back to fetch the next page. +- `--seconds-to-ack-from` int64 — Lower bound (inclusive) on time-to-acknowledge, in seconds. +- `--seconds-to-ack-to` int64 — Upper bound (exclusive) on time-to-acknowledge, in seconds. Must be greater than 'seconds_to_ack_from' when both are set. +- `--seconds-to-close-from` int64 — Lower bound (inclusive) on time-to-close, in seconds. +- `--seconds-to-close-to` int64 — Upper bound (exclusive) on time-to-close, in seconds. Must be greater than 'seconds_to_close_from' when both are set. +- `--severities` stringSlice — Filter by severity. At most 3 entries. 
· enum: Critical | Warning | Info | Ok +- `--start-time` string (required) — Start time, Unix seconds. Must be greater than 0. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--team-ids` intSlice — Filter by team IDs. At most 100 entries. +- `--time-zone` string — IANA time zone name used to interpret the time range (e.g. 'Asia/Shanghai'). Defaults to the account time zone. +- body-only (`--data`): fields (object); labels (object) + +### incidents +Query incidents with performance metrics +- `--limit` int +- `--page` int +- `--since` string +- `--until` string + +### responder +Get responder insight +- `--aggregate-unit` string — Aggregate metrics into time buckets. When set, the time range must cover at least 24 hours; 'day' additionally caps the range at 31 days. · enum: day | week | month +- `--asc` bool — Sort ascending when 'true', descending otherwise. +- `--channel-ids` intSlice — Filter by channel IDs. At most 100 entries. +- `--description-html-to-text` bool — Strip HTML markup from the description column when exporting. +- `--end-time` string (required) — End time, Unix seconds. Must be greater than 'start_time'. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--export-fields` stringSlice — Subset of CSV column keys to include in the export. At most 50 entries. Only used by the export endpoints. · enum: incident_id | title | severity | progress | channel_id | channel_name | team_id | team_name | created_at | seconds_to_ack | seconds_to_close | closed_by | engaged_seconds | hours | notifications | interruptions | acknowledgements | assignments | reassignments | escalations | manual_escalations | timeout_escalations | assigned_to | responders | description | labels | fields | creator_id | creator_name +- `--incident-ids` stringSlice — Filter by incident IDs (MongoDB ObjectIDs). At most 100 entries. +- `--is-my-team` bool — Restrict results to teams the caller belongs to. 
When true and the caller has no teams, the result set is empty. +- `--orderby` string — Field to sort the underlying incident set by. · enum: created_at +- `--query` string — Full-text query applied to incident title and description. +- `--responder-ids` intSlice — Filter by responder person IDs. At most 100 entries. +- `--seconds-to-ack-from` int64 — Lower bound (inclusive) on time-to-acknowledge, in seconds. +- `--seconds-to-ack-to` int64 — Upper bound (exclusive) on time-to-acknowledge, in seconds. Must be greater than 'seconds_to_ack_from' when both are set. +- `--seconds-to-close-from` int64 — Lower bound (inclusive) on time-to-close, in seconds. +- `--seconds-to-close-to` int64 — Upper bound (exclusive) on time-to-close, in seconds. Must be greater than 'seconds_to_close_from' when both are set. +- `--severities` stringSlice — Filter by severity. At most 3 entries. · enum: Critical | Warning | Info | Ok +- `--split-hours` bool — When true, metrics are split into 'work'/'sleep'/'off' hour buckets. +- `--start-time` string (required) — Start time, Unix seconds. Must be greater than 0. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--team-ids` intSlice — Filter by team IDs. At most 100 entries. +- `--time-zone` string — IANA time zone name used to interpret the time range (e.g. 'Asia/Shanghai'). Defaults to the account time zone. +- body-only (`--data`): fields (object); labels (object) + +### responder-export +Export responder insight +- `--aggregate-unit` string — Aggregate metrics into time buckets. When set, the time range must cover at least 24 hours; 'day' additionally caps the range at 31 days. · enum: day | week | month +- `--asc` bool — Sort ascending when 'true', descending otherwise. +- `--channel-ids` intSlice — Filter by channel IDs. At most 100 entries. +- `--description-html-to-text` bool — Strip HTML markup from the description column when exporting. 
+- `--end-time` string (required) — End time, Unix seconds. Must be greater than 'start_time'. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--export-fields` stringSlice — Subset of CSV column keys to include in the export. At most 50 entries. Only used by the export endpoints. · enum: incident_id | title | severity | progress | channel_id | channel_name | team_id | team_name | created_at | seconds_to_ack | seconds_to_close | closed_by | engaged_seconds | hours | notifications | interruptions | acknowledgements | assignments | reassignments | escalations | manual_escalations | timeout_escalations | assigned_to | responders | description | labels | fields | creator_id | creator_name +- `--incident-ids` stringSlice — Filter by incident IDs (MongoDB ObjectIDs). At most 100 entries. +- `--is-my-team` bool — Restrict results to teams the caller belongs to. When true and the caller has no teams, the result set is empty. +- `--orderby` string — Field to sort the underlying incident set by. · enum: created_at +- `--query` string — Full-text query applied to incident title and description. +- `--responder-ids` intSlice — Filter by responder person IDs. At most 100 entries. +- `--seconds-to-ack-from` int64 — Lower bound (inclusive) on time-to-acknowledge, in seconds. +- `--seconds-to-ack-to` int64 — Upper bound (exclusive) on time-to-acknowledge, in seconds. Must be greater than 'seconds_to_ack_from' when both are set. +- `--seconds-to-close-from` int64 — Lower bound (inclusive) on time-to-close, in seconds. +- `--seconds-to-close-to` int64 — Upper bound (exclusive) on time-to-close, in seconds. Must be greater than 'seconds_to_close_from' when both are set. +- `--severities` stringSlice — Filter by severity. At most 3 entries. · enum: Critical | Warning | Info | Ok +- `--split-hours` bool — When true, metrics are split into 'work'/'sleep'/'off' hour buckets. +- `--start-time` string (required) — Start time, Unix seconds. 
Must be greater than 0. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--team-ids` intSlice — Filter by team IDs. At most 100 entries. +- `--time-zone` string — IANA time zone name used to interpret the time range (e.g. 'Asia/Shanghai'). Defaults to the account time zone. +- body-only (`--data`): fields (object); labels (object) + +### team +Get team insight +- `--aggregate-unit` string — Aggregate metrics into time buckets. When set, the time range must cover at least 24 hours; 'day' additionally caps the range at 31 days. · enum: day | week | month +- `--asc` bool — Sort ascending when 'true', descending otherwise. +- `--channel-ids` intSlice — Filter by channel IDs. At most 100 entries. +- `--description-html-to-text` bool — Strip HTML markup from the description column when exporting. +- `--end-time` string (required) — End time, Unix seconds. Must be greater than 'start_time'. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--export-fields` stringSlice — Subset of CSV column keys to include in the export. At most 50 entries. Only used by the export endpoints. · enum: incident_id | title | severity | progress | channel_id | channel_name | team_id | team_name | created_at | seconds_to_ack | seconds_to_close | closed_by | engaged_seconds | hours | notifications | interruptions | acknowledgements | assignments | reassignments | escalations | manual_escalations | timeout_escalations | assigned_to | responders | description | labels | fields | creator_id | creator_name +- `--incident-ids` stringSlice — Filter by incident IDs (MongoDB ObjectIDs). At most 100 entries. +- `--is-my-team` bool — Restrict results to teams the caller belongs to. When true and the caller has no teams, the result set is empty. +- `--orderby` string — Field to sort the underlying incident set by. · enum: created_at +- `--query` string — Full-text query applied to incident title and description. 
+- `--responder-ids` intSlice — Filter by responder person IDs. At most 100 entries. +- `--seconds-to-ack-from` int64 — Lower bound (inclusive) on time-to-acknowledge, in seconds. +- `--seconds-to-ack-to` int64 — Upper bound (exclusive) on time-to-acknowledge, in seconds. Must be greater than 'seconds_to_ack_from' when both are set. +- `--seconds-to-close-from` int64 — Lower bound (inclusive) on time-to-close, in seconds. +- `--seconds-to-close-to` int64 — Upper bound (exclusive) on time-to-close, in seconds. Must be greater than 'seconds_to_close_from' when both are set. +- `--severities` stringSlice — Filter by severity. At most 3 entries. · enum: Critical | Warning | Info | Ok +- `--split-hours` bool — When true, metrics are split into 'work'/'sleep'/'off' hour buckets. +- `--start-time` string (required) — Start time, Unix seconds. Must be greater than 0. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--team-ids` intSlice — Filter by team IDs. At most 100 entries. +- `--time-zone` string — IANA time zone name used to interpret the time range (e.g. 'Asia/Shanghai'). Defaults to the account time zone. +- body-only (`--data`): fields (object); labels (object) + +### team-export +Export team insight +- `--aggregate-unit` string — Aggregate metrics into time buckets. When set, the time range must cover at least 24 hours; 'day' additionally caps the range at 31 days. · enum: day | week | month +- `--asc` bool — Sort ascending when 'true', descending otherwise. +- `--channel-ids` intSlice — Filter by channel IDs. At most 100 entries. +- `--description-html-to-text` bool — Strip HTML markup from the description column when exporting. +- `--end-time` string (required) — End time, Unix seconds. Must be greater than 'start_time'. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--export-fields` stringSlice — Subset of CSV column keys to include in the export. At most 50 entries. 
Only used by the export endpoints. · enum: incident_id | title | severity | progress | channel_id | channel_name | team_id | team_name | created_at | seconds_to_ack | seconds_to_close | closed_by | engaged_seconds | hours | notifications | interruptions | acknowledgements | assignments | reassignments | escalations | manual_escalations | timeout_escalations | assigned_to | responders | description | labels | fields | creator_id | creator_name +- `--incident-ids` stringSlice — Filter by incident IDs (MongoDB ObjectIDs). At most 100 entries. +- `--is-my-team` bool — Restrict results to teams the caller belongs to. When true and the caller has no teams, the result set is empty. +- `--orderby` string — Field to sort the underlying incident set by. · enum: created_at +- `--query` string — Full-text query applied to incident title and description. +- `--responder-ids` intSlice — Filter by responder person IDs. At most 100 entries. +- `--seconds-to-ack-from` int64 — Lower bound (inclusive) on time-to-acknowledge, in seconds. +- `--seconds-to-ack-to` int64 — Upper bound (exclusive) on time-to-acknowledge, in seconds. Must be greater than 'seconds_to_ack_from' when both are set. +- `--seconds-to-close-from` int64 — Lower bound (inclusive) on time-to-close, in seconds. +- `--seconds-to-close-to` int64 — Upper bound (exclusive) on time-to-close, in seconds. Must be greater than 'seconds_to_close_from' when both are set. +- `--severities` stringSlice — Filter by severity. At most 3 entries. · enum: Critical | Warning | Info | Ok +- `--split-hours` bool — When true, metrics are split into 'work'/'sleep'/'off' hour buckets. +- `--start-time` string (required) — Start time, Unix seconds. Must be greater than 0. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--team-ids` intSlice — Filter by team IDs. At most 100 entries. +- `--time-zone` string — IANA time zone name used to interpret the time range (e.g. 'Asia/Shanghai'). 
Defaults to the account time zone. +- body-only (`--data`): fields (object); labels (object) + +### top-alerts +Query top alert sources by label +- `--label` string +- `--limit` int +- `--since` string +- `--until` string + +<!-- GENERATED:insight END --> + +## Time-flag families (critical — wrong name = unknown flag error) + +Two families with **identical value syntax** but different flag names: + +| flag names | required? | commands | +|---|---|---| +| `--since` / `--until` | optional (defaults: `7d` / `now`) | `top-alerts`, `incidents` | +| `--start-time` / `--end-time` | **both required** | all others (`account`, `alert-topk-by-label`, `channel`, `channel-export`, `incident-export`, `incident-list`, `responder`, `responder-export`, `team`, `team-export`) | + +Both families accept: relative duration (`30d`, `24h`), `now`, `+7d`, a date, or Unix seconds — **except** `incident-export`, which takes **epoch seconds only** for `--start-time`/`--end-time` (flag type is int64). + +## Gotchas + +- **Two time-flag families.** Passing `--since` to an `--start-time` command (or vice-versa) fails with `unknown flag`. See the table above. +- **`incident-export --start-time`/`--end-time` are epoch seconds only**, not relative strings — use `$(date -v-30d +%s)`. All other `--start-time` commands accept `30d`/`now`. +- **`top-alerts --label`** only accepts `check` or `resource`. Any other value (e.g. `integration_name`) returns HTTP 400. +- **Export commands output raw CSV, not JSON.** Redirect to a file; dumping CSV into context burns tokens and is unreadable. No `--limit`/`--page` — exports emit the full filtered set. +- **`insight incidents` and `incident-list` are siblings**, not the same. `incidents` uses `--since`/`--until`, paginates, and is token-light. `incident-list` uses `--start-time`/`--end-time`, adds `--severities`/`--responder-ids`/`--query`/cursor (`--search-after-ctx`), and is the filterable variant. 
+- **All `insight` commands hit the OLAP backend.** HTTP 500 means the backend is down — report it, do not retry. +- **Empty result is authoritative.** A zero-row response means no matching data for that scope/window — do not widen filters or re-query with shifted keywords. +- **`--aggregate-unit`** (on `account`, `alert-topk-by-label`, `channel`, `responder`, `team`, and the `channel-export` / `responder-export` / `team-export` verbs) splits results into time buckets: `day` / `week` / `month`. When set, the window must span ≥24 h; `day` additionally caps the range at 31 days. + +## Worked example — identify noisiest check sources + +```bash +# Top-20 noisiest check sources in the past 7 days, sorted by raw event count +fduty insight alert-topk-by-label \ + --label check \ + --k 20 \ + --orderby total_alert_event_cnt \ + --start-time 7d --end-time now \ + --output-format toon +# → returns label, total_alert_cnt, total_alert_event_cnt per check +# Drill into a specific team: add --team-ids <id> +``` diff --git a/skills/flashduty/reference/member.md b/skills/flashduty/reference/member.md new file mode 100644 index 0000000..bf1b3c5 --- /dev/null +++ b/skills/flashduty/reference/member.md @@ -0,0 +1,147 @@ +# fduty member — command card + +Prereq: `SKILL.md` read. `invite` sends invitation emails immediately (up to 20 per call). `delete` is **irreversible** — it removes the member from the organization; default safety check rejects deletes when the member is referenced by escalation rules or schedules (pass `--is-force` to bypass). `role-update` **replaces** all role assignments atomically; `role-grant`/`role-revoke` are additive/subtractive. + +## Route here when + +"成员 / 邀请 / 用户 / 角色 / member / invite / user profile / role assignment / org roster" → **member**. Sibling domains: `team` (team membership lists, not org-level members); `role` (role definitions — get role IDs here first). Key IDs: **`member_id` (int)** from `member list`; **`role_id` (int)** from `fduty role list`. 
+ +## Intent → verb + +| want | verb | +|---|---| +| find a member / look up their ID | `list` | +| who am I (current user) | `info` | +| update a member's profile fields | `info-reset` | +| invite new members to the org | `invite` | +| remove a member from the org | `delete` | +| add roles without touching others | `role-grant` | +| remove specific roles | `role-revoke` | +| set exactly these roles (replace all) | `role-update` | + +## Hot flow — invite then assign role + +```bash +# 1. find available role IDs +fduty role list --output-format toon + +# 2. invite up to 20 members in one call; members array MUST go via --data +fduty member invite \ + --data '{"members":[{"email":"alice@example.com","member_name":"Alice","role_ids":[<role_id>]},{"email":"bob@example.com","member_name":"Bob","role_ids":[<role_id>]}]}' +# → returns items[].member_id for each new member + +# 3. confirm they appear (status will be 'pending' until invite accepted) +fduty member list --query "alice" --output-format toon +``` + +## Hot flow — role change for an existing member + +```bash +# 1. look up the member +fduty member list --query "alice" --output-format toon +# note member_id and current account_role_ids + +# 2a. add a role without disturbing others (role-id is POSITIONAL) +fduty member role-grant <role_id> --member-id <member_id> + +# 2b. OR: set the complete new role list (role-ids positional; replaces ALL roles) +fduty member role-update <role_id> <role_id2> --member-id <member_id> + +# 3. verify +fduty member list --query "alice" --output-format toon +``` + +<!-- GENERATED:member START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### delete +Delete member +- `--country-code` string — Phone country code, used with phone +- `--email` string — Email address +- `--is-force` bool — Force delete. Defaults to false, which checks for references from escalation rules, schedules, etc. 
Set to true to skip the reference check and delete immediately +- `--member-id` int64 — Member ID +- `--member-name` string — Member name +- `--phone` string — Phone number +- `--ref-id` string — External reference ID + +### info +Get current member info + +### info-reset <member-id> +Reset member info +- `--avatar` string — Avatar URL +- `--country-code` string — Country code +- `--email` string — Email address +- `--locale` string — Locale · enum: zh-CN | en-US +- `<member-id>` (positional, required) int64 — Member ID of the member to update +- `--member-name` string — Display name (2-39 chars) +- `--phone` string — Phone number +- `--time-zone` string — Time zone + +### invite +Invite members +- `--from` string — Invite source context +- body-only (`--data`): members (array<object>) (required) + +### list +List members +- `--asc` bool — Ascending order +- `--limit` int64 — Page size (1-100) +- `--orderby` string — Sort field · enum: created_at | updated_at +- `--page` int64 — Page number (min 1) +- `--query` string — Search keyword +- `--role-id` int64 — Filter by role ID +- `--search-after-ctx` string + +### role-grant <role-id> [<id2>...] +Grant role to member +- `--member-id` int64 (required) — Member ID +- `<role-ids>` (positional, required) intSlice — Role IDs to grant; appended to the member's current roles (duplicates are deduplicated). + +### role-revoke <role-id> [<id2>...] +Revoke role from member +- `--member-id` int64 (required) — Member ID +- `<role-ids>` (positional, required) intSlice — Role IDs to remove from the member. + +### role-update <role-id> [<id2>...] 
+Update member roles +- `--member-id` int64 (required) — Member ID +- `<role-ids>` (positional, required) intSlice — New set of role IDs + +<!-- GENERATED:member END --> + +## Status values + +`member list` returns `status` on each row: + +- `enabled` — active, can log in +- `pending` — invitation sent, not yet accepted +- `deleted` — removed from the org (only visible if the API returns them; typically filtered out) + +## Gotchas + +- **`invite` members array is body-only — use `--data`.** Individual members cannot be passed as flat flags; the `members` array (with nested `role_ids`, `email`, `phone`, etc.) lives only in the JSON body. Up to 20 members per call. +- **`info-reset <member-id>` is POSITIONAL.** Pass the member ID as the first bare argument, not `--member-id`: `fduty member info-reset <member_id> --member-name "New Name"`. The `--member-id` flag exists but the positional form is required per the `use` field. +- **`role-grant / role-revoke / role-update` — role IDs are POSITIONAL.** All three verbs take role IDs as positional args: `fduty member role-grant <role_id> [<role_id2>...] --member-id <member_id>`. The `--role-ids` flag also exists but the positional form is authoritative. +- **`role-update` is a full replacement.** List current roles with `member list` first; omitting a role removes it. +- **`delete` default is safe** (checks escalation rules / schedules). If it rejects with a reference error, review those references before using `--is-force`. +- **Empty `member list` result is authoritative** — if `--query` returns nothing the member does not exist; do not widen the query. 
+ +## Worked example + +Look up a member then promote them to a new role: + +```bash +# find member +fduty member list --query "carol" --output-format toon +# → member_id=4217, account_role_ids=[2] + +# find the admin role ID +fduty role list --output-format toon +# → role_id=1 is "Admin" + +# grant admin role (keeps existing role 2) +fduty member role-grant 1 --member-id 4217 + +# confirm +fduty member list --query "carol" --output-format toon +``` diff --git a/skills/flashduty/reference/monit-agent.md b/skills/flashduty/reference/monit-agent.md new file mode 100644 index 0000000..5964907 --- /dev/null +++ b/skills/flashduty/reference/monit-agent.md @@ -0,0 +1,64 @@ +# fduty monit-agent — command card + +Prereq: `SKILL.md` read. On-box diagnostics: run diagnostic tools on a host or database target via its installed monit-agent. Both verbs are read-only probes. Pairs with **`monit-query`** (datasource-side RCA). + +## Route here when + +"主机诊断 / 进程 / 负载 / 锁 / 慢查询 / mysql 诊断 / 可达性 / on-box / 看那台机器上发生了什么" → **monit-agent**. You need a **target locator** (host/instance identifier). Always `catalog` first to learn what tools that target exposes — tool names are not guessable. + +## Intent → verb + +| want | verb | +|---|---| +| list the diagnostic tools available for a target | `catalog --target-locator <t>` | +| run up to 8 of those tools on the target | `invoke --target-locator <t> --data '{"tools":[…]}'` | + +## Hot flow — diagnose a host + +```bash +# 1. see which tools this target exposes (tool names come from here, never guess) +fduty monit-agent catalog --target-locator <host-or-instance> --output-format toon +# 2. 
invoke up to 8 tools concurrently; tool names taken verbatim from the catalog +fduty monit-agent invoke --target-locator <host-or-instance> \ + --data '{"tools":[{"tool":"host.top","params":{}},{"tool":"host.disk","params":{}}]}' +``` + +<!-- GENERATED:monit-agent START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### catalog +List the diagnostic tools the agent exposes for a target +- `--target-kind` string +- `--target-locator` string + +### invoke +Run up to 8 monit-agent tools concurrently on a target +- `--target-kind` string +- `--target-locator` string + +<!-- GENERATED:monit-agent END --> + +## Key concepts + +- **`catalog` → `invoke` is the order.** `catalog` returns each tool's `name` (+ `input_schema` for its params); `invoke` runs them. Tool names are target-specific — take them verbatim from the catalog, do not invent. +- **`invoke` carries the tool list in `--data`**: `{"tools":[{"tool":"<name>","params":{…}}, … up to 8]}`. `params` defaults to `{}`. `--target-locator` (required) and `--target-kind` override matching `--data` keys. +- Each result carries `agent_elapsed_ms` (agent-side) vs `e2e_elapsed_ms` (end-to-end) — a large gap signals network/edge slowness, not a slow tool. + +## Gotchas + +- **Quoted/comma params (e.g. SQL) → use `--data -` with a heredoc** to avoid shell-quoting hell: + ```bash + fduty monit-agent invoke --target-locator 'db-1' --data - <<'FDUTY' + {"tools":[{"tool":"mysql.query","params":{"sql":"SELECT a, b FROM t WHERE s='RUNNING'","max_rows":50}}]} + FDUTY + ``` +- **`ambiguous_target_kind` error** ⇒ the locator matched multiple kinds; re-issue with `--target-kind`. +- A `target_unavailable` / `target_unreachable` error means the agent isn't connected — report it; don't retry endlessly or fall back to SSH. +- Per-tool errors (`timeout`, `denied`, `unknown_tool`…) are reported per result, mutually exclusive with that tool's `data`. 
+ +## Worked example — top processes + disk on a host + +```bash +fduty monit-agent invoke --target-locator web-prod-3 \ + --data '{"tools":[{"tool":"host.top","params":{"limit":10}},{"tool":"host.disk","params":{}}]}' \ + --output-format toon +``` diff --git a/skills/flashduty/reference/monit-query.md b/skills/flashduty/reference/monit-query.md new file mode 100644 index 0000000..cd121ca --- /dev/null +++ b/skills/flashduty/reference/monit-query.md @@ -0,0 +1,68 @@ +# fduty monit-query — command card + +Prereq: `SKILL.md` read. Datasource-side RCA: query a monitoring datasource directly. Both verbs are read-only. Pairs with **`monit`** (rule config) and **`monit-agent`** (on-box host/db diagnostics). + +## Route here when + +"指标查询 / 日志查询 / PromQL / LogsQL / SQL 验证 / 趋势 / 日志聚类 / 数据源 RCA" → **monit-query**. You need a **datasource name + type** — get them from `fduty monit datasource-list` first; **never guess a datasource name** (a wrong name 400s `can not find datasource`). + +## Intent → verb + +| want | verb | +|---|---| +| pre-clustered RCA findings (surging log patterns / notable metric trends) | `diagnose --operation log_patterns\|metric_trends` | +| run a raw query and get values/rows back as the datasource returns them | `rows --expr "<query>"` | + +## Hot flow — diagnose a noisy datasource + +```bash +# 1. discover the real datasource name + type (never guess) +fduty monit datasource-list --output-format toon +# 2a. validate / run a raw query — time goes INSIDE the query, there are NO time flags +fduty monit-query rows --ds-name <name> --ds-type <type> --expr "rate(http_requests_total[5m])" +# 2b. 
or get pre-clustered RCA over a window +fduty monit-query diagnose --ds-name <name> --ds-type <type> \ + --operation log_patterns --time-start -1h --time-end now +``` + +<!-- GENERATED:monit-query START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### diagnose +Pre-clustered RCA findings (log_patterns or metric_trends) +- `--ds-name` string +- `--ds-type` string +- `--input-query` string +- `--max-logs` int +- `--max-patterns` int +- `--operation` string +- `--time-end` string +- `--time-start` string +- `--timeout-seconds` int + +### rows +Raw datasource passthrough (returns values/rows as the datasource itself would) +- `--args` stringSlice +- `--ds-name` string +- `--ds-type` string +- `--expr` string + +<!-- GENERATED:monit-query END --> + +## Key concepts + +- **`rows` = raw passthrough.** Response `data` is a **top-level array** of row objects — pipe `jq '.[]'`, NOT `.items[]`. Numeric fields under `values` (metric canonical key `__value__`); labels/columns under `fields`. **Time belongs in the query expression**, not in flags. +- **`diagnose` = pre-clustered findings.** `--operation log_patterns` returns surging/new/gone log templates (RCA-sorted); `metric_trends` returns notable series (current vs baseline). Takes `--time-start` / `--time-end` (relative like `-1h`, `now`, or unix seconds). + +## Gotchas + +- **Discover the datasource name first** (`monit datasource-list`). A wrong/guessed name 400s `can not find datasource` — re-list, don't retry variants. +- **A 5xx or HTML-body error is TRANSIENT** — retry the same call ≤3×. Do NOT fall back to SSH, `monit-agent`, or incident search on a transient datasource error. +- `rows` has **no time flags** — putting `--time-start` on `rows` is wrong; embed the range in `--expr`. +- Empty results = the query genuinely matched nothing in that window — report it, don't widen blindly. 
+ +## Worked example — surging log patterns in the last hour + +```bash +fduty monit-query diagnose --ds-name prod-loki --ds-type loki \ + --operation log_patterns --time-start -1h --time-end now --output-format toon +``` diff --git a/skills/flashduty/reference/monit.md b/skills/flashduty/reference/monit.md new file mode 100644 index 0000000..b18b785 --- /dev/null +++ b/skills/flashduty/reference/monit.md @@ -0,0 +1,346 @@ +# fduty monit — command card + +Prereq: `SKILL.md` read. **SKILL.md + this card = full competence on monitors — no `--help` needed.** Read verbs are free. Mutating verbs (`datasource-create/update/delete`, `rule-create/update/delete/delete-batch/import`, `rule-update-fields`, `rule-move`, `store-ruleset-create/update/delete`, `tools-invoke`) change state — confirm before running. `datasource-delete` and `rule-delete-batch` are **irreversible**. + +## Route here when + +"监控规则 / 告警规则 / 数据源 / PromQL查询 / 日志查询 / 诊断 / 监控目标 / 主机工具" or "alert rule / datasource / metric query / log pattern / diagnose / monitored host / tools catalog" → **monit**. NOT `incident` (that domain = the alert graph after rules fire). Key IDs: **rule ID (int)** from `rule-list-basic`; **datasource name (string)** — never guess, always discover via `datasource-list`. 
+ +## Intent → verb + +| want | verb | +|---|---| +| list all datasources (by type) | `datasource-list` | +| datasource detail | `datasource-info` | +| create / update a datasource | `datasource-create` / `datasource-update` | +| delete a datasource | `datasource-delete` | +| SLS project/logstore discovery | `datasource-sls-projects` / `datasource-sls-logstores` | +| list alert rules (all or by folder) | `rule-list-basic` | +| full rule config | `rule-info` | +| create / update a rule | `rule-create` / `rule-update` | +| delete one or many rules | `rule-delete` / `rule-delete-batch` | +| move rules to another folder | `rule-move` | +| toggle enabled/channels in bulk | `rule-update-fields` | +| rule trigger status by folder | `rule-status` / `rule-counter-status` | +| rule change history | `rule-audits` → detail via `rule-audit-detail` | +| export / import rules (backup/migrate) | `rule-export` / `rule-import` | +| what datasource types support rules | `rule-dstypes` | +| per-channel / per-node / total counters | `rule-counter-channel` / `rule-counter-node` / `rule-counter-total` | +| run ad-hoc PromQL / SQL / LogQL | `query-rows` | +| log pattern clustering / trend RCA | `query-diagnose` | +| list monitored hosts/targets | `targets` | +| what tools a target exposes | `tools-catalog` | +| run host/db diagnostic tools | `tools-invoke` | +| store ruleset CRUD | `store-ruleset-create/list/info/update/delete` | + +## Hot flow — ad-hoc query + diagnose + +```bash +# 1. discover the real datasource name — NEVER guess +fduty monit datasource-list --output-format toon +fduty monit datasource-list --type prometheus --output-format toon + +# 2a. point-in-time query (PromQL/SQL/LogQL); ALL time range goes INSIDE --expr +fduty monit query-rows --ds-type prometheus --ds-name <ds-name> \ + --expr 'rate(http_requests_total{job="api"}[5m])' --output-format toon + +# 2b. 
log pattern RCA over last 15 min (time_range via --data; omit = last 15 min default) +fduty monit query-diagnose --ds-type loki --ds-name <ds-name> \ + --data '{"input":{"query":"{app=\"payment\"} |= \"error\""}}' + +# 2c. metric trend analysis with explicit window +fduty monit query-diagnose --ds-type prometheus --ds-name <ds-name> \ + --data '{"input":{"query":"rate(http_errors_total[5m])"},"time_range":{"start":1718780000,"end":1718783600}}' +``` + +## Hot flow — host diagnostics + +```bash +# 1. find the target locator (prefix search; --keyword is prefix-only) +fduty monit targets --keyword prod-web --output-format toon + +# 2. discover what tools the target exposes +fduty monit tools-catalog --target-locator <hostname-or-ip> --output-format toon + +# 3. invoke tools (up to 8 concurrently); use heredoc to avoid shell quoting hell +fduty monit tools-invoke --target-locator <hostname-or-ip> --output-format toon --data - <<'EOF' +{"tools":[{"tool":"host.cpu","params":{}},{"tool":"host.mem","params":{}}]} +EOF +``` + +<!-- GENERATED:monit START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### datasource-create +Create datasource +- `--address` string — Connection address. For Prometheus/Loki/VictoriaLogs: HTTP URL. For MySQL/Oracle/Postgres/ClickHouse: 'host:port'. For SLS: endpoint without http/https prefix. Not required for Elasticsearch cloud deployment. +- `--edge-cluster-name` string (required) — Monitors edge cluster name responsible for evaluating rules using this datasource. +- `--id` int64 — Datasource ID. Required for update; omit for create. +- `--name` string (required) — Datasource display name. +- `--note` string — Optional description. +- `--type-ident` string (required) — Datasource type identifier. Allowed: 'prometheus', 'loki', 'mysql', 'oracle', 'postgres', 'clickhouse', 'elasticsearch', 'sls', 'victorialogs'. 
+- body-only (`--data`): payload (object) (required) + +### datasource-delete +Delete datasource +- `--id` int64 (required) — Resource ID. + +### datasource-info +Get datasource detail +- `--id` int64 (required) — Resource ID. + +### datasource-list +List datasources +- `--type` string — Filter by datasource type identifier. Omit to return all types. Allowed values: 'prometheus', 'loki', 'mysql', 'oracle', 'postgres', 'clickhouse', 'elasticsearch', 'sls', 'victorialogs'. + +### datasource-sls-logstores +List SLS logstores +- `--id` int64 — SLS datasource ID. +- `--offset` int64 — Pagination offset. +- `--project` string — SLS project name. +- `--size` int64 — Page size. + +### datasource-sls-projects +List SLS projects +- `--id` int64 — SLS datasource ID. +- `--offset` int64 — Pagination offset. +- `--query` string — Name prefix filter. +- `--size` int64 — Page size. + +### datasource-update +Update datasource +- `--address` string — Connection address. For Prometheus/Loki/VictoriaLogs: HTTP URL. For MySQL/Oracle/Postgres/ClickHouse: 'host:port'. For SLS: endpoint without http/https prefix. Not required for Elasticsearch cloud deployment. +- `--edge-cluster-name` string (required) — Monitors edge cluster name responsible for evaluating rules using this datasource. +- `--id` int64 — Datasource ID. Required for update; omit for create. +- `--name` string (required) — Datasource display name. +- `--note` string — Optional description. +- `--type-ident` string (required) — Datasource type identifier. Allowed: 'prometheus', 'loki', 'mysql', 'oracle', 'postgres', 'clickhouse', 'elasticsearch', 'sls', 'victorialogs'. +- body-only (`--data`): payload (object) (required) + +### query-diagnose +Diagnose data source +- `--account-id` int64 — Optional consistency check. Must equal the authenticated account when supplied. +- `--ds-name` string (required) — Data source name configured under the tenant. +- `--ds-type` string (required) — Data source type. 
'log_patterns' supports 'loki' and 'victorialogs'; 'metric_trends' supports 'prometheus'. +- `--operation` string — Diagnostic operation. When omitted, inferred from 'ds_type' (loki / victorialogs → 'log_patterns', prometheus → 'metric_trends'). Other sources must specify explicitly. · enum: log_patterns | metric_trends +- body-only (`--data`): input (object) (required); methods (array<object>); options (object); time_range (object) + +### query-rows +Query data source rows +- `--account-id` int64 — Optional consistency check. Must equal the authenticated account when supplied; mismatched values are rejected. Business execution always uses the authenticated account. +- `--delay-seconds` int64 — Look-back offset in seconds applied to point-in-time queries (Prometheus, Loki stats, VictoriaLogs stats). Ignored for raw / detail queries. +- `--ds-name` string (required) — Data source name; must match a configured data source under the tenant. +- `--ds-type` string (required) — Data source type; must match a configured data source under the tenant. Examples: 'prometheus', 'loki', 'victorialogs', 'sls', 'elasticsearch', 'mysql', 'postgres', 'oracle', 'clickhouse'. +- `--expr` string (required) — Query expression. Syntax depends on 'ds_type' and is interpreted by the corresponding monit-edge client (PromQL for Prometheus, LogQL for Loki, SQL for SQL sources, etc.). +- body-only (`--data`): args (object) + +### rule-audit-detail +Get rule audit snapshot +- `--id` int64 (required) — Audit record ID — the 'id' of an audit row returned by 'POST /monit/rule/audits', NOT the rule ID. Passing a rule ID returns HTTP 400. + +### rule-audits +List rule change history +- `--id` int64 (required) — Rule ID. 
+ +### rule-counter-channel +Get rule counts by channel + +### rule-counter-node +Get rule counts by folder node + +### rule-counter-status +Get rule status counters for top-level folders + +### rule-counter-total +Get rule counter time series + +### rule-create +Create alert rule +- `--account-id` int64 +- `--channel-ids` intSlice — Channel IDs to send alerts to. +- `--created-at` int64 +- `--creator-id` int64 +- `--creator-name` string +- `--cron-pattern` string — 5-field cron schedule. +- `--debug-log-enabled` bool +- `--delay-seconds` int64 +- `--description` string +- `--description-type` string — enum: text | markdown +- `--ds-ids` intSlice — Specific data source IDs. +- `--ds-list` stringSlice — Data source name patterns (supports wildcards). +- `--ds-type` string — Data source type. +- `--enabled` bool +- `--folder-id` int64 — Folder the rule belongs to. +- `--id` int64 +- `--name` string — Rule name. +- `--repeat-interval` int64 — Notification repeat interval in seconds. +- `--repeat-total` int64 — Max number of repeat notifications. +- `--updated-at` int64 +- `--updater-id` int64 +- `--updater-name` string +- body-only (`--data`): annotations (object); enabled_times (array<object>); labels (object); rule_configs (object) + +### rule-delete +Delete alert rule +- `--id` int64 (required) — Rule ID. + +### rule-delete-batch +Batch delete alert rules +- `--ids` intSlice (required) — Rule IDs. + +### rule-dstypes +List available datasource types + +### rule-export +Export alert rules +- `--ids` intSlice (required) — Rule IDs. + +### rule-import +Import alert rules + +### rule-info +Get alert rule detail +- `--id` int64 (required) — Rule ID. + +### rule-list-basic +List alert rules +- `--folder-id` int64 — Folder ID. 0 to list all accessible rules. + +### rule-move +Move alert rules to folder +- `--dest-folder-id` int64 (required) — Destination folder ID. +- `--ids` intSlice (required) — Rule IDs to move. 
+ +### rule-status +Get rule trigger status under folder +- `--folder-id` int64 — Folder ID. 0 for all. + +### rule-update +Update alert rule +- `--account-id` int64 +- `--channel-ids` intSlice — Channel IDs to send alerts to. +- `--created-at` int64 +- `--creator-id` int64 +- `--creator-name` string +- `--cron-pattern` string — 5-field cron schedule. +- `--debug-log-enabled` bool +- `--delay-seconds` int64 +- `--description` string +- `--description-type` string — enum: text | markdown +- `--ds-ids` intSlice — Specific data source IDs. +- `--ds-list` stringSlice — Data source name patterns (supports wildcards). +- `--ds-type` string — Data source type. +- `--enabled` bool +- `--folder-id` int64 — Folder the rule belongs to. +- `--id` int64 +- `--name` string — Rule name. +- `--repeat-interval` int64 — Notification repeat interval in seconds. +- `--repeat-total` int64 — Max number of repeat notifications. +- `--updated-at` int64 +- `--updater-id` int64 +- `--updater-name` string +- body-only (`--data`): annotations (object); enabled_times (array<object>); labels (object); rule_configs (object) + +### rule-update-fields +Batch update rule fields +- `--channel-ids` intSlice +- `--cron-pattern` string +- `--debug-log-enabled` bool +- `--delay-seconds` int64 +- `--description` string +- `--ds-ids` intSlice +- `--ds-list` stringSlice +- `--ds-type` string +- `--enabled` bool +- `--fields` stringSlice (required) — Field names to update. +- `--ids` intSlice (required) — Rule IDs to update. +- `--repeat-interval` int64 +- `--repeat-total` int64 +- body-only (`--data`): annotations (object); enabled_times (array<object>); labels (object) + +### store-ruleset-create +Create ruleset +- `--note` string (required) — Description or title of the ruleset. +- `--open-flag` int64 — Sharing scope. '0' = private (creator only), '1' = account-shared, '2' = public. Defaults to '0' if omitted. +- `--payload` string (required) — JSON string containing the alert rule definitions. 
+- `--type-ident` string (required) — Datasource type identifier this ruleset applies to, e.g. 'prometheus'. + +### store-ruleset-delete +Delete ruleset +- `--id` int64 (required) — Resource ID. + +### store-ruleset-info +Get ruleset detail +- `--id` int64 (required) — Resource ID. + +### store-ruleset-list +List rulesets +- `--type-ident` string (required) — Datasource type identifier to filter by, e.g. 'prometheus'. + +### store-ruleset-update +Update ruleset +- `--id` int64 (required) — Ruleset ID to update. +- `--note` string (required) — New description. +- `--open-flag` int64 — New sharing scope. '0' = private, '1' = account-shared, '2' = public. +- `--payload` string (required) — New JSON string of alert rule definitions. + +### targets +List monitored targets +- `--account-id` int64 — Optional consistency check. Must equal the authenticated account when supplied. +- `--cursor` string — Opaque pagination cursor from the previous response's 'next_cursor'. Omit / pass empty string for the first page. Reset whenever 'keyword', 'limit', or tenant changes. +- `--keyword` string — Prefix match against 'target_locator'. ASCII only, no whitespace, no '|', max 256 bytes. Substring search is not supported. +- `--limit` int64 — Page size. Default 50, max 200. (max 200) + +### tools-catalog +List target tool catalog +- `--account-id` int64 — Optional consistency check. Must equal the authenticated account when supplied. +- `--include-output-shape` bool — When true, each tool entry includes its 'output_shape' JSON Schema. Defaults to false to keep responses small for LLM consumption. +- `--target-kind` string — Optional target kind. When omitted webapi auto-infers across currently known kinds. Built-in kinds: 'host', 'mysql'. Required on retry when the previous call returned 'ambiguous_target_kind'. +- `--target-locator` string (required) — Target identifier (host name, MySQL address, …). Max 256 bytes; no whitespace, control characters, or '|'. 
+ +### tools-invoke +Invoke target tools +- `--account-id` int64 — Optional consistency check. Must equal the authenticated account when supplied. +- `--target-kind` string — Optional target kind; auto-inferred when omitted. +- `--target-locator` string (required) — Target identifier. Same validation rules as '/monit/tools/catalog'. +- body-only (`--data`): tools (array<object>) (required) + +<!-- GENERATED:monit END --> + +## Key concepts + +**Check types in `rule_configs`** — three independent checks per rule; enable one or more: +- `check_threshold` — fires when a PromQL value crosses `critical` / `warning` / `info` thresholds (string expressions). +- `check_anydata` — fires when the query returns any rows (useful for log-pattern rules). +- `check_nodata` — fires when the query returns no data (detect silent failures). + +**Severity enum** (inside `check_*`): `Critical` · `Warning` · `Info` (capital first letter; lowercase is rejected). + +**Query name** — `rule_configs.queries[].name` is a single letter (e.g. `A`, `B`). `R` is reserved — do not use it. + +**`operation` on `query-diagnose`**: `log_patterns` (loki / victorialogs) or `metric_trends` (prometheus); inferred from `--ds-type` when omitted — only pass it explicitly for ambiguous source types. + +**`targets` response shape** — rows are under `items[]` (not `data[]`); pipe `jq '.items[]'`, not `jq '.[]'`. `updated_at` means "last seen", not "online now". + +## Gotchas + +- **Datasource name is not guessable.** A `can not find datasource` 400 means the name is wrong — re-run `datasource-list` and copy the exact `Name`. Never invent variants. +- **`query-rows` has no time flags.** There is no `--time-start` / `--time-end` / `--operation`. Embed all time range and bucketing inside `--expr`. Passing those flags is a silent no-op or error. +- **`query-diagnose` time window via `--data`**, not flags. Pass `{"time_range":{"start":<unix>,"end":<unix>},...}`. Window wider than 6 hours is rejected server-side. 
Omitting `time_range` defaults to the last 15 minutes. +- **`rule_configs` and nested arrays require `--data`.** The queries, thresholds, enabled_times, and labels objects cannot be expressed as flat flags — pass them as inline JSON via `--data '{"rule_configs":{...}}'`. Typed scalar flags (`--name`, `--enabled`, `--cron-pattern`, `--ds-type`) override matching `--data` keys. +- **`tools-catalog` / `tools-invoke` `--target-locator` is required and not guessable.** If the user has not provided a host or IP, ask — do not invent one. Tool names in `invoke` must come from the `tools-catalog` response — never hallucinate them. +- **`rule-delete-batch` and `datasource-delete` are irreversible.** Confirm IDs with `rule-list-basic` / `datasource-info` first. +- **`rule-audit-detail --id` takes the audit record ID**, not the rule ID. Get audit record IDs from `rule-audits --id <rule-id>` first; passing the rule ID returns HTTP 400. + +## Worked example — inspect a firing rule then batch-disable it + +```bash +# 1. find triggered rules in folder 0 (all accessible) +fduty monit rule-list-basic --folder-id 0 --output-format toon +# look at triggered=true rows; note their ids + +# 2. get full config of one rule +fduty monit rule-info --id <rule-id> --output-format toon + +# 3. disable several rules at once without touching other fields +fduty monit rule-update-fields --ids <id1>,<id2> --fields enabled --enabled false +``` diff --git a/skills/flashduty/reference/role.md b/skills/flashduty/reference/role.md new file mode 100644 index 0000000..48f57f9 --- /dev/null +++ b/skills/flashduty/reference/role.md @@ -0,0 +1,135 @@ +# fduty role — command card + +Prereq: `SKILL.md` read. Read verbs are free; `delete` is **irreversible** — confirm the role-id first. `upsert --permission-ids` **replaces** the entire permission set on an existing role. 
+ +## Route here when + +"角色 / 权限 / RBAC / 授权 / 角色成员 / 自定义角色" → **role**, NOT `member` (member = person identity/contact) or `team` (team = ownership group). Key IDs: **`role-id` (int)** from `role list`; **`member-id` (int)** from `member list`; **`permission-id` (int)** from `role permission-list`. + +## Intent → verb + +| want | verb | +|---|---| +| all roles | `list` | +| one role's detail | `info` | +| create a custom role | `upsert` (omit `--role-id` or set to 0) | +| update role name / description / permissions | `upsert --role-id` (replaces permission set) | +| disable a role temporarily | `disable` | +| re-enable a disabled role | `enable` | +| permanently remove a role | `delete` | +| assign a role to members | `member-grant` | +| remove a role from members | `member-revoke` | +| browse all available permissions | `permission-list` | +| browse raw permission factors | `permission-factor-list` | + +## Hot flow — create a role and assign it + +```bash +# 1. Browse available permissions with role membership annotation +fduty role permission-list --with-all --output-format toon + +# 2. Create the role with chosen permission IDs (note: ids from step 1) +fduty role upsert --role-name "Incident Responder" \ + --description "Read incidents and manage on-call." \ + --permission-ids 101,102,305 + +# 3. Find the new role ID +fduty role list --output-format toon + +# 4. Find member IDs to assign (member-id is POSITIONAL, role-id is a flag) +fduty member list --output-format toon + +# 5. Grant role to members (first member-id is positional; additional ids space-separated) +fduty role member-grant <member-id> --role-id <role-id> +# Grant to multiple: fduty role member-grant <id1> <id2> <id3> --role-id <role-id> +``` + +## Hot flow — audit and update an existing role + +```bash +# 1. Find the role +fduty role list --output-format toon + +# 2. 
Inspect current permissions (is_granted shows which are currently set) +fduty role permission-list --role-ids <role-id> --with-all --output-format toon + +# 3. Update permissions (--permission-ids is the FULL replacement set) +fduty role upsert --role-id <role-id> --role-name "Incident Responder" \ + --permission-ids 101,102,305,410 +``` + +<!-- GENERATED:role START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### delete <role-id> +Delete a role +- `<role-id>` (positional, required) int64 — Role ID. + +### disable <role-id> +Disable a role +- `<role-id>` (positional, required) int64 — Role ID. + +### enable <role-id> +Enable a role +- `<role-id>` (positional, required) int64 — Role ID. + +### info <role-id> +Get role detail +- `<role-id>` (positional, required) int64 — Role ID. + +### list +List roles +- `--asc` bool — Ascending sort order. +- `--orderby` string — Sort field. · enum: created_at | updated_at + +### member-grant <member-id> [<id2>...] +Grant role to members +- `<member-ids>` (positional, required) intSlice — Member IDs to grant/revoke the role. Max 100. +- `--role-id` int64 (required) — Role ID to grant or revoke. + +### member-revoke <member-id> [<id2>...] +Revoke role from members +- `<member-ids>` (positional, required) intSlice — Member IDs to grant/revoke the role. Max 100. +- `--role-id` int64 (required) — Role ID to grant or revoke. + +### permission-factor-list +List permission factors +- `--factor-types` stringSlice — Filter by factor type. · enum: api | button | visit | menu | url + +### permission-list +List permissions +- `--role-ids` intSlice — Filter to permissions granted to these roles. +- `--with-all` bool — If true, return all permissions with is_granted set to indicate which are granted. + +### upsert +Create or update a role +- `--description` string — Role description. (≤499 chars) +- `--permission-ids` intSlice — Permission IDs to grant. Replaces the existing set. +- `--role-id` int64 — Role ID. Omit or set to 0 to create. 
+- `--role-name` string (required) — Role display name. 1–39 characters. (1-39 chars) + +<!-- GENERATED:role END --> + +## Key concepts + +- **`permission-id` vs `permission-factor`**: `permission-list` returns coarse permission objects (id, name, class, scope, type=read|manage) — use these ids in `upsert --permission-ids`. `permission-factor-list` returns fine-grained factors (api/button/menu/url/visit strings like `template:read:info`) — useful for auditing what a permission covers, but not accepted by `upsert`. +- **`permission-list --with-all`**: returns every permission in the system with `is_granted=true/false` for the requested `--role-ids`. Omit `--role-ids` + `--with-all` to see the full catalog without annotation. +- **`permission-list` response shape**: rows are under `items[]` — pipe `jq '.items[]'`, NOT `.data.items[]`. +- **`permission-factor-list` response shape**: top-level array — pipe `jq '.[]'`, NOT `.items[]`. + +## Gotchas + +- **`delete`, `disable`, `enable`, `info` take `<role-id>` as a POSITIONAL arg**, not `--role-id`: `fduty role delete <role-id>`. The flag form is silently ignored. +- **`member-grant` / `member-revoke`: `<member-id>` is POSITIONAL (one or more space-separated); `--role-id` is a flag** — easy to flip. Example: `fduty role member-grant 123 456 --role-id 7`. +- **`upsert --permission-ids` replaces the full set** on update — omitting it clears all permissions. Always read `permission-list --role-ids <id> --with-all` first to get the current set before modifying. +- **`upsert` with no `--role-id` (or `--role-id 0`) creates; with `--role-id N` updates** — the verb doubles as create and update; check for an existing role with `list` to avoid accidental duplicates. +- **`delete` is irreversible** — members who had this role lose its permissions immediately. Prefer `disable` to park a role without destroying it. +- **Max 100 members per grant/revoke call** — batch if the list is longer. 
+ +## Worked example + +```bash +# Revoke a role from a single member +fduty role member-revoke <member-id> --role-id <role-id> +# Revoke from multiple members in one call +fduty role member-revoke <id1> <id2> <id3> --role-id <role-id> +``` diff --git a/skills/flashduty/reference/route.md b/skills/flashduty/reference/route.md new file mode 100644 index 0000000..6cab941 --- /dev/null +++ b/skills/flashduty/reference/route.md @@ -0,0 +1,107 @@ +# fduty route — command card + +Prereq: `SKILL.md` read. `upsert` is a **full replacement** of the rule — it overwrites all cases; always read first and pass `--version` for optimistic concurrency. + +## Route here when + +"路由规则 / 告警路由 / 集成路由 / 分派到频道 / route rule / alert routing / integration routing / which channel gets alerts" → **route**. Key IDs needed: +- **`integration-id`** (int) — the integration the rule belongs to. Get a real one from **`fduty alert list`** (every alert carries `integration_id` + `integration_name`). It is **NOT** a `channel_id`; `channel list` does not surface integration IDs. If none is in scope, ask which integration rather than probing IDs. +- **`channel-id`** (int) — the target channel matched alerts route to; from `fduty channel list`. + +Do NOT use `route` for scheduling (→ `schedule`), templates (→ `template`), or channel management (→ `channel`). + +## Intent → verb + +| want | verb | +|---|---| +| read the rule for one integration | `info` | +| read rules for multiple integrations at once | `list` | +| create a rule / update an existing rule | `upsert` | + +## Hot flow — read then upsert a routing rule + +```bash +# 1. Read the current rule; note the returned `version` field for concurrency control. +fduty route info <integration-id> --output-format toon + +# 2. Upsert: route critical alerts to channel 101, all others to channel 102 (default). +# Pass the `version` from step 1 to prevent races. 
+fduty route upsert <integration-id> --version <version> \ + --data '{ + "cases": [ + { + "if": [{"key": "alert_severity", "oper": "IN", "vals": ["Critical"]}], + "channel_ids": [101], + "fallthrough": false + } + ], + "default": {"channel_ids": [102]} + }' + +# 3. Verify +fduty route info <integration-id> --output-format toon +``` + +```bash +# Bulk read — check rules for several integrations at once (positional ids). +fduty route list <integration-id-1> <integration-id-2> --output-format toon +``` + +<!-- GENERATED:route START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### info <integration-id> +Get routing rule detail +- `<integration-id>` (positional, required) int64 — Integration ID. Must be greater than 0. + +### list <integration-id> [<id2>...] +List routing rules +- `<integration-ids>` (positional, required) intSlice — Integration IDs to fetch routing rules for. + +### upsert <integration-id> +Upsert routing rule +- `<integration-id>` (positional, required) int64 — Integration the rule belongs to. +- `--version` int64 — Expected current version for optimistic concurrency control. Pass the value returned by the latest read. +- body-only (`--data`): cases (array<object>); default (object); sections (array<object>) + +<!-- GENERATED:route END --> + +## Key concepts + +- **Cases are evaluated top-to-bottom.** The first matching case wins unless `fallthrough: true`, which lets evaluation continue to the next case even after a match. +- **`routing_mode`** per case: + - `standard` (default / empty) — routes to the fixed `channel_ids` list. + - `name_mapping` — reads `name_mapping_label` from the alert event label map and resolves the channel by name dynamically. +- **`default` branch** — fires when no case matches (or matched cases yield no valid channels). At least one of `cases` or `default` must be provided on upsert. 
+- **`vals` match patterns**: literal string, wildcard (`*`, `?`), regex (`/pattern/`), CIDR (`cidr:10.0.0.0/8`), numeric comparison (`num:lt:100`). +- **Condition operator** (`oper`): `IN` (field value is in `vals`) or `NOTIN` (field value is not in `vals`). + +## Gotchas + +- **`upsert` is a full replacement.** It overwrites all `cases`, `default`, and `sections` for the integration. Always `info` first, reconstruct the full body, then `upsert` — never send a partial update. +- **`--version` is strongly recommended on upsert.** Omitting it skips optimistic concurrency and silently overwrites concurrent changes. Pass the `version` value from the latest `info` response. +- **`list` positional form.** `use` is `list <integration-id> [<id2>...]` — pass all integration IDs as positional arguments; the `--integration-ids` flag is also accepted but the positional form is simpler for a handful of IDs. +- **`info` returns `null` when no rule is configured** — not an error. An empty result from `list` means none of the requested integrations have a routing rule. +- **All case sub-fields via `--data`** — `cases`, `default`, `sections` are nested arrays/objects and cannot be expressed as flat flags; use `--data '{...}'` or `--data -` to pipe JSON. `--integration-id` and `--version` remain flat flags and override matching `--data` keys. +- **Sections are display-only.** `sections[].position` is an index into `cases[]` — off-by-one errors here cause a 400. Sections have no effect on matching logic. + +## Worked example + +```bash +# Read the current rule for the integration, then add a name-mapping case for team-based routing. +fduty route info <integration-id> --output-format toon +# → note current version, e.g.
3 + +fduty route upsert <integration-id> --version 3 \ + --data '{ + "cases": [ + { + "if": [{"key": "labels.team", "oper": "IN", "vals": ["*"]}], + "routing_mode": "name_mapping", + "name_mapping_label": "team", + "channel_ids": [], + "fallthrough": false + } + ], + "default": {"channel_ids": [<fallback-channel-id>]} + }' +``` diff --git a/skills/flashduty/reference/rum.md b/skills/flashduty/reference/rum.md new file mode 100644 index 0000000..b9a1dff --- /dev/null +++ b/skills/flashduty/reference/rum.md @@ -0,0 +1,170 @@ +# fduty rum — command card + +Prereq: `SKILL.md` read. Read verbs are free. `application-create` / `application-update` / `application-delete` / `issue-update` mutate state — confirm before running. `application-delete` is **irreversible**. + +## Route here when + +"前端监控 / RUM / web应用 / iOS应用 / Android应用 / 前端报错 / JS报错 / 崩溃 / crash / error tracking / RUM application / real user monitoring" → **rum**, NOT `monit` (server-side rules), NOT `channel`, NOT `team`. You need two distinct IDs: **`application_id` (string)** from `application-list`, and **`issue_id` (string)** from `issue-list` — they are NOT interchangeable. + +## Intent → verb + +| want | verb | +|---|---| +| find a RUM app by name / list all | `application-list` | +| config detail for one app | `application-info` | +| config detail for several apps at once | `application-infos` | +| **create** a new RUM app | `application-create` | +| edit app name / privacy / tracing / alerting | `application-update` | +| delete an app | `application-delete` | +| list front-end error issues (with time window) | `issue-list` | +| full detail of one error issue | `issue-info` | +| mark issue resolved / label cause | `issue-update` | + +## Hot flow — triage front-end errors + +```bash +# 1. find the app (application_id is a string) +fduty rum application-list --query "checkout" --output-format toon + +# 2. 
list open errors in the last 7 days (both time flags required, MILLISECOND epoch) +NOW=$(date +%s000) +WEEK_AGO=$(( $(date +%s) - 604800 ))000 +fduty rum issue-list \ + --application-ids <application_id> \ + --start-time $WEEK_AGO --end-time $NOW \ + --statuses for_review --orderby error_count \ + --output-format toon + +# 3. get full detail of the top issue +fduty rum issue-info <issue_id> --output-format toon + +# 4. mark resolved after fix is confirmed +fduty rum issue-update <issue_id> --status resolved --suspected-cause code.exception +``` + +## Hot flow — create a new RUM application + +```bash +# team-id is POSITIONAL (use: "application-create <team-id>"); other fields are flags +fduty rum application-create <team_id> \ + --application-name "Checkout Web" \ + --type browser +# → returns application_id + client_token for SDK init +``` + +<!-- GENERATED:rum START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### application-create <team-id> +Create application +- `--application-name` string (required) — Application name. 1–40 characters. +- `--is-private` bool — Restrict access to team members only. +- `--no-geo` bool — Do not infer geographic location. +- `--no-ip` bool — Do not collect IP addresses. +- `<team-id>` (positional, required) int64 — Owning team ID. +- `--type` string (required) — Application type. · enum: browser | ios | android | react-native | flutter | kotlin-multiplatform | roku | unity +- body-only (`--data`): alerting (object); tracing (object) + +### application-delete <application-id> +Delete application +- `<application-id>` (positional, required) string — RUM application ID. + +### application-info <application-id> +Get application detail +- `<application-id>` (positional, required) string — RUM application ID. + +### application-infos <application-id> [<id2>...] +Batch get applications +- `<application-ids>` (positional, required) stringSlice — Up to 200 application IDs. 
+ +### application-list +List applications +- `--asc` bool — Sort ascending if 'true'. +- `--is-my-team` bool — If 'true', return only applications belonging to the current user's teams. +- `--limit` int64 — Page size. Range: 1–100. Default: 20. +- `--orderby` string — Sort field. · enum: created_at | updated_at +- `--page` int64 — Page number (1-based). Default: 1. +- `--query` string — Search query to filter by application name. +- `--search-after-ctx` string +- `--team-id` int64 — Filter by team ID. + +### application-update <application-id> +Update application +- `<application-id>` (positional, required) string — Application ID to update. +- `--application-name` string — New application name. +- `--is-private` bool +- `--no-geo` bool +- `--no-ip` bool +- `--team-id` int64 +- `--type` string — enum: browser | ios | android | react-native | flutter | kotlin-multiplatform | roku | unity +- body-only (`--data`): alerting (object); tracing (object) + +### issue-info <issue-id> +Get issue detail +- `<issue-id>` (positional, required) string — Issue ID. + +### issue-list +List issues +- `--application-ids` stringSlice — Filter by application IDs. +- `--asc` bool +- `--by-intersection` bool +- `--dql` string — DQL query for advanced filtering. Cannot be used with 'sql'. +- `--end-time` int64 (required) — End of time range, millisecond timestamp. Maximum range: 183 days. +- `--error-required` bool — If 'true', only return issues with at least one associated error event. +- `--limit` int64 — Page size. Range: 1–100. Default: 20. +- `--orderby` string — enum: created_at | updated_at | session_count | error_count +- `--page` int64 — Page number. Default: 1. +- `--search-after-ctx` string +- `--sql` string — SQL-style query for advanced filtering. Cannot be used with 'dql'. +- `--start-time` int64 (required) — Start of time range, millisecond timestamp. +- `--statuses` stringSlice — Filter by statuses. 
· enum: for_review | reviewed | ignored | resolved +- `--suspected-causes` stringSlice — Filter by suspected causes. +- `--team-ids` intSlice — Filter by team IDs. + +### issue-update <issue-id> +Update issue +- `<issue-id>` (positional, required) string — Issue ID to update. +- `--status` string — New status. · enum: for_review | reviewed | ignored | resolved +- `--suspected-cause` string — Suspected cause. · enum: api.failed_request | network.error | code.exception | code.invalid_object_access | code.invalid_argument | unknown + +<!-- GENERATED:rum END --> + +## Key enums & state machine + +**`--type` (application-create / update) — closed enum:** +`browser` · `ios` · `android` · `react-native` · `flutter` · `kotlin-multiplatform` · `roku` · `unity` +No `miniprogram` / `wechat` — unsupported, do not guess a value. + +**Issue `--status` (issue-update / issue-list `--statuses`):** +`for_review` → `reviewed` → `ignored` | `resolved` +Regression: a `resolved` issue that recurs gets a `regression{}` object on its record. + +**Issue `--suspected-cause` / `--suspected-causes`:** +`api.failed_request` · `network.error` · `code.exception` · `code.invalid_object_access` · `code.invalid_argument` · `unknown` + +**Application `status` (read-only on list/info):** `enabled` · `disabled` · `deleted` + +## Gotchas + +- **`issue-list` time flags are MILLISECOND epoch, both required.** Use `--start-time` / `--end-time` (NOT `--since`/`--until`, NOT seconds). Max range 183 days. Example: `$(date +%s)000` converts a seconds epoch to ms. +- **`application_id` ≠ `issue_id`.** `issue_id` comes from `issue-list` — never pass an `application_id` where `issue_id` is expected. +- **`application-create` positional:** `use` is `application-create <team-id>` — pass the team id as the first bare arg, NOT `--team-id`. Same pattern: `application-delete`, `application-info`, `application-infos`, `application-update`, `issue-info`, `issue-update` all take their primary id as positional. 
`application-list` and `issue-list` are all-flags. +- **`alerting` and `tracing` are nested objects** — configure them via `--data '{"alerting":{...},"tracing":{...}}'`; there are no flat flags for their sub-fields. Scalar flags (`--application-name`, `--type`, …) override matching `--data` keys. +- **Application records hold CONFIG only** — no traffic volume, error-rate, or session-count fields. For trend data, query `monit` RUM series. +- **Empty `issue-list` is authoritative** — a filter returning no items means no matching issues, not a missing feature. Do not widen the query or guess. +- **No `rum sourcemap` subcommand** — don't attempt it; it does not exist. + +## Worked example + +```bash +# Find the worst unreviewed crash in the "payment" app this week, then mark it resolved +APP_ID=$(fduty rum application-list --query "payment" --output-format json | jq -r '.items[0].application_id') +NOW=$(date +%s000) +WEEK_AGO=$(( $(date +%s) - 604800 ))000 +fduty rum issue-list \ + --application-ids "$APP_ID" \ + --start-time $WEEK_AGO --end-time $NOW \ + --statuses for_review --orderby session_count \ + --limit 1 --output-format json | jq -r '.items[0].issue_id' +# → paste the returned issue_id below +fduty rum issue-update <issue_id> --status resolved --suspected-cause code.exception +``` diff --git a/skills/flashduty/reference/schedule.md b/skills/flashduty/reference/schedule.md new file mode 100644 index 0000000..2a4faa5 --- /dev/null +++ b/skills/flashduty/reference/schedule.md @@ -0,0 +1,191 @@ +# fduty schedule — command card + +Prereq: `SKILL.md` read. **Read verbs are free. `delete` is irreversible — confirm IDs before executing. `create` / `update` immediately change the live rotation — confirm scope first.** + +## Route here when + +"值班 / 排班 / 轮班 / 轮值 / 值班表 / 班表 / 谁在值班 / 当前值班 / 下一班 / 排班配置 / on-call / who is on call / schedule / rotation / shift / next on call / view or edit shifts" → **schedule**. This is the single home for everything 值班/on-call. 
The key ID you need is **`schedule_id` (int)** — get it from `schedule list`. + +**Who is on call right now** is computed from a schedule, not stored: `schedule info <id> --start now --end +1h` returns the current shift (and its `person_ids`). The legacy **`oncall who`** aggregates the live on-call across *all* schedules in one call, but the **`oncall` command group is being deprecated and folded into `schedule`** — use `oncall who` only as a convenience for the global snapshot; prefer `schedule info` for the durable path, and do not build new flows on `oncall *`. + +## Intent → verb + +| want | verb | +|---|---| +| who is on call right now (one schedule) | `info <schedule-id> --start now --end +1h` | +| who is on call right now (all schedules, legacy) | `oncall who` — *deprecated group; prefer per-schedule `info`* | +| list all schedules (with name search / team filter) | `list` | +| schedules I am assigned to | `self` | +| detail + computed shifts for a schedule | `info <schedule-id>` | +| batch-fetch multiple schedules (no shifts) | `infos <schedule-id> [<id2>…]` | +| preview a schedule definition before saving | `preview` | +| create a new schedule | `create` | +| update an existing schedule | `update` | +| delete one or more schedules | `delete <schedule-id> [<id2>…]` | + +## Hot flow — who is on call right now + +```bash +# 1. Find the schedule ID +fduty schedule list --query "SRE" --output-format toon + +# 2a. Current on-call for THIS schedule — a tiny now-window yields the live shift +fduty schedule info <schedule-id> --start now --end +1h --output-format toon + +# 2b. Or the live on-call across ALL schedules in one call (legacy oncall group, +# being deprecated into schedule — fine for a quick global snapshot) +fduty oncall who --output-format toon +``` + +Both return `person_ids` (integers), not names. 
Resolve names by joining `member list` client-side — its rows live under `.items[]` keyed by `member_id` (+ `member_name`): + +```bash +members=$(fduty member list --json) +fduty schedule info <schedule-id> --start now --end +1h --json | jq --argjson m "$members" ' + [.. | .person_ids? // empty | .[]] | unique | map(. as $id | ($m.items[]? | select(.member_id==$id) | .member_name))' +# If the join is fiddly, just report person_ids — do NOT loop refining jq. +``` + +## Hot flow — inspect a schedule's upcoming shifts + +```bash +fduty schedule list --query "SRE" --output-format toon # find the schedule_id +fduty schedule info <schedule-id> --start now --end +7d --output-format toon # next 7 days +``` + +## Hot flow — create a schedule via --data + +```bash +# Layers are deeply nested; pass the full body via --data; scalar flags override matching keys. +fduty schedule create --schedule-name "SRE Weekly" --team-id <team-id> \ + --data '{ + "layers": [{ + "layer_name": "Week rotation", + "mode": 0, + "rotation_unit": "week", + "rotation_value": 1, + "rotation_duration": 604800, + "handoff_time": 0, + "enable_time": 1700000000, + "expire_time": 0, + "weight": 1, + "hidden": 0, + "fair_rotation": false, + "restrict_mode": 0, + "restrict_start": 0, + "restrict_end": 0, + "restrict_periods": [], + "mask_continuous_enabled": false, + "day_mask": {"repeat": [1,2,3,4,5]}, + "groups": [{ + "group_name": "Group A", + "name": "group_a", + "start": 1700000000, + "end": 1700604800, + "members": [{"person_ids": [<person-id>], "role_id": 0}] + }], + "name": "layer_1", + "schedule_id": 0, + "account_id": 0, + "create_at": 0, + "create_by": 0, + "update_at": 0, + "update_by": 0 + }] + }' +# → returns schedule_id; verify with: fduty schedule info <schedule-id> --start now --end +7d +``` + +<!-- GENERATED:schedule START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### create +Create schedule +- `--description` string — Schedule description. Max 500 characters. 
(≤500 chars) +- `--end` string — Preview window end (Unix seconds, 10 digits). Required for /schedule/preview. Max 45 days after start. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--name` string — Legacy schedule name field. Used when schedule_name is empty. (≤40 chars) +- `--schedule-id` int64 — Schedule ID. Required on update. +- `--schedule-name` string — Schedule display name. Max 40 characters. (≤40 chars) +- `--start` string — Preview window start (Unix seconds, 10 digits). Required for /schedule/preview. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--team-id` int64 — Owning team ID. +- body-only (`--data`): layers (array<object>); notify (object) + +### delete <schedule-id> [<id2>...] +Delete schedules +- `<schedule-ids>` (positional, required) intSlice — Schedule IDs to operate on. + +### info <schedule-id> +Get schedule info +- `--end` string (required) — Preview end timestamp (Unix seconds, 10 digits). Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `<schedule-id>` (positional, required) int64 — Schedule ID. +- `--start` string (required) — Preview start timestamp (Unix seconds, 10 digits). Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. + +### infos <schedule-id> [<id2>...] +Batch get schedules +- `<schedule-ids>` (positional, required) intSlice — Schedule ID list. + +### list +List schedules +- `--end` string — Window end timestamp (Unix seconds). Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--is-my-manage` bool — Only return schedules created by the current user within their teams. +- `--is-my-team` bool — Only return schedules whose owning team the current user belongs to. +- `--limit` int64 — Page size. Default 10, max 100. (max 100) +- `--page` int64 — Page number (1-indexed). +- `--query` string — Search keyword matched against schedule names. 
+- `--search-after-ctx` string +- `--start` string — When set together with end, computed layer schedules are returned. Span must be less than 45 days. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--team-ids` intSlice — Filter by team IDs. + +### preview +Preview schedule +- `--description` string — Schedule description. Max 500 characters. (≤500 chars) +- `--end` string — Preview window end (Unix seconds, 10 digits). Required for /schedule/preview. Max 45 days after start. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--name` string — Legacy schedule name field. Used when schedule_name is empty. (≤40 chars) +- `--schedule-id` int64 — Schedule ID. Required on update. +- `--schedule-name` string — Schedule display name. Max 40 characters. (≤40 chars) +- `--start` string — Preview window start (Unix seconds, 10 digits). Required for /schedule/preview. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--team-id` int64 — Owning team ID. +- body-only (`--data`): layers (array<object>); notify (object) + +### self +List my schedules +- `--end` string — Window end (Unix seconds, 10 digits). Must be within 30 days of start. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--start` string — Window start (Unix seconds, 10 digits). Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. + +### update +Update schedule +- `--description` string — Schedule description. Max 500 characters. (≤500 chars) +- `--end` string — Preview window end (Unix seconds, 10 digits). Required for /schedule/preview. Max 45 days after start. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--name` string — Legacy schedule name field. Used when schedule_name is empty. (≤40 chars) +- `--schedule-id` int64 — Schedule ID. Required on update. 
+- `--schedule-name` string — Schedule display name. Max 40 characters. (≤40 chars) +- `--start` string — Preview window start (Unix seconds, 10 digits). Required for /schedule/preview. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--team-id` int64 — Owning team ID. +- body-only (`--data`): layers (array<object>); notify (object) + +<!-- GENERATED:schedule END --> + +## Key concepts + +- **Window for shifts:** `info` (and `list` when `--start`+`--end` are both set) computes actual rotation slots in the requested window. Max span = 45 days. `info` requires both `--start` and `--end`. +- **`self` window:** returns schedules the current user is assigned to in the given window. Max span = 30 days. +- **Layer `mode`:** `0` = common rotation, `1` = override layer (higher `weight` wins). +- **`rotation_unit`:** `hour | day | week | month`. +- **`restrict_mode`:** `0` = none, `1` = restrict by day, `2` = restrict by week. +- **`expire_time: 0`** means the layer never expires (open-ended). + +## Gotchas + +- **`info`, `infos`, `delete` take positional `<schedule-id>` — NOT `--schedule-id`.** Pass the ID bare: `fduty schedule info 123 --start now --end +7d`. Using `--schedule-id` on these verbs fails. +- **`create` / `update` / `preview` take all inputs as flags** (no positional). `update` requires `--schedule-id` as a flag to identify the target. +- **`layers` is body-only.** There is no per-layer typed flag — you must pass the entire `layers` array via `--data`. Scalar top-level flags (`--schedule-name`, `--team-id`) override matching `--data` keys. +- **`list` without `--start`/`--end` omits computed shifts** — only schedule metadata is returned. Pass both flags (≤45 day span) to get rotation slots in the list response. +- **`delete` is irreversible** — takes one or more `<schedule-id>` positionals; double-check IDs before executing. +- **`list` default page size is 10** — pass `--limit 100` when scanning all schedules. 
+- **Legacy `oncall who`:** `--team` does **not** filter server-side (any value returns the full list — scope by `--query <schedule_name>` instead), and an empty result is authoritative ("no one on call in that window") — report it, don't widen or fabricate a responder. The `oncall` group will be removed; don't depend on it. + +## Worked example + +```bash +# Find my own on-call windows for the next two weeks +fduty schedule self --start now --end +14d --output-format toon +``` diff --git a/skills/flashduty/reference/status-page.md b/skills/flashduty/reference/status-page.md new file mode 100644 index 0000000..149fea0 --- /dev/null +++ b/skills/flashduty/reference/status-page.md @@ -0,0 +1,190 @@ +# fduty status-page — command card + +Prereq: `SKILL.md` read. **SKILL.md + this card = full competence on status pages — no `--help` needed.** Read verbs are free; any `change-*` create/update with `--notify-subscribers` notifies subscribers immediately — confirm scope first. + +## Route here when + +"公开事件 / 公开时间线 / 状态页 / 维护窗口 / 订阅者 / 状态页迁移" → **status-page**, NOT `incident` (incident = the internal alert graph; status-page = the public-facing page). You need two IDs, both from `status-page list`: **`page_id` (int)** and **`component_id` (ULID string)**. + +## Intent → verb + +| want | verb | +|---|---| +| pages + their component IDs | `list` | +| what's live on a page now | `change-active-list` | +| every event incl.
closed | `change-list` | +| one event's detail | `change-info` | +| **open** an incident/maintenance | `change-create` (save the returned `change_id`) | +| post a progress update | `change-timeline-create` | +| edit event title/responders | `change-update` | +| delete an event | `change-delete` | +| fix/remove a timeline entry | `change-timeline-update` / `change-timeline-delete` | +| subscribers | `subscriber-list` / `subscriber-import` / `subscriber-export` | +| migrate from Atlassian Statuspage | `migrate-structure` → (verify) → `migrate-email-subscribers`; poll `migration-status`; `migration-cancel` | + +## Hot flow — publish & resolve an incident + +```bash +# page-id is POSITIONAL here (see fence headings: `### change-active-list <page-id>`); change-id stays a flag. +# 1. find the page + impacted component IDs +fduty status-page list --output-format toon +# 2. confirm nothing already open (empty = nothing open; if one exists, reuse its change_id) +fduty status-page change-active-list <page_id> --type incident +# 3. open it (page-id positional; scalars as flags; the required `updates` array via --data); save change_id +fduty status-page change-create <page_id> --type incident \ + --title "API latency elevated" --status investigating --description "Investigating elevated latency." \ + --data '{"updates":[{"status":"investigating","description":"Team is investigating.","component_changes":[{"component_id":"<component_id>","status":"degraded"}]}]}' +# 4. post progress: investigating → identified → monitoring (change-timeline-create takes BOTH ids as flags) +fduty status-page change-timeline-create --page-id <page_id> --change-id <change_id> \ + --status identified --description "Root cause identified." +# 5. resolve — every referenced component MUST go back to operational +fduty status-page change-timeline-create --page-id <page_id> --change-id <change_id> \ + --status resolved --description "Recovered." 
\ + --data '{"component_changes":[{"component_id":"<component_id>","status":"operational"}]}' +# 6. confirm closed +fduty status-page change-active-list <page_id> --type incident +``` + +<!-- GENERATED:status-page START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### change-active-list <page-id> +List active status page events +- `<page-id>` (positional, required) int64 — Status page ID. +- `--type` string (required) — Event type filter. Required. Returns only in-progress (non-terminal) events — 'investigating'/'identified'/'monitoring' for 'incident', 'scheduled'/'ongoing' for 'maintenance'. · enum: incident | maintenance + +### change-create <page-id> +Create status page event +- `--auto-update-by-schedule` bool — Maintenance only: automatically advance the status based on the scheduled window. +- `--close-at-seconds` string — Scheduled close time for retrospective events. Must be greater than 'start_at_seconds'. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--description` string — Event description (Markdown). Required by the validator. +- `--is-retrospective` bool — Mark this event as a retrospective (historical) one. +- `--linked-changes` stringSlice — Linked change IDs (related incidents, deployments, etc.). +- `--notify-subscribers` bool — Notify subscribers about this event and all its updates. +- `<page-id>` (positional, required) int64 — Status page ID. +- `--responders` intSlice — Member IDs responsible for this event. +- `--start-at-seconds` string — Event start time in unix seconds. Defaults to now when omitted. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--status` string (required) — Initial event status. 'investigating'/'identified'/'monitoring'/'resolved' apply to incidents; 'scheduled'/'ongoing'/'completed' apply to maintenances. 
· enum: investigating | identified | monitoring | resolved | scheduled | ongoing | completed +- `--title` string (required) — Event title, up to 255 characters. (≤255 chars) +- `--type` string (required) — Event type. · enum: incident | maintenance +- body-only (`--data`): updates (array<object>) (required) + +### change-delete +Delete status page event +- `--change-id` int64 (required) — Target event ID. +- `--page-id` int64 (required) — Status page ID. + +### change-info +Get status page event detail +- `--change-id` int64 (required) — Event (change) ID. +- `--page-id` int64 (required) — Status page ID. + +### change-list <page-id> +List status page events +- `--end-at-seconds` string — Filter events started at or before this unix timestamp (seconds). Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `<page-id>` (positional, required) int64 — Status page ID. +- `--start-at-seconds` string — Filter events started at or after this unix timestamp (seconds). Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--status` string (required) — Event status filter. Required. Must be a status valid for the given 'type' (e.g. 'investigating'/'identified'/'monitoring'/'resolved' for incidents; 'scheduled'/'ongoing'/'completed' for maintenances). · enum: investigating | identified | monitoring | resolved | scheduled | ongoing | completed +- `--type` string (required) — Event type filter. Required. · enum: incident | maintenance + +### change-timeline-create +Create event timeline entry +- `--at-seconds` string — Update timestamp in unix seconds. Defaults to now when omitted. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--change-id` int64 (required) — Target event ID. +- `--description` string — Update description (Markdown). Required. +- `--page-id` int64 (required) — Status page ID. +- `--status` string (required) — New event status. Must match the event type. 
When the status transitions to 'resolved' or 'completed', all referenced components must become 'operational'. · enum: investigating | identified | monitoring | resolved | scheduled | ongoing | completed +- body-only (`--data`): component_changes (array<object>) + +### change-timeline-delete +Delete event timeline entry +- `--change-id` int64 (required) — Parent event ID. +- `--page-id` int64 (required) — Status page ID. +- `--update-id` string (required) — Timeline update ID to delete. + +### change-timeline-update +Update event timeline entry +- `--at-seconds` string — New update timestamp in unix seconds. Accepts a duration (7d, 24h), '+7d' for the future, 'now', a date, or Unix seconds. +- `--change-id` int64 (required) — Parent event ID. +- `--description` string — New update description (Markdown). +- `--page-id` int64 (required) — Status page ID. +- `--update-id` string (required) — Target timeline update ID. + +### change-update +Update status page event +- `--change-id` int64 (required) — Target event ID. +- `--linked-changes` stringSlice — Linked event IDs. Pass the full replacement list. +- `--page-id` int64 (required) — Status page ID. +- `--responders` intSlice — Member IDs responsible for this event. Pass the full replacement list. +- `--title` string — New event title, up to 255 characters. Omit to keep the existing value. (≤255 chars) + +### list +List status pages + +### migrate-email-subscribers +Migrate email subscribers +- `--api-key` string (required) — Atlassian Statuspage API key with access to the source page. +- `--source-page-id` string (required) — Atlassian Statuspage source page ID. +- `--target-page-id` int64 (required) — Flashduty target status page ID that will receive the imported subscribers. + +### migrate-structure <source-page-id> +Migrate status page structure +- `--api-key` string (required) — Atlassian Statuspage API key with access to the source page. 
+- `<source-page-id>` (positional, required) string — Atlassian Statuspage source page ID. +- `--url-name` string — Target URL name for the migrated status page. When omitted, the source page's URL name is reused. + +### migration-cancel <job-id> +Cancel status page migration +- `<job-id>` (positional, required) string — Migration job ID. + +### migration-status <job-id> +Get migration status +- `<job-id>` (positional, required) string — Migration job ID returned by 'migrate-structure' or 'migrate-email-subscribers'. + +### subscriber-export <page-id> +Export subscribers +- `--component-ids` stringSlice — Optional component IDs to filter subscribers by. +- `<page-id>` (positional, required) int64 — Status page ID. + +### subscriber-import <page-id> +Import subscribers +- `--method` string (required) — Subscription method. 'email' is only valid for public pages; 'im' is only valid for internal pages. · enum: email | im +- `<page-id>` (positional, required) int64 — Target status page ID. +- body-only (`--data`): subscribers (array<object>) + +### subscriber-list <page-id> +List status page subscribers +- `--component-ids` string — Comma-separated component IDs to filter subscribers by. +- `--limit` int64 — Page size (1-100). (1-100) +- `--page` int64 — Page number (1-based). (min 1) +- `<page-id>` (positional, required) int64 — Status page ID. + +<!-- GENERATED:status-page END --> + +## Status values (load-bearing — a wrong value 400s) + +- **Component status** (`component_changes[].status`), by event type: + - incident → `operational` · `degraded` · `partial_outage` · `full_outage` + - maintenance → `operational` · `under_maintenance` +- **Event status** (`--status` on create / timeline): + - incident → `investigating` → `identified` → `monitoring` → `resolved` + - maintenance → `scheduled` → `ongoing` → `completed` +- Transitioning to `resolved` / `completed` ⇒ **all** referenced components must be `operational` (the server rejects the update otherwise). 
+ +## Gotchas + +- **`page_id` is POSITIONAL on some verbs, a `--page-id` flag on others — follow the fence heading.** Where the heading reads `### <verb> <page-id>` (change-create, change-active-list, change-list, subscriber-export/import/list), pass the id as the first bare argument: `change-create <page_id> …`. Passing `--page-id` there fails with `missing page_id`. (The migration verbs are positional too, but not with a Flashduty `page_id`: `migrate-structure` takes the Atlassian `<source-page-id>` string, and `migration-status` / `migration-cancel` take a `<job-id>`.) Verbs that need *both* `page-id` and `change-id` (change-info, change-delete, change-timeline-*, change-update) take both as flags. The fence heading is authoritative. +- **`page_id` (int) ≠ `change_id` (int)** — page is the status page; change is one incident/maintenance within it. Don't cross them. +- **`updates` is required on `change-create`** and goes via `--data` (it nests `component_changes[]`, which can't be flat flags). `--description` is also required by the server even though it's not flagged required. Typed scalar flags (`--title`, `--status`…) override matching `--data` keys. +- **`--notify-subscribers` emails + pushes every subscriber immediately** — set it only once scope is confirmed. +- **Migration is async and TWO separate jobs.** `migrate-structure` (structure + history, no emails) is deliberately separate from `migrate-email-subscribers` — verify the imported content before any subscriber verification emails go out. Poll `migration-status` until `completed` / `failed` / `cancelled`. +- Empty `change-active-list` is the authoritative "nothing open" — don't widen the query. + +## Worked example — open an incident + +```bash +fduty status-page change-create <page_id> --type incident \ + --title "Web Console Degraded" --status investigating \ + --description "Investigating degraded performance on the web console." \ + --data '{"updates":[{"status":"investigating","description":"Team is investigating.","component_changes":[{"component_id":"<component_id>","status":"degraded"}]}]}' +# → returns change_id; feed it to change-timeline-create for follow-up updates.
+``` diff --git a/skills/flashduty/reference/team.md b/skills/flashduty/reference/team.md new file mode 100644 index 0000000..9f023ce --- /dev/null +++ b/skills/flashduty/reference/team.md @@ -0,0 +1,137 @@ +# fduty team — command card + +Prereq: `SKILL.md` read. **SKILL.md + this card = full competence on teams — no `--help` needed.** Read verbs are free; `delete` is **irreversible** (always `--force` in scripted contexts); `update --person-ids` **replaces** the entire member list — dangerous without a prior `get`. + +## Route here when + +"团队 / 成员管理 / 创建团队 / 查找团队 / HR同步 / team ID / person ID归属" → **team**. Key IDs: +- **`team_id` (int64)** — from `fduty team list` or `team get --name`. +- **`person_id` (int64)** — look up via `fduty member list --query <name-or-email>` (member card, not here). + +NOT this card: on-call schedules (oncall), incidents (incident), channels (channel). + +## Intent → verb + +| want | verb | +|---|---| +| browse all teams + their member IDs | `list` | +| one team's full detail (members, ref-id, status) | `get` | +| same but via generated API path | `info` | +| batch resolve several team IDs at once | `infos` | +| create a brand-new team | `create` | +| rename / change description / swap members | `update` | +| create-or-update idempotently (HR sync) | `upsert` | +| permanently remove a team | `delete` | + +## Hot flow — create a team and verify membership + +```bash +# 1. Check name doesn't already exist +fduty team list --name "SRE Platform" --output-format toon +# 2. Create with initial members (person IDs from member list) +fduty team create --name "SRE Platform" --description "Site Reliability" \ + --person-ids 1001,1002,1003 +# 3. 
Verify — note the returned team_id +fduty team get --name "SRE Platform" --output-format toon +``` + +## Hot flow — update members safely + +```bash +# ALWAYS read current members before --person-ids (it REPLACES, not appends) +fduty team get --id <team-id> --output-format toon +# Then pass the FULL desired set (existing + new) +fduty team update --id <team-id> --person-ids 1001,1002,1003,1004 +``` + +<!-- GENERATED:team START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### create +Create a new team +- `--description` string +- `--emails` string +- `--name` string +- `--person-ids` string +- `--ref-id` string + +### delete +Delete a team +- `--force` bool +- `--id` int64 +- `--name` string +- `--ref-id` string + +### get [<id>] +Get team detail +- `--id` int64 +- `--name` string +- `--ref-id` string + +### info +Get team detail +- `--ref-id` string — External reference ID. +- `--team-id` int64 — Team ID. +- `--team-name` string — Team name. + +### infos <team-id> [<id2>...] +Batch get teams +- `<team-ids>` (positional, required) intSlice — List of team IDs to look up. Max 100. + +### list +List teams +- `--asc` bool +- `--limit` int +- `--name` string +- `--orderby` string +- `--page` int +- `--person-id` int64 + +### update +Update an existing team +- `--description` string +- `--emails` string +- `--id` int64 +- `--name` string +- `--person-ids` string +- `--ref-id` string + +### upsert +Create or update a team +- `--country-code` string — Default country code applied to any 'phones' entries that are not in E.164 format. +- `--description` string — Free-form description. (≤500 chars) +- `--emails` stringSlice — Email addresses to invite as members. +- `--person-ids` intSlice — Member IDs to set as team members. Replaces the existing member list. +- `--phones` stringSlice — Phone numbers to invite as members. +- `--ref-id` string — External reference ID for HR system integration. 
+- `--reset-if-name-exist` bool — If true and a team with the same name already exists, reset its membership to the provided person_ids. +- `--team-id` int64 — Team ID. Omit or set to 0 to create a new team. +- `--team-name` string (required) — Team display name. 1–39 characters. (1-39 chars) + +<!-- GENERATED:team END --> + +## Key concepts + +- **`status`** on `team list` rows: `enabled` | `disabled`. A disabled team still exists but is excluded from most operational contexts. +- **`infos <team-id> [<id2>...]`** — takes team IDs as **positional args** (space-separated), not `--team-ids`. The response wraps under `items[]` (pipe `jq '.items[]'` with `--json`), NOT `.data.items[]`. +- **`upsert` lookup key** — matched by `--team-id` (if non-zero) or by `--team-name` (name collision). Pass `--reset-if-name-exist` to overwrite membership on a name match; omit it to leave the existing members untouched. + +## Gotchas + +- **`--person-ids` on `update` / `create` / `upsert` is a full replacement**, not an append. Read the current list with `get --id` first, or you will silently remove members. +- **`get` vs `info`** — both fetch a single team; `get` accepts `--id`/`--name`/`--ref-id`; `get [<id>]` also allows the ID as a positional arg. `info` uses `--team-id`/`--team-name`/`--ref-id` flags only. Prefer `get` for interactive lookup. +- **`delete` is irreversible** and requires confirmation unless `--force` is set. Always confirm the correct `--id` (not `--name`) in scripts to avoid name-collision accidents. +- **`infos` positional trap** — the `use` is `infos <team-id> [<id2>...]`; IDs are space-separated positional args, not a flag. `fduty team infos 101 102 103`, not `--team-ids 101,102,103`. +- **`list` JSON shape** — `--json` returns a top-level array; pipe `jq '.[]'`, NOT `.items[]`. +- **`upsert` requires `--team-name`** even when updating by `--team-id`; omitting it returns a validation error. 
+ +## Worked example + +```bash +# Idempotent HR-sync upsert: create "Payments" or reset its membership if it already exists +fduty team upsert --team-name "Payments" \ + --description "Payments engineering" \ + --person-ids 2001,2002,2003 \ + --reset-if-name-exist \ + --output-format toon +# → returns team_id; store it for oncall schedule / channel filtering +``` diff --git a/skills/flashduty/reference/template.md b/skills/flashduty/reference/template.md new file mode 100644 index 0000000..0e6d554 --- /dev/null +++ b/skills/flashduty/reference/template.md @@ -0,0 +1,173 @@ +# fduty template — command card + +Prereq: `SKILL.md` read. Read verbs are free. `create`, `update`, `delete` mutate account-wide notification templates — confirm before running. `delete <template-id>` is **irreversible**. + +## Route here when + +"通知模板 / 消息模板 / 告警通知格式 / 飞书模板 / Slack 模板 / 邮件模板 / template CRUD / custom template / preview notification / validate template" → **template**. NOT `channel` (channel = escalation policy routing; template = the rendered text/card body). The key ID is **`template_id`** (string), returned by `list` or `create`. + +## Intent → verb + +| want | verb | +|---|---| +| list all custom templates | `list` | +| detail of one template | `info <template-id>` | +| create a new template | `create` | +| update an existing template | `update <template-id>` | +| delete a template | `delete <template-id>` | +| see the built-in preset for a channel | `get-preset` | +| validate + preview a template file | `validate` | +| render inline template content against incident data | `preview` | +| browse available Go template variables | `variables` | +| browse Sprig / custom template functions | `functions` | + +## Hot flow — customize and deploy a channel template + +```bash +# 1. Fetch the built-in preset as a starting point (channel enum below) +fduty template get-preset --channel feishu --output-format toon + +# 2. 
Save the source, edit in an editor, then validate from file +fduty template validate --channel feishu --file ./feishu.tpl + +# 3. Preview with a real incident for realistic rendering (no file — inline content) +fduty template preview \ + --type feishu \ + --content "$(cat ./feishu.tpl)" \ + --incident-id <incident-id> + +# 4. Create the template (template-name unique per account) +fduty template create \ + --template-name "Critical-Feishu-v2" \ + --feishu "$(cat ./feishu.tpl)" \ + --team-id 0 + +# 5. Verify +fduty template info <template-id> --output-format toon +``` + +## Hot flow — update one channel on an existing template + +```bash +# template-id is POSITIONAL; --template-name is required even on update +fduty template update <template-id> \ + --template-name "Critical-Feishu-v2" \ + --feishu "$(cat ./feishu-v3.tpl)" +``` + +<!-- GENERATED:template START · 由 fduty __dump-commands 同步 · 勿手改 fence 内 --> + +### create +Create a template +- `--description` string — Free-form description. Up to 500 characters. (≤500 chars) +- `--dingtalk` string — DingTalk robot message template source. +- `--dingtalk-app` string — DingTalk app message template source. +- `--email` string — Email body template source (Go 'html/template' syntax). +- `--feishu` string — Feishu robot message template source. +- `--feishu-app` string — Feishu app message template source. +- `--slack` string — Slack robot message template source. +- `--slack-app` string — Slack app message template source. +- `--sms` string — SMS template source (Go 'text/template' syntax). +- `--team-id` int64 — Team scope. 0 for account-wide. +- `--teams-app` string — Microsoft Teams app message template source. +- `--telegram` string — Telegram bot message template source. +- `--template-name` string (required) — Template name, unique per account. 1–39 characters. (1-39 chars) +- `--voice` string — Voice call script template source. +- `--wecom` string — WeCom robot message template source. 
+- `--wecom-app` string — WeCom app message template source. +- `--zoom` string — Zoom bot message template source. + +### delete <template-id> +Delete a template +- `<template-id>` (positional, required) string — Target template ID. Pass '000000000000000000000001' to address the built-in preset. + +### functions +List available template functions +- `--type` string + +### get-preset +Get the preset template for a channel +- `--channel` string + +### info <template-id> +Get template detail +- `<template-id>` (positional, required) string — Target template ID. Pass '000000000000000000000001' to address the built-in preset. + +### list +List templates +- `--asc` bool — Ascending sort order. +- `--creator-id` int64 — Filter by creator member ID. +- `--is-my-team` bool — When true, only return templates scoped to teams the caller belongs to. +- `--limit` int64 — Page size. Capped at 100. (1-100) +- `--orderby` string — Sort field. · enum: created_at | updated_at +- `--page` int64 — Page number, starting at 1. (min 1) +- `--query` string — Regex or substring match on template_name. +- `--search-after-ctx` string +- `--team-ids` intSlice — Filter by specific team IDs. + +### preview +Preview template +- `--content` string (required) — Template content to render. +- `--incident-id` string — Incident ID whose data is used to render the template; mock data is used when omitted. A MongoDB ObjectID hex string. +- `--type` string (required) — Template channel type that selects the rendering engine. + +### update <template-id> +Update a template +- `--description` string — Free-form description. Up to 500 characters. (≤500 chars) +- `--dingtalk` string — DingTalk robot message template source. +- `--dingtalk-app` string — DingTalk app message template source. +- `--email` string — Email body template source (Go 'html/template' syntax). +- `--feishu` string — Feishu robot message template source. +- `--feishu-app` string — Feishu app message template source. 
+- `--slack` string — Slack robot message template source. +- `--slack-app` string — Slack app message template source. +- `--sms` string — SMS template source (Go 'text/template' syntax). +- `--team-id` int64 — Team scope. 0 for account-wide. +- `--teams-app` string — Microsoft Teams app message template source. +- `--telegram` string — Telegram bot message template source. +- `<template-id>` (positional, required) string — Target template ID. +- `--template-name` string (required) — Template name. 1–39 characters. (1-39 chars) +- `--voice` string — Voice call script template source. +- `--wecom` string — WeCom robot message template source. +- `--wecom-app` string — WeCom app message template source. +- `--zoom` string — Zoom bot message template source. + +### validate +Validate and preview a template +- `--channel` string +- `--file` string +- `--incident` string + +### variables +List available template variables +- `--category` string + +<!-- GENERATED:template END --> + +## Channel identifiers (load-bearing — wrong value 400s) + +`--channel` / `--type` values (both flags use the same enum): + +`dingtalk` · `dingtalk_app` · `email` · `feishu` · `feishu_app` · `slack` · `slack_app` · `sms` · `teams_app` · `telegram` · `wecom` · `wecom_app` · `zoom` + +Note: `create` / `update` flags use **hyphenated** names (`--dingtalk-app`, `--feishu-app`, `--slack-app`, `--wecom-app`, `--teams-app`). `get-preset` / `validate` / `preview` use **underscored enum values** (`dingtalk_app`, `feishu_app` …). + +## Gotchas + +- **`info`, `update`, `delete` take `<template-id>` as a positional first argument** — pass it bare, not as `--template-id`. `create`, `list`, `preview`, `validate`, `get-preset`, `functions`, `variables` take all inputs as flags. +- **`update` replaces every channel field you pass — omitted channel flags are left unchanged** (server behavior: only supplied fields overwrite). 
Always pass `--template-name` even if the name is unchanged — it is required on update. +- **`delete` is permanent.** The built-in preset (`template_id = 000000000000000000000001`) can be addressed by that sentinel ID in `info` and `delete` — don't delete it. +- **`validate` reads from a local `--file`; `preview` takes inline `--content`.** They are complementary: `validate` gives size-vs-limit diagnostics; `preview` renders against real or mock incident data. +- **`email` uses `html/template` syntax; `sms` and `voice` use `text/template`** — auto-escaping rules differ. Don't mix them. +- **`functions --type` values**: `custom`, `sprig`, or `all`. **`variables --category` values**: `core`, `time`, `people`, `alerts`, `labels`, `context`, `notification`, `post_incident`. + +## Worked example + +```bash +# Browse variables available in templates, then validate a draft +fduty template variables --category core --output-format toon +fduty template validate --channel slack --file ./slack-draft.tpl --incident <incident-id> +# On success, create it +fduty template create --template-name "Ops-Slack-Alert" --slack "$(cat ./slack-draft.tpl)" +# → returns template_id; assign it to a channel in the escalation policy UI. +``` diff --git a/skills/flashduty/scripts/incident-summary.sh b/skills/flashduty/scripts/incident-summary.sh new file mode 100644 index 0000000..8288100 --- /dev/null +++ b/skills/flashduty/scripts/incident-summary.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# incident-summary.sh <incident-id> — one-shot, read-only fault-analysis fetch. +# +# A full incident summary needs six different commands (detail does NOT bundle them). +# This runs all of them and prints the results in one block, so the summary is written +# from real output with nothing to guess or fabricate. Read-only; safe to run anytime. 
+# +# usage: bash incident-summary.sh <incident-id> +# +# To tie post-mortems to this incident specifically, re-run the post-mortem section (⑥) with the +# channel_id from "incident detail": fduty incident post-mortem-list --channel-ids <id> +# +# Note: errexit (-e) is intentionally NOT set — every section must run even if one +# command fails, so the summary stays as complete as possible. Each command's own +# errors are captured inline via the `2>&1` in run(). +set -uo pipefail + +ID="${1:-}" +if [ -z "$ID" ]; then + echo "usage: bash incident-summary.sh <incident-id>" >&2 + exit 2 +fi + +run() { echo "===== fduty $* ====="; fduty "$@" --output-format toon 2>&1; echo; } + +run incident detail "$ID" # ① 详情 + AI summary + alert counts + channel_id +run incident alerts "$ID" # ② contributing alerts +run incident timeline "$ID" # ④ timeline +run incident similar "$ID" --limit 5 # ⑤ similar past incidents (channel-backed) +run incident post-mortem-list --limit 10 # ⑥ recent post-mortems (add --channel-ids to scope) +run change list --since 24h # ③ correlated changes (shared labels + time)