diff --git a/.claude/commands/code-quality.md b/.claude/commands/code-quality.md new file mode 100644 index 00000000..423113bd --- /dev/null +++ b/.claude/commands/code-quality.md @@ -0,0 +1,90 @@ +--- +name: code-quality +description: Quality pipeline — compile, checkstyle, build, tests, coverage check. Pass a class name to target a single test class. +paths: + - src/**/*.java + - pom.xml +exclude: + - src/main/java/com/skyflow/generated/** +context: fork +--- + +Run the Skyflow Java SDK quality pipeline. + +Use `$ARGUMENTS` to target a specific test class (e.g. `BearerTokenTests`). If empty, run the full suite. + +> Baseline failures are documented in the Known Pre-existing Test Failures table. +> Do not investigate them unless specifically asked. Only report failures **beyond** that baseline. + +## Coverage Requirements + +Follow the Tests coding rules (100% instruction + branch coverage). Public interface packages: +- `src/main/java/com/skyflow/vault/` (controllers, data, tokens, connection, audit, bin, detect) +- `src/main/java/com/skyflow/config/` +- `src/main/java/com/skyflow/serviceaccount/` + +Flag any gap as a blocker — **NEEDS FIXES** if coverage is below 100% on Claude-written or public interface code. + +--- + +## Pipeline + +### Step 1 — Compile +```bash +mvn compile -q 2>&1 | tail -20 +``` +Expected: no output (clean compile). Report any errors. + +### Step 2 — Checkstyle +```bash +mvn checkstyle:check -q 2>&1 | tail -20 +``` +Note: `failsOnError=false` in pom.xml means the build will not fail even if violations exist — check the output for `[WARN]` checkstyle lines. Violations are excluded from `generated/` by pom config. + +### Step 3 — Build +```bash +mvn package -DskipTests -q 2>&1 | tail -20 +``` +Expected: BUILD SUCCESS. + +### Step 4 — Tests +If `$ARGUMENTS` is set: +```bash +mvn test -Dtest=$ARGUMENTS -q 2>&1 | tail -40 +``` +Otherwise: +```bash +mvn test -q 2>&1 | tail -40 +``` +Report: tests run, failures, errors. 
Flag any pre-existing failures separately from new ones. + +### Step 5 — Coverage analysis +For every public interface class and every class touched by Claude in this session: +- Check for a corresponding test file under `src/test/` +- Check that every public method has at least one positive and one negative test case +- Check that every branch (if/else, switch, try/catch) is covered + +List all gaps. Any gap on Claude-written or public interface code is a **blocker**. + +### Step 6 — Edge case identification +For any class below 100% coverage, identify missing scenarios: +- Null / empty inputs +- Invalid types / wrong enum values +- Concurrent / reuse scenarios +- Error paths (API rejection, network failure) + +Write concrete JUnit 4 test method stubs (not full implementations) for each gap. + +### Step 7 — Report + +``` +| Step | Status | Notes | +|------------------|-----------|------------------------------| +| Compile | ✅ / ❌ | ... | +| Checkstyle | ✅ / ❌ | ... | +| Build | ✅ / ❌ | ... | +| Tests | ✅ / ❌ | N passed, M failed | +| Coverage (100%) | ✅ / ❌ | list classes with gaps | +``` + +Conclude with **READY TO MERGE** or **NEEDS FIXES** and a prioritised fix list. diff --git a/.claude/commands/code-review.md b/.claude/commands/code-review.md new file mode 100644 index 00000000..e61a00b4 --- /dev/null +++ b/.claude/commands/code-review.md @@ -0,0 +1,107 @@ +--- +name: code-review +description: Full code review — SDK patterns, naming, test coverage, then runs /code-smell and /code-security. +paths: + - src/main/java/**/*.java + - src/test/java/**/*.java +exclude: + - src/main/java/com/skyflow/generated/** +context: fork +--- + +You are a senior engineer performing a thorough code review on the Skyflow Java SDK. + +## Pre-requisite + +If `GITHUB_ACTIONS` environment variable is set, skip this step (CI runs compile/test in a separate job). + +Otherwise, confirm `/code-quality` has been run and passed (compile, tests, 100% coverage). 
If it has not been run, run it now before proceeding with the review. + +## Scope + +Use `$ARGUMENTS` to determine scope: +- `full review` — scan all files under `src/main/java/com/skyflow/` recursively (exclude `generated/`) +- A file or directory path — review only that path +- Empty / default — review files changed on current PR/branch vs base: + ```bash + # REVIEW_BASE_SHA, when set (CI incremental review), is the last commit already + # reviewed by the bot — diff only lines added since then. Otherwise fall back to + # the PR base branch. + BASE="${REVIEW_BASE_SHA:-${GITHUB_BASE_REF:+origin/$GITHUB_BASE_REF}}" + BASE="${BASE:-main}" + git diff "$BASE"...HEAD --name-only | grep '\.java$' | grep -v 'generated' + ``` + **If `GITHUB_ACTIONS` is set:** work from the diff output directly (changed lines only) instead of reading full files: + ```bash + git diff "$BASE"...HEAD -- '*.java' ':(exclude)src/main/java/com/skyflow/generated' + ``` + Review only added lines (`+` prefix) from the diff. Do not comment on unchanged context lines or pre-existing code. + +--- + +## Step 1 — SDK Pattern Review + +Review all files in scope against the rules defined in `CLAUDE.md` (loaded automatically from the project root). Check every rule category: naming conventions, error handling, request/response patterns, string literals, tests, and code quality. + +Group findings by file and produce a table: + +``` +### path/to/File.java + +| Severity | Category | Line | Finding | +|----------|----------|------|---------| +| Critical | Security | 42 | SkyflowException swallowed in catch block | +| High | Correctness | 87 | skyflow_id not normalised to skyflowId | +| Low | Pattern | 103 | Magic string "records" — use Constants | +``` + +Every finding has **two independent axes** — don't conflate them: + +**Severity** — *how serious* (one scale shared by all three steps): + +| Severity | Meaning | Blocks merge? 
| +|---|---|---| +| **Critical** | Data loss, security breach, silent failure | Yes | +| **High** | Wrong behaviour / bug / guaranteed runtime failure | Yes | +| **Medium** | Likely problem, risky or unhandled input, missing safeguard | Yes | +| **Low** | Minor maintainability, naming, style, code smell | No — advisory | +| **Info** | Note / FYI | No — advisory | + +**Category** — *what kind*: `Correctness` (a bug), `Edge case`, `Security`, `Pattern`, `Naming`, `Tests`, `Smell`. + +A logic bug is **Severity `High`/`Critical` + Category `Correctness`** — never severity "Bug". A magic string is **Severity `Low` + Category `Pattern`** — never severity "Quality". Keep level in the Severity column and kind in the Category column. + +--- + +## Step 2 — Code Smell Analysis + +Read the file `.claude/commands/code-smell.md` and follow all of its instructions for the same files in scope. Produce its full output (per-file smell table + smell summary + recommendation). + +**If `GITHUB_ACTIONS` is set:** apply that command's **PR / CI mode** — report only smells introduced by added (`+`) lines; do not report whole-file metrics (Long class/method, large parameter list) or any pre-existing debt. Do **not** print code-smell's standalone tables, summary, or recommendation — collect its findings into the single consolidated report defined in **Output (PR / CI mode)** below. + +--- + +## Step 3 — Security Audit + +Read the file `.claude/commands/code-security.md` and follow all of its instructions for the same files in scope. Produce its full output (per-finding blocks + summary table + overall risk rating). + +**If `GITHUB_ACTIONS` is set:** apply that command's **PR / CI mode** — report only issues introduced by added (`+`) lines; do not raise pre-existing vulnerabilities or whole-project checks the diff does not touch. 
**The security audit's scope additionally includes a changed `pom.xml`** (its own diff command covers it): when this PR modifies `pom.xml`, audit the added/changed dependency lines for known CVEs even though Steps 1–2 stay `.java`-only. Do **not** print code-security's standalone per-finding blocks, summary, or risk rating — collect its findings into the single consolidated report defined in **Output (PR / CI mode)** below. + +--- + +## Final Verdict (local mode only) + +> Skip this section when `GITHUB_ACTIONS` is set — use **Output (PR / CI mode)** instead. + +After all three steps, close with: +1. A tech-debt summary table grouped by category (SDK Patterns / Error Handling / Naming / Tests / Smells / Security) +2. A verdict: `APPROVE` / `APPROVE WITH FIXES` / `REQUEST CHANGES` +3. Remind: run `/code-quality` again after any fixes before merging. + +--- + +## Output (PR / CI mode) + +**If `GITHUB_ACTIONS` is set:** read `.claude/includes/code-review-ci.md` and produce **exactly** the consolidated code-review-comment report it specifies — nothing else. Do **not** emit the local-mode **Final Verdict** above. (This file is fetched only in CI; local runs never read it.) + +Otherwise (local mode), this section does not apply — use the **Final Verdict (local mode)** section above. diff --git a/.claude/commands/code-security.md b/.claude/commands/code-security.md new file mode 100644 index 00000000..0d5300a0 --- /dev/null +++ b/.claude/commands/code-security.md @@ -0,0 +1,84 @@ +--- +name: code-security +description: Security audit — credential exposure, input validation, path traversal, HTTP security, token lifecycle, dependency CVEs. +paths: + - src/main/java/com/skyflow/**/*.java + - pom.xml +exclude: + - src/main/java/com/skyflow/generated/** +context: fork +--- + +You are a security engineer auditing the Skyflow Java SDK for vulnerabilities. + +## Audit Scope + +Use `$ARGUMENTS` to determine target files. 
If none provided, run: +```bash +# CI: GITHUB_BASE_REF is set (e.g. "main") — use origin/ prefix +# Local: unset — use main directly +BASE="${GITHUB_BASE_REF:+origin/$GITHUB_BASE_REF}" +BASE="${BASE:-main}" +git diff "$BASE"...HEAD --name-only | grep -E '\.java$|(^|/)pom\.xml$' | grep -v 'generated' +``` + +**If `GITHUB_ACTIONS` is set (PR review mode):** audit only the code this PR changed. Work from the diff — **note the pathspec includes `pom.xml` so dependency changes are never invisible to the audit:** +```bash +git diff "$BASE"...HEAD -- '*.java' 'pom.xml' ':(exclude)src/main/java/com/skyflow/generated' +``` +Report a finding **only if an added line (`+` prefix) introduces or directly exposes it.** Do not raise pre-existing vulnerabilities in unchanged code, and skip whole-project checks the diff does not touch. **The diff above includes `pom.xml`; whenever a changed `` appears in it, you MUST run §6 against those lines — do not treat the audit as `.java`-only.** If the added lines introduce no security issues, state that explicitly rather than listing pre-existing risks. (Local / non-CI runs and explicit file arguments keep full-file auditing.) + +## Security Checks + +Where a finding maps to an **OWASP Top 10** category (e.g. `A01 — Broken Access Control`, `A06 — Vulnerable and Outdated Components`), tag it with that category in the output — only where it genuinely applies; don't force a mapping. + +### 1. Credential and token exposure (Critical) +- Bearer tokens, API keys, and private keys must never appear in logs, error messages, exception messages, or `toString()` output +- `Credentials` fields (`path`, `token`, `apiKey`, `credentialsString`) must not be serialised to logs +- JWT claims must not be logged + +### 2. 
Input validation (High) +- All string inputs from callers must be null/empty checked before use +- File paths passed to `new File(path)` must not allow path traversal (`../`) +- JSON strings parsed with `JsonParser` must be wrapped in try/catch for `JsonSyntaxException` + +### 3. Credentials file handling (High) +- Credentials files must only be read from paths provided by the caller — no environment variable path injection without sanitisation +- `FileReader` must be in a try-with-resources or explicitly closed + +### 4. HTTP security (Medium) +- All API calls must go over HTTPS — verify `Utils.getBaseURL` enforces this +- Authorization headers must not be logged at any log level +- HTTP timeouts must be configured + +### 5. Error information leakage (Medium) +- `SkyflowException` messages must not include raw server response bodies that could contain PII +- Stack traces must not be surfaced to callers — wrap in `SkyflowException` + +### 6. Dependency vulnerabilities (Critical) +- Flag any dependency with a known CVE (check `pom.xml` versions). Report at **Critical** severity so it surfaces in the serious-findings table and gets an inline comment on the changed `pom.xml` line. + +### 7. Authentication lifecycle (Medium) +- Bearer token caching must check expiry before reuse +- Token refresh must be thread-safe (`synchronized` or equivalent) + +## Account for every check + +Before writing the report, walk checks **1–7 in order** against the changed lines and account for each one — do not report only the issues that first stand out. The Medium-severity categories (§4 HTTP, §5 error leakage, §7 auth lifecycle) and the dependency check (§6) are missed far more often than credential exposure (§1); give them equal scrutiny. 
+ +## Output Format + +For each finding: + +``` +### path/to/File.java : line N + +**Severity:** Critical / High / Medium / Low / Info +**Risk:** What an attacker could do +**Trigger:** Input or code path that triggers the vulnerability +**Fix:** Concrete remediation with code example +**CWE:** CWE-NNN +**OWASP:** Relevant OWASP Top 10 category, e.g. `A06 — Vulnerable and Outdated Components` — include only when the finding clearly maps to one; omit otherwise. +``` + +End with a summary table and overall risk rating. diff --git a/.claude/commands/code-smell.md b/.claude/commands/code-smell.md new file mode 100644 index 00000000..d405436f --- /dev/null +++ b/.claude/commands/code-smell.md @@ -0,0 +1,167 @@ +--- +name: code-smell +description: Structural smell analysis + spell check — long methods, dead code, misplaced validation, deep nesting, magic numbers. Does not check patterns or security. +paths: + - src/main/java/**/*.java + - src/test/java/**/*.java + - .claude/**/*.md + - docs/**/*.md +exclude: + - src/main/java/com/skyflow/generated/** +context: fork +--- + +You are a senior engineer performing a code smell analysis on the Skyflow Java SDK. 
+ +## Scope + +Use `$ARGUMENTS` to determine scope: +- A file or directory path — analyse only that path +- Empty / default — analyse files changed on current branch vs base: + ```bash + BASE="${GITHUB_BASE_REF:+origin/$GITHUB_BASE_REF}" + BASE="${BASE:-main}" + git diff "$BASE"...HEAD --name-only | grep '\.java$' | grep -v 'generated' + ``` + **If `GITHUB_ACTIONS` is set (PR review mode):** work from the diff, not whole files, and apply the **PR / CI mode** rules below: + ```bash + git diff "$BASE"...HEAD -- '*.java' ':(exclude)src/main/java/com/skyflow/generated' + ``` + +--- + +## PR / CI mode (changed lines only) + +When `GITHUB_ACTIONS` is set, the analysis must reflect **only what this PR changed** — pre-existing debt must not be re-litigated on every PR: + +- Report a smell **only if an added line (`+` prefix) introduces it.** Never flag smells in unchanged/context lines or pre-existing code. +- **Do not report whole-file metrics** — *Long class, Long method, Large parameter list, pre-existing dead code, raw HashMap chains* — unless the diff itself *creates* the violation (e.g. the PR adds a brand-new method over 40 lines, or pushes a class past 300 lines for the first time). A small diff to a large legacy file must **not** trigger "Long class" or a pre-existing "Long method". +- Duplicated-code, deep-nesting, and magic-number smells: flag only when they appear in **added** lines. +- If the added lines introduce no smells, state **"No new smells introduced by this PR."** Do not enumerate pre-existing debt. +- This restriction applies to PR review only. Local / non-CI runs and explicit path arguments keep full-file analysis. + +--- + +## Spell check + +Before analysing smells, run cspell on the files in scope: + +```bash +npx cspell --no-progress "src/**/*.java" ".claude/**/*.md" "CLAUDE.md" "docs/**/*.md" 2>&1 | grep "Unknown word" +``` + +Report any spelling violations at **Smell** severity in the per-file table. 
The word list is in `.cspell.json` — add legitimate project-specific terms there rather than fixing them as typos. + +--- + +## What Are Code Smells + +Code smells are structural signals — they do not necessarily mean the code is broken, but they indicate areas of technical debt, reduced readability, or future maintenance risk. All findings are reported at **Smell** severity and do not block merge unless they indicate a design violation. + +--- + +## Smell Catalogue + +### Method & Class Size + +**Long method** — any method over 40 lines. +Signal: the method is doing too much. Candidate for decomposition into named private helpers. + +**Long class** — any class over 300 lines. +Signal: the class may be taking on too many responsibilities. Check if it can be split by concern. + +**Large parameter list** — more than 4 parameters on a method. +Signal: consider a config/options object or a builder to group related parameters. + +--- + +### Responsibility Violations + +**Business logic in Request/Response classes** +Request and Response classes are data holders — they carry data, nothing more. Flag any conditional logic, field transformation, or computation beyond null-safe getters. +Example of a violation: a Response class that renames map keys in `toString()` instead of letting the controller do it. + +**toString() with business logic** +`toString()` should only serialise state for debugging. Logic like field renaming, manual JSON construction, conditional field injection, or iteration belongs in the controller or formatter methods. + +**Validation outside `Validations.java`** +Any `if (x == null) throw new SkyflowException(...)` outside `src/main/java/com/skyflow/utils/validations/` is misplaced validation. All request validation must live in `Validations.validateXxxRequest()`. + +--- + +### Control Flow + +**Deep nesting** — more than 3 levels of `if` / `for` / `try` nesting. +Signal: extract inner blocks to named private methods. Deep nesting hides the happy path. 
+ +**Long if-else chains** — more than 4 branches on the same condition. +Signal: consider a `Map`, `switch`, or polymorphism. + +**Null checks scattered** +Multiple consecutive null guards that could be replaced with `Optional` or an early return guard clause. + +--- + +### Data + +**Magic numbers** +Literal integers or sizes (e.g. `25`, `3600`, `100`) without a named constant. Use `Constants`. + +**Raw HashMap chains** +`HashMap` passed through more than 2 method boundaries without a typed wrapper or explanatory comment. Flag for awareness — do not require an immediate fix. + +**Temporary field** +A class field that is only set in certain code paths and is `null` the rest of the time. Should be a local variable or method parameter instead. + +--- + +### Dead Code + +**Unused private methods** — private methods with no callers. + +**Unused imports** — any `import` not referenced in the file. + +**Unreachable code** — code after `return` / `throw` in the same branch. + +**Commented-out code** — blocks of commented code without explanation. Remove entirely or add a `// TODO: [ticket]` with context. + +--- + +### Comments + +**Explains what, not why** +A comment that restates what the code does (`// get the vault ID`) adds no value. Only flag comments that explain the *what* without explaining *why*. + +**Stale comment** +A comment that contradicts the current code — e.g. references a removed parameter, an old method name, or a behaviour that has changed. 
+ +--- + +## Output Format + +Group findings by file: + +``` +### path/to/File.java + +| Smell | Line | Detail | +|---------------------------|------|-----------------------------------------------------------| +| Long method | 42 | processInsertResponse() is 67 lines — decompose | +| Business logic in Response| 88 | toString() renames skyflow_id — move to formatter | +| Magic number | 103 | Literal 25 — extract to Constants.MAX_QUERY_RECORDS | +| Stale comment | 210 | References removed tokenizedData field | +| Dead code | 315 | Private method buildHeaders() has no callers | +``` + +End with a **Smell Summary** table: + +``` +| Category | Count | Files affected | +|-----------------------|-------|------------------------| +| Long methods | 2 | VaultController.java | +| Business logic in DTO | 1 | QueryResponse.java | +| Magic numbers | 3 | Validations.java | +| Dead code | 2 | Utils.java | +``` + +Close with a recommendation: **CLEAN** / **MINOR DEBT** / **SIGNIFICANT DEBT** and a one-sentence summary. diff --git a/.claude/commands/git-commit.md b/.claude/commands/git-commit.md new file mode 100644 index 00000000..429ca746 --- /dev/null +++ b/.claude/commands/git-commit.md @@ -0,0 +1,60 @@ +--- +name: git-commit +description: Stage check + Jira-aware commit — extracts ticket ID from branch name and validates against pr.yml commit-message check. +context: fork +--- + +Create a git commit for staged changes on the current branch. + +Use `$ARGUMENTS` as the commit message description. If empty, ask the user for a description before proceeding. + +## Step 1 — Extract ticket ID from branch name + +```bash +git rev-parse --abbrev-ref HEAD +``` + +Extract the Jira ticket ID using the pattern `[A-Z]{1,5}-[0-9]+`: +- `devesh/SK-1234-fix-foo` → `SK-1234` +- `karthik/GV-770-ext-auth-json-error` → `GV-770` +- `username/SDK-2814-some-fix` → `SDK-2814` + +If no ticket ID is found, **stop** and ask the user to provide one before continuing. 
+ +## Step 2 — Check what is staged + +```bash +git status --short +git diff --cached --stat +``` + +If nothing is staged, list the unstaged files and ask the user which files to stage. Do not run `git add .` — ask for explicit paths (`.env`, `credentials.json`, and `generated/` must never be staged). + +## Step 3 — Assemble and validate the commit message + +Build the message as: +``` + +``` + +If the user provided a Conventional Commits prefix (`feat`, `fix`, `chore`, `docs`, `refactor`, `test`), prepend it: +``` +feat: SK-1234 add bulk insert support +fix: GV-770 handle null bearer token on refresh +``` + +Validate against the `pr.yml` enforced pattern: `(\[?[A-Z]{1,5}-[1-9][0-9]*)|(\[AUTOMATED\])|(Merge)|(Release).+$` +- Must contain a Jira ID — a bare description without a ticket ID will fail CI. +- If validation fails, report the exact requirement and stop. + +## Step 4 — Quality check + +Before committing, confirm `/code-quality` has been run and passed (compile, tests, 100% coverage on changed code). If it has not been run, ask the user whether to run it now before proceeding. + +## Step 5 — Commit + +```bash +git commit -m "" +``` + +Report the resulting commit SHA and the commit message first line. diff --git a/.claude/commands/sdk-sample.md b/.claude/commands/sdk-sample.md new file mode 100644 index 00000000..97740142 --- /dev/null +++ b/.claude/commands/sdk-sample.md @@ -0,0 +1,74 @@ +--- +name: sdk-sample +description: Generate a Skyflow Java SDK sample file for a vault feature or service account operation. Compile-verified after creation. 
+context: fork +paths: + - samples/**/*.java + - samples/pom.xml + - src/main/java/com/skyflow/**/*.java +exclude: + - src/main/java/com/skyflow/generated/** +--- + +Create a Skyflow Java SDK sample file demonstrating: $ARGUMENTS + +## File placement + +| Feature type | Package | Directory | +|---|---|---| +| Vault ops (insert/get/update/delete/query/tokenize) | `com.example.vault` | `samples/src/main/java/com/example/vault/` | +| Service account auth | `com.example.serviceaccount` | `samples/src/main/java/com/example/serviceaccount/` | +| Connection | `com.example.connection` | `samples/src/main/java/com/example/connection/` | +| Detect | `com.example.detect` | `samples/src/main/java/com/example/detect/` | +| Audit event operations | `com.example.audit` | `samples/src/main/java/com/example/audit/` | +| BIN lookup | `com.example.bin` | `samples/src/main/java/com/example/bin/` | + +File name: `Example.java` + +## Structure (follow this order) + +1. Package declaration +2. Imports — only from `com.skyflow.*`, `java.*`; never from `com.skyflow.generated.*` +3. Public class with `main(String[] args) throws SkyflowException` +4. Credentials setup — choose based on feature: + - **Vault ops:** `credentials.setApiKey("")` or `credentials.setCredentialsString("")` + - **Service account:** `credentials.setPath("credentials.json")` (path to the service account JSON file) +5. `VaultConfig` with `setVaultId`, `setClusterId`, `setEnv(Env.PROD)`, `setCredentials(credentials)` +6. Build the Skyflow client: + ```java + Skyflow skyflowClient = Skyflow.builder() + .setLogLevel(LogLevel.DEBUG) + .addVaultConfig(vaultConfig) + .build(); + ``` +7. Request object via `*Request.builder()` — options go directly on the builder (no separate Options class): + ```java + // Example: InsertRequest with tokenMode + InsertRequest request = InsertRequest.builder() + .table("...") + .values(records) + .tokenMode(TokenMode.ENABLE) + .build(); + ``` +8. 
Call the vault method inside a try/catch for `SkyflowException`: + ```java + InsertResponse response = skyflowClient.vault().insert(request); + System.out.println(response); + ``` + +## Rules + +- Vault IDs / cluster IDs use placeholders: `""`, `""` +- Credential values use placeholders: `""`, `""` +- Credentials file path: `"credentials.json"` (relative — no absolute paths) +- Always catch `SkyflowException` and print `e.getMessage()` +- No separate `*Options` classes — they don't exist in this SDK; use request builder methods +- Keep under 80 lines + +## After creating the file + +```bash +cd samples && mvn compile -q 2>&1 | tail -20 +``` + +Report the file path and any compile errors. diff --git a/.claude/hooks/checkstyle-on-edit.py b/.claude/hooks/checkstyle-on-edit.py new file mode 100644 index 00000000..689b2b74 --- /dev/null +++ b/.claude/hooks/checkstyle-on-edit.py @@ -0,0 +1,34 @@ +# checkstyle-on-edit.py — PostToolUse hook for the Skyflow Java SDK. +# +# Registered in .claude/settings.json under hooks.PostToolUse with matcher "Edit|Write". +# Fires automatically after every Edit or Write tool call on any file. +# +# What it does: +# - Ignores non-.java files immediately (no Maven overhead). +# - For files under src/main/java/, runs checkstyle scoped to that single file +# via -Dcheckstyle.includes= to keep it fast. +# - For files outside src/main/java/ (e.g. tests, samples), runs full-module checkstyle. +# - Prints the last 20 lines of any violations so Claude sees them in-turn +# without needing a separate /quality run. +# +# Config: checkstyle.xml — generated/ is excluded by pom.xml config so Fern +# auto-generated code is never flagged. 
+import sys, json, subprocess, os
+# Hook payload is JSON on stdin; the edited path is tool_input.file_path (or top-level file_path).
+d = json.load(sys.stdin)
+f = d.get('tool_input', {}).get('file_path', d.get('file_path', ''))
+if not f or not f.endswith('.java'):
+    sys.exit(0)  # not a Java file — skip Maven entirely
+# Repo root is two levels above this script's directory (<root>/.claude/hooks/), so the hook works on any checkout.
+root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+marker = 'src/main/java/'
+if marker in f:
+    rel = f.split(marker, 1)[1]  # path relative to the main source root
+    args = ['mvn', 'checkstyle:check', '-q', '-Dcheckstyle.includes=' + rel]
+else:
+    args = ['mvn', 'checkstyle:check', '-q']
+# Run Maven from the repo root and echo only the last 20 lines of combined stdout/stderr.
+r = subprocess.run(args, capture_output=True, text=True, cwd=root)
+out = (r.stdout + r.stderr).strip()
+if out:
+    print('\n'.join(out.splitlines()[-20:]))
diff --git a/.claude/includes/code-review-ci.md b/.claude/includes/code-review-ci.md new file mode 100644 index 00000000..ffe0f397 --- /dev/null +++ b/.claude/includes/code-review-ci.md @@ -0,0 +1,63 @@ +# Code Review — PR / CI Output Spec + +> Loaded by `/code-review` (`.claude/commands/code-review.md`) **only when `GITHUB_ACTIONS` is set**. +> This is a plain include, not a slash command — it has no frontmatter and is not in `commands/`, +> so it cannot be invoked directly. It defines the consolidated review-comment format the CI +> workflow extracts and posts. + +When `GITHUB_ACTIONS` is set, your **entire output is the body of a code-review comment** — not a chat reply. The **first line must be the findings-count summary** (item 1 below). Never emit any preamble, planning, narration, acknowledgement, "Now I have…/Let me…" line, restatement of the diff, or per-step headers. + +**Do NOT emit any verdict** — no `APPROVE`, `APPROVE WITH FIXES`, `REQUEST CHANGES`, or any pass/fail statement. CI reviews are advisory; merge gating is handled by GitHub branch protection, not by this comment. + +Merge every finding from Steps 1–3 into one de-duplicated report (same issue flagged by multiple steps → keep once at the highest severity). + +**If the changed lines introduce no findings at all, output exactly `` and nothing else** — the workflow posts no comment in that case. 
Otherwise emit **exactly** the following, and nothing else. + +**Rendering rules (GitHub markdown):** emit each part below as a **top-level block at the left margin**, separated by a blank line. The numbers are labels for you — do **not** reproduce them as a markdown numbered list, and do **not** indent the tables or `
<details>` (tables/`<details>
` nested inside list items do not render on GitHub). Every table needs a blank line before and after it, and `<details>
` needs a blank line after the `` tag. + +**Severity (a separate axis from Category; used for the tables, not for any verdict):** +- **Blocking-worth** (serious): `Critical`, `High`, `Medium`. +- **Advisory** (minor): `Low`, `Info`. + +1. **Findings summary (first line)** — a single line counting findings by type on the changed lines, in this exact style (omit any type whose count is 0; if every count is 0 you should have emitted the empty marker instead): + + `**Findings on the changed lines:** 2 bugs, 2 security, 3 smells, 1 other` + + Count by Category: + - **bugs** = `Correctness` + `Edge case` + - **security** = `Security` + - **smells** = `Smell` + - **other** = `Pattern` + `Naming` + `Tests` + + Do not restate individual findings on this line — the tables below list them. + +2. **Serious-findings table** — `Critical` / `High` / `Medium` only (never `Low` / `Info`). The `Finding` cell is a **terse identifier (≤ ~12 words, a noun phrase)** — no mechanism, no "because…", no fix; the full explanation lives in the inline comment, so never repeat it here. If the **same issue appears at multiple locations**, emit **one row** with the locations comma-separated in `File:Line` (the inline block still gets one entry per location) — do not create near-duplicate rows. Omit the table and write "No serious findings on the changed lines." if there are none. + ``` + | File:Line | Severity · Category | Finding | + |-----------|---------------------|---------| + | HttpUtility.java:88 | High · Correctness | getMessage() returns null on the no-body error path | + | VaultClient.java:942, ConnectionClient.java:92 | Medium · Pattern | misleading EmptyCredentials message in generic catch | + ``` + +3. **Advisory section (collapsed)** — every advisory finding, one crisp line each; `N` must equal the row count. + ``` +
<details><summary>Advisory (Low / Info) — N items</summary> + + | File:Line | Severity · Category | Finding | + |-----------|---------------------|---------| + </details>
+ ``` + +4. **Inline-findings block** — the **very last thing** in your output, wrapped in an **HTML comment** so it never renders even if parsing fails. Its body is a JSON array of **only serious (Critical/High/Medium) findings whose line is an added (`+`) line** (never advisory items, never lines outside the diff). Put the **full explanation in `comment`** (this is what renders inline on the code); keep the summary table above terse. + + **Critical:** `comment` must be **plain text** — you may use single backticks for short identifiers, but **never triple backticks / code fences / `\`\`\`` anywhere inside the JSON** (they corrupt extraction). Describe the fix in prose, not a code block. + + Use exactly these sentinels (emit `[]` for the array if there are no inline findings): + + ``` + + ``` + + The workflow extracts this block, strips it from the visible summary, and renders items 1–3 as the review body. diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 00000000..1d00baed --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,30 @@ +{ + "hooks": { + "PostToolUse": [ + { + "matcher": "Edit|Write", + "hooks": [ + { + "type": "command", + "command": "python3 .claude/hooks/checkstyle-on-edit.py" + } + ] + } + ] + }, + "permissions": { + "allow": [ + "Bash(mvn *)", + "Bash(java *)", + "Bash(python3 *)", + "Bash(git *)", + "Bash(find *)", + "Bash(grep *)", + "Bash(npx cspell *)" + ], + "deny": [ + "Edit(src/main/java/com/skyflow/generated/**)", + "Write(src/main/java/com/skyflow/generated/**)" + ] + } +} diff --git a/.claude/skills/requesting-code-review/SKILL.md b/.claude/skills/requesting-code-review/SKILL.md new file mode 100644 index 00000000..61e0e05d --- /dev/null +++ b/.claude/skills/requesting-code-review/SKILL.md @@ -0,0 +1,45 @@ +--- +name: requesting-code-review +description: Use when completing tasks, implementing major features, or before merging to verify work meets requirements +paths: + - src/main/java/**/*.java + - 
src/test/java/**/*.java +exclude: + - src/main/java/com/skyflow/generated/** +context: fork +--- + +# Requesting Code Review + +**Core principle:** Review early, review often. Review after each task — catch issues before they compound. + +## When to Request Review + +**Mandatory:** +- After each task in subagent-driven development +- After completing a major feature +- Before merge to main + +**Optional but valuable:** +- When stuck (fresh perspective) +- Before refactoring (baseline check) +- After fixing a complex bug + +## How to Request + +**1. Pick the right command:** + +| Change type | Command | +|---|---| +| SDK logic, patterns, naming, tests | `/code-review` — SDK checks + smell + security | +| Structural debt only | `/code-smell` — standalone smell analysis | +| Auth, credentials, tokens, HTTP | `/code-security` — standalone security audit | +| Compile + tests + 100% coverage | `/code-quality` — run after fixing review findings, before `/commit` | + +For security-sensitive changes, run both: +```bash +/code-review src/main/java/com/skyflow/ +/code-security src/main/java/com/skyflow/ +``` + +All review rules, severity definitions, output format, and post-review steps are defined in `.claude/commands/code-review.md` — that file is the single source of truth. 
diff --git a/.cspell.json b/.cspell.json index 757f0c48..a982f837 100644 --- a/.cspell.json +++ b/.cspell.json @@ -91,6 +91,7 @@ "Behaviour", "behaviours", "sanitisation", + "prioritise", "recognised", "unrecognised", "nocreds", diff --git a/.github/workflows/claude-changelog.yml b/.github/workflows/claude-changelog.yml new file mode 100644 index 00000000..aae70fa2 --- /dev/null +++ b/.github/workflows/claude-changelog.yml @@ -0,0 +1,86 @@ +name: Claude Changelog + +on: + push: + tags: + - '[0-9]+.[0-9]+.[0-9]+' + - '*.*.*-beta.*' + +permissions: + contents: write + +jobs: + generate-changelog: + name: Generate Release Notes + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Get previous tag + id: previoustag + uses: WyriHaximus/github-action-get-previous-tag@v1 + with: + fallback: '0.0.0' + + - name: Get commits since previous tag + id: commits + run: | + PREV="${{ steps.previoustag.outputs.tag }}" + CURR="${{ github.ref_name }}" + COMMITS=$(git log "${PREV}..${CURR}" --oneline \ + | grep -v '^\S* \[AUTOMATED\]' \ + | grep -v '^\S* Merge ' \ + | grep -v '^\S* \[AUTOMATED\]') + echo "log<> $GITHUB_OUTPUT + echo "$COMMITS" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + + - name: Install Claude CLI + run: npm install -g @anthropic-ai/claude-code + + - name: Generate release notes + id: notes + continue-on-error: true + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + run: | + PREV="${{ steps.previoustag.outputs.tag }}" + CURR="${{ github.ref_name }}" + COMMITS="${{ steps.commits.outputs.log }}" + NOTES=$(claude --print --model claude-sonnet-4-5 -p " + Generate GitHub Release notes for the Skyflow Java SDK. 
+ + Release: $CURR (previous: $PREV) + + Commits: + $COMMITS + + Rules: + - Group into sections: ## Features, ## Bug Fixes, ## Security, ## Breaking Changes + - Omit any section with no entries + - Each entry: bullet point with a concise one-line description; include the Jira ticket ID if present (e.g. SK-1234) + - Strip PR merge numbers like (#323) — keep the substance + - Skip [AUTOMATED] commits, version bump commits, and bare merge commits + - Breaking Changes section must come first if present + - End with: _Full changelog: https://github.com/skyflowapi/skyflow-java/compare/${PREV}...${CURR}_ + + Output only the markdown. No preamble or explanation. + ") + echo "notes<> $GITHUB_OUTPUT + echo "$NOTES" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + + - name: Create or update GitHub Release + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + TAG="${{ github.ref_name }}" + NOTES="${{ steps.notes.outputs.notes }}" + if gh release view "$TAG" > /dev/null 2>&1; then + gh release edit "$TAG" --notes "$NOTES" + else + gh release create "$TAG" --notes "$NOTES" --title "Release $TAG" + fi diff --git a/.github/workflows/claude-pr-review.yml b/.github/workflows/claude-pr-review.yml new file mode 100644 index 00000000..d1b9fcf9 --- /dev/null +++ b/.github/workflows/claude-pr-review.yml @@ -0,0 +1,272 @@ +name: Claude PR Review + +on: + pull_request: + # TEMP (SK-2832): devesh/SK-2832-claude-setup-v2 added so the probe PR (#339), + # which targets the setup branch rather than main, triggers the review bot. + # Revert to [main] once probe evaluation is done. + branches: [main, devesh/SK-2832-claude-setup-v2] + paths: + - 'src/**/*.java' + workflow_dispatch: + inputs: + pr_number: + description: 'PR number to re-run review on' + required: false + +permissions: + pull-requests: write + contents: read + +# Debounce: when several pushes land on the same PR in quick succession, cancel +# superseded runs so only the latest commit (current HEAD) gets reviewed. 
+concurrency: + group: claude-pr-review-${{ github.event.pull_request.number || github.event.inputs.pr_number }} + cancel-in-progress: true + +jobs: + sdk-review: + name: SDK PR Review (changed lines only) + runs-on: ubuntu-latest + timeout-minutes: 20 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Install Claude CLI + run: npm install -g @anthropic-ai/claude-code + + # Incremental review: find the commit the bot last reviewed (the commit_id of + # its most recent review on this PR). Subsequent runs diff only lines added + # since then, so unchanged commits are not re-reviewed and not re-commented. + - name: Determine last reviewed commit + id: lastrev + uses: actions/github-script@v7 + with: + script: | + const prNumber = context.payload.pull_request?.number + || '${{ github.event.inputs.pr_number }}'; + if (!prNumber) { core.setOutput('sha', ''); return; } + let sha = ''; + try { + const reviews = await github.paginate(github.rest.pulls.listReviews, { + ...context.repo, pull_number: Number(prNumber), per_page: 100, + }); + const mine = reviews + .filter(r => r.user && r.user.login === 'github-actions[bot]' && r.commit_id) + .sort((a, b) => new Date(b.submitted_at) - new Date(a.submitted_at)); + if (mine.length) sha = mine[0].commit_id; + } catch (e) { + core.warning(`listReviews failed (${e.message}); falling back to full review.`); + } + core.setOutput('sha', sha); + core.info(`last reviewed commit: ${sha || '(none — full review)'}`); + + - name: Check for non-generated Java changes + id: check + env: + LAST_SHA: ${{ steps.lastrev.outputs.sha }} + EVENT_NAME: ${{ github.event_name }} + BASE_BRANCH: ${{ github.base_ref }} + run: | + BASE_REF="origin/${BASE_BRANCH:-main}" + # Default to a full review against the PR base. Switch to incremental only + # for a real push event when the last reviewed commit is still an ancestor + # of HEAD (a rebase/force-push or manual re-run forces a full re-review). 
+ RANGE_BASE="$BASE_REF" + INCREMENTAL=false + if [ "$EVENT_NAME" != "workflow_dispatch" ] && [ -n "$LAST_SHA" ] \ + && git merge-base --is-ancestor "$LAST_SHA" HEAD 2>/dev/null; then + RANGE_BASE="$LAST_SHA" + INCREMENTAL=true + echo "Incremental review: ${LAST_SHA}...HEAD" + else + echo "Full review: ${BASE_REF}...HEAD" + fi + echo "range_base=$RANGE_BASE" >> $GITHUB_OUTPUT + # incremental=true means this is a re-review of only the latest commits, so + # the posted comment reminds the author to resolve earlier unresolved comments. + echo "incremental=$INCREMENTAL" >> $GITHUB_OUTPUT + + CHANGED=$(git diff --name-only "$RANGE_BASE"...HEAD \ + | grep '\.java$' | grep -v 'generated') + if [ -z "$CHANGED" ]; then + echo "has_changes=false" >> $GITHUB_OUTPUT + else + echo "has_changes=true" >> $GITHUB_OUTPUT + # Count added/removed lines over the same range the review uses, excluding + # generated/ so the metric matches what is actually reviewed. + LINES=$(git diff "$RANGE_BASE"...HEAD -- '*.java' ':(exclude)**/generated/**' \ + | grep -c '^[+-][^+-]' 2>/dev/null || echo 1) + echo "changed_lines=$LINES" >> $GITHUB_OUTPUT + fi + + - name: Verify Claude CLI + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + run: | + if [ -z "$ANTHROPIC_API_KEY" ]; then + echo "::error::ANTHROPIC_API_KEY secret is not set." + exit 1 + fi + echo "Claude version: $(claude --version)" + set +e + TEST=$(claude --dangerously-skip-permissions --output-format json -p "Reply with only the word: WORKING" 2>&1) + EXIT=$? + set -e + IS_ERROR=$(echo "$TEST" | jq -r '.is_error // empty' 2>/dev/null) + if [ "$EXIT" -ne 0 ] || [ "$IS_ERROR" = "true" ]; then + MSG=$(echo "$TEST" | jq -r '.result // .error // .' 
2>/dev/null | head -c 300) + echo "::error::Claude CLI check failed (invalid key, no API credits, or connectivity): $MSG" + exit 1 + fi + echo "Claude CLI OK" + + - name: Run code review + if: steps.check.outputs.has_changes == 'true' + id: review + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1" + GITHUB_BASE_REF: ${{ github.base_ref }} + GITHUB_ACTIONS: "true" + # Incremental base: last reviewed commit (or PR base for a full review). + REVIEW_BASE_SHA: ${{ steps.check.outputs.range_base }} + run: | + set +e + # stream-json + --verbose emits one NDJSON event per line (init, each + # assistant turn, every tool call/result, final result) as the review + # runs. tee shows them live in the Actions log AND saves them for parsing. + # PIPESTATUS[0] is claude's exit (not tee's). The final {"type":"result"} + # event has the same shape as --output-format json, so the jq paths below + # still work — we just extract that last line. + claude --dangerously-skip-permissions \ + --output-format stream-json --verbose \ + --model claude-sonnet-4-6 -p "/code-review" 2>&1 \ + | tee /tmp/claude-review.jsonl + CLAUDE_EXIT=${PIPESTATUS[0]} + set -e + + RAW=$(grep '"type":"result"' /tmp/claude-review.jsonl | tail -n 1) + if [ -z "$RAW" ]; then + RAW='{"is_error":true,"result":"_Review produced no result event._"}' + fi + + IS_ERROR=$(echo "$RAW" | jq -r '.is_error // empty' 2>/dev/null) + RESULT=$(echo "$RAW" | jq -r '.result // "_Review failed to produce output._"') + INPUT_TOK=$(echo "$RAW" | jq '[.modelUsage[].inputTokens // 0] | add // 0') + OUTPUT_TOK=$(echo "$RAW" | jq '[.modelUsage[].outputTokens // 0] | add // 0') + CACHED_TOK=$(echo "$RAW" | jq '[.modelUsage[].cacheReadInputTokens // 0] | add // 0') + CACHE_WRITE_TOK=$(echo "$RAW" | jq '[.modelUsage[].cacheCreationInputTokens // 0] | add // 0') + COST=$(echo "$RAW" | jq -r '.total_cost_usd // 0') + LINES="${{ steps.check.outputs.changed_lines }}" + + echo "result<> 
$GITHUB_OUTPUT + echo "$RESULT" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + + { + echo "## Token Usage — SDK Review" + echo "| Metric | Value |" + echo "|--------|-------|" + echo "| Input tokens | $INPUT_TOK |" + echo "| Output tokens | $OUTPUT_TOK |" + echo "| Cache hits (read) | $CACHED_TOK |" + echo "| Cache writes | $CACHE_WRITE_TOK |" + echo "| Total cost | \$$COST |" + echo "| Changed lines | $LINES |" + } >> $GITHUB_STEP_SUMMARY + + # Fail the check on a real API/runtime error (invalid key, no credits, etc.) + # instead of posting the error text as a "review". + if [ "$CLAUDE_EXIT" -ne 0 ] || [ "$IS_ERROR" = "true" ]; then + echo "::error::Claude review failed (invalid key, no API credits, or runtime error): $(echo "$RESULT" | head -c 300)" + exit 1 + fi + + - name: Post review comment + if: steps.check.outputs.has_changes == 'true' + uses: actions/github-script@v7 + env: + REVIEW_BODY: ${{ steps.review.outputs.result }} + IS_INCREMENTAL: ${{ steps.check.outputs.incremental }} + # The commit this review's diff is measured from (the last reviewed commit, when incremental). + REVIEW_BASE_SHA: ${{ steps.check.outputs.range_base }} + with: + script: | + const raw = process.env.REVIEW_BODY || '_Review output unavailable._'; + const isIncremental = process.env.IS_INCREMENTAL === 'true'; + const baseSha = (process.env.REVIEW_BASE_SHA || '').slice(0, 7); + const pull_number = context.payload.pull_request.number; + + // No findings on the changed lines → the model emits only this marker → post nothing. + if (//.test(raw)) { + core.info('No findings on the changed lines; posting nothing.'); + return; + } + + // Extract inline findings, then strip the block so it never leaks into the comment. + // Primary format: HTML-comment sentinel (never renders, + // and backticks inside the JSON can't break extraction). Legacy fallback: ```json:inline fence. 
+ let summary = raw; + let inline = []; + const html = raw.match(//); + const fenced = raw.match(/```+\s*json:inline\s*([\s\S]*?)```/); + const block = html || fenced; + if (block) { try { inline = JSON.parse(block[1].trim()); } catch (e) { inline = []; } } + // Strip the sentinel block and any stray json:inline fence (to end-of-text, since it must + // be last) so a malformed block can never appear in the visible summary. + summary = summary + .replace(//g, '') + .replace(/```+\s*json:inline[\s\S]*$/i, '') + .trim(); + + // Strip any leading model preamble/narration before the findings-count summary + // line (starts with "**Findings"); tolerate a markdown heading too. + const lines = summary.split('\n'); + const idx = lines.findIndex(l => + /\*\*Findings/.test(l) || /^#{1,6}\s/.test(l)); + if (idx > 0) summary = lines.slice(idx).join('\n').trim(); + + // On a re-review (incremental run), make clear this comment only covers the + // commits added since the last review and earlier unresolved comments still stand. + if (isIncremental) { + const since = baseSha ? ` since the last review (\`${baseSha}\`)` : ' since the last review'; + summary = `> ♻️ _**Incremental review** — covers only the commits added${since}; earlier changes were not re-checked. **Please resolve any open comments from previous reviews.**_\n\n${summary}`; + } + + const comments = (Array.isArray(inline) ? inline : []) + .filter(f => f && f.path && Number.isInteger(f.line)) + .map(f => ({ + path: f.path, + line: f.line, + side: 'RIGHT', + body: `**${f.severity || 'Finding'}${f.category ? ' · ' + f.category : ''}${f.cwe ? ' · ' + f.cwe : ''}**: ${f.comment || ''}`.trim() + })); + + const body = `## AI Code Review\n\n${summary}`; + + // One PR review: summary as the body + inline comments anchored to changed lines. 
+ try { + await github.rest.pulls.createReview({ + ...context.repo, + pull_number, + event: 'COMMENT', + body, + comments + }); + } catch (e) { + // Inline anchoring fails if a line is not in the diff — fall back to a single + // summary comment and list the would-be inline findings so nothing is lost. + const list = comments.length + ? '\n\n
Inline findings (could not attach to lines)\n\n' + + comments.map(c => `- \`${c.path}:${c.line}\` — ${c.body}`).join('\n') + + '\n
' + : ''; + await github.rest.issues.createComment({ + ...context.repo, + issue_number: pull_number, + body: body + list + }); + } diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..263165ca --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,160 @@ +--- +name: skyflow-java-sdk +description: Skyflow Java SDK project context — naming conventions, build commands, known failures, and slash commands. Loaded for all Java source files. +paths: + - src/**/*.java + - pom.xml + - checkstyle.xml +exclude: + - src/main/java/com/skyflow/generated/** +--- + +# Skyflow Java SDK — Claude Code Instructions + +## Project Overview + +This is the Skyflow Java SDK (`skyflow-java`). It provides a Java interface to the Skyflow Data Privacy Vault API — vault operations (insert, get, update, delete, query, tokenize, detokenize), service account authentication (bearer tokens, signed data tokens), connections, detect, and audit. + +**v1 (maintenance mode, `v1` branch):** Security and bug fixes only — no new features. EOL announced: **October 31, 2026**. + +**Current stable version: v2.1** — supports PDB vaults. This is what customers use. + +**v3 (pre-release, Flow DB only):** v3 is *not* a full replacement for v2. It adds Flow DB-specific operations used by the [Spark wrapper](https://github.com/skyflowapi/vault-workflows): +- `bulkInsert` +- `batchProcessing` (`batchSize` + `concurrencyLimit`) + +v3 does not yet have full parity with v2. Do not treat v3 as the general SDK — scope v3 work strictly to Flow DB features unless explicitly told otherwise. + +## Critical Boundary — Generated Code + +**Never edit files under `src/main/java/com/skyflow/generated/`.** + +These are auto-generated by [Fern](https://buildwithfern.com) from the Skyflow API definition. Manual edits are overwritten on the next generation run. If you find a bug in generated code, report it — do not patch it directly. + +The `pom.xml` checkstyle and test configs already exclude `generated/` from all checks. 
+ +## Project Structure + +``` +src/ + main/java/com/skyflow/ + config/ # VaultConfig, Credentials, ConnectionConfig + vault/ # controller/, data/, tokens/, connection/, audit/, bin/, detect/ + serviceaccount/ # BearerToken, SignedDataTokens (JWT + credential parsing) + enums/ # LogLevel, RedactionType, TokenMode, Env + errors/ # SkyflowException, ErrorCode, ErrorMessage + utils/ # Utils, Constants, HttpUtility, LogUtil, Validations + generated/ # ← FERN-GENERATED, DO NOT EDIT + test/java/com/skyflow/ + ... # JUnit 4 tests mirroring main structure +samples/ # Standalone Maven project — see samples/CLAUDE.md for placement rules +docs/ + superpowers/specs/ # Design specs + superpowers/plans/ # Implementation plans +``` + +## Naming Conventions + +- **Acronyms as words:** Examples: `skyflowId` (not `skyflowID`), `clientId` (not `clientID`), `tokenUri` (not `tokenURI`), `keyId` (not `keyID`) +- **Builder setters:** Examples: `setVaultId()`, `setClusterId()`, `setSkyflowId()` — never `setVaultID()` +- **Response maps:** always use `skyflowId` (camelCase) — the raw API returns `skyflow_id` (snake_case) which VaultController normalises before returning to callers +- **Constants class:** use `com.skyflow.utils.Constants` for string literals; `ErrorMessage` enum for error message strings + +## SDK Coding Rules + +These apply whenever writing or modifying code — not just during review. 
+ +### Error handling +- All public methods must declare `throws SkyflowException` +- Never swallow exceptions — always re-throw as `SkyflowException` +- No `System.out.println` or `e.printStackTrace()` — use `LogUtil` +- `catch (Exception e)` without re-throw is always a bug + +### Request / Response patterns +- Request builders are data holders — validation belongs in `Validations.validateXxxRequest()`, not in `build()` +- No separate `*Options` classes — options are fields on the request builder itself +- All response classes must have `getErrors()` returning `null` when no errors + +### String literals +- Use `Constants` for string literals and `ErrorMessage` enum for error messages — no magic strings + +### Tests +- Use `Assert.assertEquals` / `Assert.assertNull` — not just `Assert.fail` guards +- No mocking of the production class under test +- Reflection-based tests on private methods are acceptable only when no public API exercises the method +- All code written or modified by Claude must have 100% coverage — both instruction and branch + +### Code quality +- No `@SuppressWarnings` without a comment explaining why +- Use `LogUtil.printWarningLog` for deprecation warnings — not `System.err` + +--- + +## Build and Test + +```bash +mvn compile -q # compile +mvn checkstyle:check -q # lint (config: checkstyle.xml) +mvn test -q # full test suite (JUnit 4) +mvn test -Dtest=ClassName # single test class +mvn package -DskipTests -q # build jar +``` + +## Credentials JSON Format + +The SDK reads a `credentials.json` file for service account authentication. The canonical field names (v3+) are: + +```json +{ + "clientId": "...", + "keyId": "...", + "tokenUri": "...", + "privateKey": "..." +} +``` + +The legacy forms that spell the acronym in upper case (`clientID`, `keyID`, `tokenURI`) are still accepted as fallbacks so that existing credentials files keep working during migration. 
+ +## Known Pre-existing Test Failures + +These failures exist on `main` and are **not regressions** — do not investigate them unless specifically asked: + +| Test class | Failure | Cause | +|---|---|---| +| `HttpUtilityTests` | `InaccessibleObject` (all tests) | JDK 21 + PowerMock incompatibility — PowerMock cannot reflect into `java.net` | +| `TokenTests#testExpiredTokenForIsExpiredToken` | Environment error | Requires live credentials | +| `VaultClientTests#testSetBearerTokenWithEnvCredentials` | Environment error | Requires `SKYFLOW_CREDENTIALS` env var | +| `ConnectionClientTests#testSetBearerTokenWithEnvCredentials` | Environment error | Requires `SKYFLOW_CREDENTIALS` env var | + +Run `mvn test -q 2>&1 | grep -E "Tests run|FAIL|ERROR"` to see the current baseline. + +## Active Work + +See `docs/superpowers/specs/` for in-progress design specs and `docs/superpowers/plans/` for implementation plans. + +## Slash Commands + +- `/code-review` — full review: SDK patterns + code smells + security (Steps 2 and 3 read `.claude/commands/code-smell.md` and `.claude/commands/code-security.md` at runtime) +- `/code-smell` — standalone structural smell analysis only (long methods, dead code, misplaced logic) +- `/code-security` — standalone security audit only (credentials, input validation, HTTP security) +- `/sdk-sample <feature>` — generate a sample file for a feature +- `/code-quality [ClassName]` — run quality pipeline (compile → checkstyle → build → test → 100% coverage check) +- `/git-commit <message>` — stage check + Jira-aware commit (extracts ticket ID from branch name) + +## Commit & PR Guidelines + +### Commit messages +**Never run `git commit` directly. Always use `/git-commit <message>`** — it extracts the Jira ticket ID from the branch name, confirms `/code-quality` has passed, and validates the format against the CI check in `.github/workflows/pr.yml`. 
+ +### Branch naming +Branch name must include your GitHub username: + +``` +<github-username>/<JIRA-ID>-<short-description> +``` + + +Example: `devesh/SK-770-ext-auth-json-error` + +### PR template +The `.github/pull_request_template.md` requires: **Why**, **Goal**, **Testing** sections. Tech debt section is optional. diff --git a/src/main/java/com/skyflow/ConnectionClient.java b/src/main/java/com/skyflow/ConnectionClient.java index d67122ad..3715b509 100644 --- a/src/main/java/com/skyflow/ConnectionClient.java +++ b/src/main/java/com/skyflow/ConnectionClient.java @@ -40,7 +40,7 @@ protected void updateConnectionConfig(ConnectionConfig connectionConfig) throws prioritiseCredentials(); } - protected void setBearerToken() throws SkyflowException { + protected synchronized void setBearerToken() throws SkyflowException { prioritiseCredentials(); Validations.validateCredentials(this.finalCredentials); if (this.finalCredentials.getApiKey() != null) { @@ -89,7 +89,7 @@ private void prioritiseCredentials() throws SkyflowException { } catch (SkyflowException e) { throw e; } catch (Exception e) { - throw new RuntimeException(e); + throw new SkyflowException(ErrorCode.SERVER_ERROR.getCode(), ErrorMessage.EmptyCredentials.getMessage()); } } diff --git a/src/main/java/com/skyflow/VaultClient.java b/src/main/java/com/skyflow/VaultClient.java index 1d5e5d74..5060c48f 100644 --- a/src/main/java/com/skyflow/VaultClient.java +++ b/src/main/java/com/skyflow/VaultClient.java @@ -1,5 +1,18 @@ package com.skyflow; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Base64; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + import com.skyflow.config.Credentials; import com.skyflow.config.VaultConfig; import com.skyflow.enums.DetectEntities; @@ -10,8 +23,24 @@ import 
com.skyflow.generated.rest.ApiClient; import com.skyflow.generated.rest.ApiClientBuilder; import com.skyflow.generated.rest.resources.files.FilesClient; -import com.skyflow.generated.rest.resources.files.requests.*; -import com.skyflow.generated.rest.resources.files.types.*; +import com.skyflow.generated.rest.resources.files.requests.DeidentifyFileAudioRequestDeidentifyAudio; +import com.skyflow.generated.rest.resources.files.requests.DeidentifyFileDocumentPdfRequestDeidentifyPdf; +import com.skyflow.generated.rest.resources.files.requests.DeidentifyFileImageRequestDeidentifyImage; +import com.skyflow.generated.rest.resources.files.requests.DeidentifyFileRequestDeidentifyDocument; +import com.skyflow.generated.rest.resources.files.requests.DeidentifyFileRequestDeidentifyPresentation; +import com.skyflow.generated.rest.resources.files.requests.DeidentifyFileRequestDeidentifySpreadsheet; +import com.skyflow.generated.rest.resources.files.requests.DeidentifyFileRequestDeidentifyStructuredText; +import com.skyflow.generated.rest.resources.files.types.DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem; +import com.skyflow.generated.rest.resources.files.types.DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription; +import com.skyflow.generated.rest.resources.files.types.DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem; +import com.skyflow.generated.rest.resources.files.types.DeidentifyFileImageRequestDeidentifyImageEntityTypesItem; +import com.skyflow.generated.rest.resources.files.types.DeidentifyFileImageRequestDeidentifyImageMaskingMethod; +import com.skyflow.generated.rest.resources.files.types.DeidentifyFileRequestDeidentifyDocumentEntityTypesItem; +import com.skyflow.generated.rest.resources.files.types.DeidentifyFileRequestDeidentifyPresentationEntityTypesItem; +import com.skyflow.generated.rest.resources.files.types.DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem; +import 
com.skyflow.generated.rest.resources.files.types.DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem; +import com.skyflow.generated.rest.resources.files.types.DeidentifyFileRequestDeidentifyTextEntityTypesItem; +import com.skyflow.generated.rest.resources.files.types.DeidentifyFileRequestEntityTypesItem; import com.skyflow.generated.rest.resources.query.QueryClient; import com.skyflow.generated.rest.resources.records.RecordsClient; import com.skyflow.generated.rest.resources.records.requests.RecordServiceBatchOperationBody; @@ -24,8 +53,40 @@ import com.skyflow.generated.rest.resources.tokens.TokensClient; import com.skyflow.generated.rest.resources.tokens.requests.V1DetokenizePayload; import com.skyflow.generated.rest.resources.tokens.requests.V1TokenizePayload; +import com.skyflow.generated.rest.types.BatchRecordMethod; +import com.skyflow.generated.rest.types.DeidentifyStringResponse; +import com.skyflow.generated.rest.types.FileData; +import com.skyflow.generated.rest.types.FileDataDataFormat; +import com.skyflow.generated.rest.types.FileDataDeidentifyAudio; +import com.skyflow.generated.rest.types.FileDataDeidentifyAudioDataFormat; +import com.skyflow.generated.rest.types.FileDataDeidentifyDocument; +import com.skyflow.generated.rest.types.FileDataDeidentifyDocumentDataFormat; +import com.skyflow.generated.rest.types.FileDataDeidentifyImage; +import com.skyflow.generated.rest.types.FileDataDeidentifyImageDataFormat; +import com.skyflow.generated.rest.types.FileDataDeidentifyPdf; +import com.skyflow.generated.rest.types.FileDataDeidentifyPresentation; +import com.skyflow.generated.rest.types.FileDataDeidentifyPresentationDataFormat; +import com.skyflow.generated.rest.types.FileDataDeidentifySpreadsheet; +import com.skyflow.generated.rest.types.FileDataDeidentifySpreadsheetDataFormat; +import com.skyflow.generated.rest.types.FileDataDeidentifyStructuredText; +import com.skyflow.generated.rest.types.FileDataDeidentifyStructuredTextDataFormat; +import 
com.skyflow.generated.rest.types.FileDataDeidentifyText; +import com.skyflow.generated.rest.types.Format; +import com.skyflow.generated.rest.types.FormatMaskedItem; +import com.skyflow.generated.rest.types.FormatPlaintextItem; +import com.skyflow.generated.rest.types.FormatRedactedItem; +import com.skyflow.generated.rest.types.ShiftDates; +import com.skyflow.generated.rest.types.ShiftDatesEntityTypesItem; +import com.skyflow.generated.rest.types.StringResponseEntities; +import com.skyflow.generated.rest.types.TokenTypeMapping; +import com.skyflow.generated.rest.types.TokenTypeMappingEntityOnlyItem; +import com.skyflow.generated.rest.types.TokenTypeMappingEntityUnqCounterItem; +import com.skyflow.generated.rest.types.TokenTypeMappingVaultTokenItem; import com.skyflow.generated.rest.types.Transformations; -import com.skyflow.generated.rest.types.*; +import com.skyflow.generated.rest.types.V1BatchRecord; +import com.skyflow.generated.rest.types.V1DetokenizeRecordRequest; +import com.skyflow.generated.rest.types.V1FieldRecords; +import com.skyflow.generated.rest.types.V1TokenizeRecordRequest; import com.skyflow.logs.InfoLogs; import com.skyflow.serviceaccount.util.Token; import com.skyflow.utils.Constants; @@ -36,24 +97,23 @@ import com.skyflow.vault.data.InsertRequest; import com.skyflow.vault.data.UpdateRequest; import com.skyflow.vault.detect.DeidentifyFileRequest; -import com.skyflow.vault.detect.*; +import com.skyflow.vault.detect.DeidentifyTextRequest; +import com.skyflow.vault.detect.DeidentifyTextResponse; +import com.skyflow.vault.detect.EntityInfo; +import com.skyflow.vault.detect.ReidentifyTextRequest; +import com.skyflow.vault.detect.TextIndex; +import com.skyflow.vault.detect.TokenFormat; import com.skyflow.vault.tokens.ColumnValue; import com.skyflow.vault.tokens.DetokenizeData; import com.skyflow.vault.tokens.DetokenizeRequest; import com.skyflow.vault.tokens.TokenizeRequest; + import io.github.cdimascio.dotenv.Dotenv; import 
io.github.cdimascio.dotenv.DotenvException; import okhttp3.ConnectionPool; import okhttp3.OkHttpClient; import okhttp3.Request; -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.util.*; -import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; - public class VaultClient { private final VaultConfig vaultConfig; @@ -232,7 +292,7 @@ protected File getFileForFileUpload(FileUploadRequest fileUploadRequest) throws return null; } - protected void setBearerToken() throws SkyflowException { + protected synchronized void setBearerToken() throws SkyflowException { prioritiseCredentials(); Validations.validateCredentials(this.finalCredentials); if (this.finalCredentials.getApiKey() != null) { @@ -879,7 +939,7 @@ private void prioritiseCredentials() throws SkyflowException { } catch (SkyflowException e) { throw e; } catch (Exception e) { - throw new RuntimeException(e); + throw new SkyflowException(ErrorCode.SERVER_ERROR.getCode(), ErrorMessage.EmptyCredentials.getMessage()); } } } diff --git a/src/main/java/com/skyflow/utils/HttpUtility.java b/src/main/java/com/skyflow/utils/HttpUtility.java index 671e2415..811bd553 100644 --- a/src/main/java/com/skyflow/utils/HttpUtility.java +++ b/src/main/java/com/skyflow/utils/HttpUtility.java @@ -2,6 +2,7 @@ import com.google.gson.JsonElement; import com.google.gson.JsonObject; +import com.skyflow.errors.ErrorMessage; import com.skyflow.errors.SkyflowException; import java.io.*; @@ -83,8 +84,8 @@ public static String sendRequest(String method, URL url, JsonObject params, Map< if (connection.getErrorStream() != null) streamReader = new InputStreamReader(connection.getErrorStream()); else { - String description = appendRequestId("replace with description", requestID); - throw new SkyflowException(description); + String description = appendRequestId(ErrorMessage.ErrorOccurred.getMessage(), requestID); + throw new SkyflowException(httpCode, new Throwable(description), 
/**
 * Temporary probe used to validate the Claude PR review workflow.
 *
 * <p>Delete this class once the workflow evaluation is finished. The public
 * method names and signatures are kept as they were so existing callers still
 * compile.
 */
public class ReviewProbe {

    /** Role name that {@link #isAdmin(String)} treats as administrative. */
    private static final String ADMIN_ROLE = "admin";

    /** Last identifier passed to a vault-id setter; this class never reads it back. */
    private String vaultId;

    /**
     * Reports whether the given role is the administrator role.
     *
     * <p>The comparison is by content, not by reference, so a role string built
     * at runtime (and therefore not interned) is recognised as well.
     *
     * @param role role name to test; may be {@code null}
     * @return {@code true} only when {@code role} equals {@code "admin"} exactly
     *         (case-sensitive); {@code false} otherwise, including for {@code null}
     */
    public boolean isAdmin(String role) {
        // Constant on the left: null-safe without an explicit null check.
        return ADMIN_ROLE.equals(role);
    }

    /**
     * Stores the vault identifier.
     *
     * @param id vault identifier to store; stored as given, {@code null} included
     */
    public void setVaultId(String id) {
        this.vaultId = id;
    }

    /**
     * Stores the vault identifier.
     *
     * @param id vault identifier to store; stored as given, {@code null} included
     * @deprecated the name spells the acronym in upper case; use
     *             {@link #setVaultId(String)} instead. Kept so existing callers
     *             continue to compile.
     */
    @Deprecated
    public void setVaultID(String id) {
        setVaultId(id);
    }

    /**
     * Attempts to parse {@code value} as a decimal integer and discards the result.
     *
     * <p>This method never throws: input that is not a valid integer
     * (including {@code null}) is ignored.
     *
     * @param value text to parse; may be {@code null} or malformed
     */
    public void load(String value) {
        try {
            Integer.parseInt(value);
        } catch (NumberFormatException ignored) {
            // Deliberately ignored: the parsed number is not used, so there is
            // nothing to report, and throwing here would change the method's
            // existing non-throwing contract.
        }
    }

    /**
     * Concatenates the six arguments in order.
     *
     * <p>Uses ordinary string concatenation, so a {@code null} argument
     * contributes the text {@code "null"}.
     *
     * @return {@code a + b + c + d + e + f}
     */
    public String build(String a, String b, String c, String d, String e, String f) {
        return a + b + c + d + e + f;
    }

    /**
     * Returns {@code n} when it is an even number strictly between 10 and 100,
     * and {@code 0} otherwise.
     *
     * @param n value to classify
     * @return {@code n} if {@code 10 < n < 100} and {@code n} is even; else {@code 0}
     */
    public int classify(int n) {
        // The original outer check (n > 0) is implied by n > 10, so a single
        // flat condition covers all four nested tests.
        boolean evenInRange = n > 10 && n < 100 && n % 2 == 0;
        return evenInRange ? n : 0;
    }
}