From 2a681564783f87045839a60532efc02be1ab744f Mon Sep 17 00:00:00 2001 From: Krish Parekh Date: Sat, 27 Jun 2026 01:47:57 +0530 Subject: [PATCH] fix(telemetry): stop unauthenticated/duplicate CLI rollups polluting PostHog (v1.48.0) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fixes to lib/analytics.ts (+ config.ts), all unit-tested: 1. Don't record auth-required commands run WITHOUT an identity. An unauthenticated CLI can still invoke e.g. `actions execute` (it just fails "Not configured"), and those were counted under an anonymous device id — one such agent produced ~84% of all CLI events. Pre-auth funnel commands (login/init/guide/…) are still recorded. Every event now carries an `authenticated` flag for clean dashboard/ingest filtering. 2. Deterministic $insert_id for rollups (hash of the exact entries) so a re-emitted batch dedupes on PostHog ingest instead of double-counting. 3. Atomic claim of the usage log before flushing (rename-aside) so under concurrent CLI processes exactly one flush owns a batch — eliminating the duplicate "CLI Usage Rollup" events (one user was emitting the same 500-command batch up to 8x). Verified: 16/16 analytics unit tests; a 12-process concurrency test (300 commands -> 300 rollups, 0 duplicates, 0 loss, no over-count); and a real-key run (authenticated actions execute -> recorded; unauthenticated actions execute -> 0 events; unauthenticated login -> kept, authenticated=false). Co-Authored-By: Claude Opus 4.8 --- package-lock.json | 4 +-- package.json | 2 +- src/lib/analytics.test.ts | 65 +++++++++++++++++++++++++++++++++++++-- src/lib/analytics.ts | 62 ++++++++++++++++++++++++++++++------- src/lib/config.ts | 29 +++++++++++++++++ 5 files changed, 146 insertions(+), 16 deletions(-) diff --git a/package-lock.json b/package-lock.json index 05b9ab2..a88406b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@withone/cli", - "version": "1.47.11", + "version": "1.48.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@withone/cli", - "version": "1.47.11", + "version": "1.48.0", "dependencies": { "@clack/prompts": "^0.9.1", "commander": "^13.1.0", diff --git a/package.json b/package.json index aebc0ab..cdc5eb1 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@withone/cli", - "version": "1.47.11", + "version": "1.48.0", "description": "CLI for managing One", "type": "module", "files": [ diff --git a/src/lib/analytics.test.ts b/src/lib/analytics.test.ts index 3da2da5..808652c 100644 --- a/src/lib/analytics.test.ts +++ b/src/lib/analytics.test.ts @@ -6,7 +6,7 @@ import path from 'node:path'; import type { Command } from 'commander'; import { recordCommand, flushUsageRollups } from './analytics.js'; -import { appendUsageLog, readUsageLog, readAnalyticsQueue } from './config.js'; +import { appendUsageLog, readUsageLog, readAnalyticsQueue, claimUsageLog } from './config.js'; // These tests exercise the REAL rollup code against REAL files: HOME is // sandboxed to a temp dir so all reads/writes land under /.one (config.ts @@ -24,6 +24,8 @@ interface RollupEvent { human_count: number; window_start: string; window_end: string; + $insert_id: string; + authenticated: boolean; }; } @@ -61,7 +63,7 @@ describe('CLI usage rollups', () => { const orig: Record = {}; beforeEach(() => { - for (const k of ['HOME', 'CI', 'ONE_NO_TELEMETRY', 'ONE_DISABLE_TELEMETRY', 'DO_NOT_TRACK']) { + for (const k of ['HOME', 'CI', 'ONE_NO_TELEMETRY', 'ONE_DISABLE_TELEMETRY', 'DO_NOT_TRACK', 'ONE_SECRET']) { orig[k] = process.env[k]; } originalCwd = process.cwd(); @@ -75,6 +77,8 @@ describe('CLI usage rollups', () => { delete process.env.ONE_NO_TELEMETRY; delete process.env.ONE_DISABLE_TELEMETRY; delete process.env.DO_NOT_TRACK; + // Authenticated by default so the command-recording tests exercise the normal path. + process.env.ONE_SECRET = 'sk_live_test_key'; }); afterEach(() => { @@ -207,4 +211,61 @@ describe('CLI usage rollups', () => { assert.equal(emittedRollups().length, 0); assert.equal(readUsageLog().length, 0, 'backlog dropped on opt-out'); }); + + // ── Fix 1: don't record unauthenticated auth-required commands ────────────── + it('does NOT record an auth-required command when unauthenticated (no anon pollution)', () => { + delete process.env.ONE_SECRET; // a CLI with no login and no API key + recordCommand(fakeCommand('actions execute')); + assert.equal(emittedRollups().length, 0, 'no rollup for unauthenticated actions execute'); + assert.equal(readUsageLog().length, 0, 'not even written to the local log'); + }); + + it('still records pre-auth funnel commands when unauthenticated', () => { + delete process.env.ONE_SECRET; + recordCommand(fakeCommand('login')); // allowlisted pre-auth command + const r = emittedRollups(); + assert.equal(r.length, 1, 'login is recorded even without auth'); + assert.equal(r[0].properties.authenticated, false, 'flagged as unauthenticated'); + }); + + it('tags rollups with the authenticated flag', () => { + recordCommand(fakeCommand('actions execute')); // ONE_SECRET set in beforeEach + const r = emittedRollups(); + assert.equal(r.length, 1); + assert.equal(r[0].properties.authenticated, true); + }); + + // ── Fix 2: deterministic insert_id so duplicate batches dedupe in PostHog ──── + it('gives identical batches the same $insert_id so PostHog dedupes duplicates', () => { + const old = Date.now() - WINDOW_MS - 1000; + const mk = () => logEntry({ did: 'whale', ts: old, command: 'actions execute', agent: true }); + for (let i = 0; i < 5; i++) appendUsageLog(mk()); + flushUsageRollups(); + for (let i = 0; i < 5; i++) appendUsageLog(mk()); // a byte-identical batch + flushUsageRollups(); + const r = emittedRollups(); + assert.equal(r.length, 2, 'two identical batches were emitted'); + assert.ok(r[0].properties.$insert_id, 'rollup carries an insert id'); + assert.equal(r[0].properties.$insert_id, r[1].properties.$insert_id, 'identical content → same id (deduped on ingest)'); + }); + + it('gives genuinely different batches different $insert_ids', () => { + const old = Date.now() - WINDOW_MS - 1000; + appendUsageLog(logEntry({ did: 'a', ts: old })); + flushUsageRollups(); + appendUsageLog(logEntry({ did: 'b', ts: old })); + flushUsageRollups(); + const r = emittedRollups(); + assert.equal(r.length, 2); + assert.notEqual(r[0].properties.$insert_id, r[1].properties.$insert_id); + }); + + // ── Fix 3: atomic claim so concurrent processes can't double-emit a batch ─── + it('claimUsageLog hands a batch to exactly one caller (concurrency-safe flush)', () => { + for (let i = 0; i < 3; i++) appendUsageLog(logEntry({ did: 'x' })); + const first = claimUsageLog(); + const second = claimUsageLog(); + assert.equal(first?.length, 3, 'first claimant gets the whole batch'); + assert.equal(second, null, 'a concurrent second claimant gets nothing → cannot double-emit'); + }); }); diff --git a/src/lib/analytics.ts b/src/lib/analytics.ts index 70b81ce..5ce8a74 100644 --- a/src/lib/analytics.ts +++ b/src/lib/analytics.ts @@ -1,5 +1,5 @@ import { createRequire } from 'node:module'; -import { randomUUID } from 'node:crypto'; +import { randomUUID, createHash } from 'node:crypto'; import type { Command } from 'commander'; import pc from 'picocolors'; import { @@ -14,7 +14,7 @@ import { readAnalyticsQueue, writeAnalyticsQueue, appendUsageLog, - readUsageLog, + claimUsageLog, writeUsageLog, readUsageState, writeUsageState, @@ -133,6 +133,11 @@ function distinctId(): string { return getWhoAmI()?.user?.id ?? getDeviceId(); } +/** True when the CLI has an identity — a logged-in user or a configured API key. */ +function isAuthenticated(): boolean { + return !!getWhoAmI()?.user || !!getApiKey(); +} + function baseProperties(): Record { return { $lib: 'one-cli', @@ -142,6 +147,7 @@ function baseProperties(): Record { os: process.platform, arch: process.arch, node_version: process.versions.node, + authenticated: isAuthenticated(), }; } @@ -205,7 +211,10 @@ export function capture( return; } const did = opts.distinctId ?? distinctId(); - const props: Record = { ...baseProperties(), ...properties, $insert_id: randomUUID() }; + const props: Record = { ...baseProperties(), ...properties }; + // One-off events get a random id; rollups pass a content-derived id (see + // emitRollup) so duplicate batches collapse to a single event in PostHog. + if (props.$insert_id === undefined) props.$insert_id = randomUUID(); // Person props belong only to the *current* user; never tag a rollup for a // previous login (distinct_id ≠ current) with the new user's email/name. if (did === distinctId()) { @@ -233,20 +242,39 @@ function utcDay(ts: number): string { return new Date(ts).toISOString().slice(0, 10); } +/** + * Top-level commands worth recording BEFORE authentication — the install / try / + * activation funnel. Anything else run without an identity is dropped, so a + * determined unauthenticated loop (e.g. `actions execute` failing on repeat) + * can't pollute analytics. Fail-closed: unknown commands need auth to count. + */ +const PRE_AUTH_COMMANDS = new Set([ + 'init', 'login', 'logout', 'guide', 'platforms', 'onboard', 'config', 'update', 'help', +]); + +/** Authenticated → record everything; unauthenticated → only the pre-auth funnel. */ +function shouldRecord(commandPath: string): boolean { + if (isAuthenticated()) return true; + return PRE_AUTH_COMMANDS.has(commandPath.split(' ')[0]); +} + /** * Record the command about to run for usage analytics. Appends the command * PATH (e.g. "actions execute") — never args/flags — to the local rollup log * (instant, sync), then flushes any due rollups. The first command of the day * flushes immediately so a user is captured even if they run the CLI once and - * never again. Never throws, never blocks. No-op when telemetry is disabled. + * never again. Never throws, never blocks. No-op when telemetry is disabled, or + * when an unauthenticated session runs an auth-required command (see shouldRecord). */ export function recordCommand(command: Command): void { if (isTelemetryDisabled()) { writeUsageLog([]); return; } + const cmdPath = commandPath(command); + if (!shouldRecord(cmdPath)) return; const did = distinctId(); - const entry: UsageEntry = { ts: Date.now(), command: commandPath(command), agent: isAgentMode(), did }; + const entry: UsageEntry = { ts: Date.now(), command: cmdPath, agent: isAgentMode(), did }; appendUsageLog(JSON.stringify(entry)); const today = utcDay(entry.ts); @@ -269,8 +297,13 @@ export function flushUsageRollups(opts: { force?: boolean } = {}): void { writeUsageLog([]); return; } + // Atomically claim the batch so concurrent CLI processes can't each emit it + // (the cause of duplicate "CLI Usage Rollup" events). Only one flush wins. + const lines = claimUsageLog(); + if (!lines || lines.length === 0) return; + const entries: UsageEntry[] = []; - for (const line of readUsageLog()) { + for (const line of lines) { try { const e = JSON.parse(line) as UsageEntry; if (e && typeof e.ts === 'number' && typeof e.command === 'string' && typeof e.did === 'string') { @@ -280,10 +313,7 @@ export function flushUsageRollups(opts: { force?: boolean } = {}): void { // skip a malformed line } } - if (entries.length === 0) { - writeUsageLog([]); - return; - } + if (entries.length === 0) return; const currentDid = entries[entries.length - 1].did; const now = Date.now(); @@ -305,7 +335,9 @@ export function flushUsageRollups(opts: { force?: boolean } = {}): void { if (due) emitRollup(did, group); else kept.push(...group); } - writeUsageLog(kept.map((e) => JSON.stringify(e))); + // Re-append (never overwrite) the not-yet-due entries, so we can't clobber rows + // another process appended to the fresh log while we held this batch. + for (const e of kept) appendUsageLog(JSON.stringify(e)); } /** Build + enqueue one "CLI Usage Rollup" event carrying exact counts for a batch. */ @@ -316,6 +348,13 @@ function emitRollup(did: string, group: UsageEntry[]): void { byCommand[e.command] = (byCommand[e.command] ?? 0) + 1; if (e.agent) agentCount += 1; } + // Content-derived id hashed over the EXACT entries (each command's timestamp + + // path + agent flag). A re-emitted copy of the same batch hashes identically so + // PostHog dedupes it on ingest; genuinely different batches hash differently + // (distinct per-command timestamps), so this never collapses real activity. + const insertId = createHash('sha1') + .update(`${did}|${group.map((e) => `${e.ts}:${e.command}:${e.agent ? 1 : 0}`).join('|')}`) + .digest('hex'); capture( 'CLI Usage Rollup', { @@ -325,6 +364,7 @@ function emitRollup(did: string, group: UsageEntry[]): void { human_count: group.length - agentCount, window_start: new Date(group[0].ts).toISOString(), window_end: new Date(group[group.length - 1].ts).toISOString(), + $insert_id: insertId, }, { distinctId: did, timestamp: new Date(group[group.length - 1].ts).toISOString() }, ); diff --git a/src/lib/config.ts b/src/lib/config.ts index b694550..b502a76 100644 --- a/src/lib/config.ts +++ b/src/lib/config.ts @@ -511,6 +511,35 @@ export function writeUsageLog(lines: string[]): void { } catch { /* best-effort */ } } +/** + * Atomically claim the whole usage log for flushing: rename it aside, read it, + * delete the temp copy, and return its lines. Returns null when there's nothing + * to claim OR another process already claimed it (the rename throws ENOENT). + * + * This is what makes rollups concurrency-safe. Multiple parallel CLI processes + * each call this; the OS guarantees only ONE rename of a given file succeeds, so + * exactly one flush ever owns a batch — eliminating the duplicate "CLI Usage + * Rollup" events that happened when every concurrent process read the same log + * and emitted it. Commands logged by other processes meanwhile land in a fresh + * log and are claimed on a later flush, so nothing is lost. + */ +export function claimUsageLog(): string[] | null { + const src = usageLogFile(); + const tmp = `${src}.claim.${process.pid}.${randomUUID()}`; + try { + fs.renameSync(src, tmp); + } catch { + return null; // no log yet, or another process claimed it first + } + try { + return fs.readFileSync(tmp, 'utf-8').split('\n').filter(Boolean).slice(-USAGE_LOG_MAX); + } catch { + return []; + } finally { + try { fs.rmSync(tmp, { force: true }); } catch { /* best-effort */ } + } +} + /** First-touch bookkeeping so every user is captured on their first command of the day. */ export interface UsageState { lastDay?: string; distinctId?: string }