diff --git a/packages/appkit/src/type-generator/query-registry.ts b/packages/appkit/src/type-generator/query-registry.ts
index ce0cdd0b..c8e58181 100644
--- a/packages/appkit/src/type-generator/query-registry.ts
+++ b/packages/appkit/src/type-generator/query-registry.ts
@@ -463,7 +463,17 @@ export async function generateQueriesFromDescribe(
   const queryFiles = allFiles.filter((file) => file.endsWith(".sql"));
   logger.debug("Found %d SQL queries", queryFiles.length);
 
-  const client = new WorkspaceClient({});
+  // Construct the SDK client lazily: only the blocking preflight and the DESCRIBE
+  // batch ever touch the warehouse, and a `non-blocking` run (the dev/install
+  // hot path) short-circuits to degraded types before either. Building a
+  // WorkspaceClient eagerly here made every keystroke-triggered, all-cached, or
+  // non-blocking run pay for a client (OAuth/config resolution) it never used.
+  // The accessor memoizes, so the blocking path still builds exactly one.
+  let client: WorkspaceClient | null = null;
+  const getClient = (): WorkspaceClient => {
+    client ??= new WorkspaceClient({});
+    return client;
+  };
   const spinner = new Spinner();
 
   // Read all SQL files in parallel
@@ -595,8 +605,11 @@ export async function generateQueriesFromDescribe(
       // now.
       decision = "degradeAll";
     } else {
+      // First warehouse round-trip of the run — build the client now (memoized,
+      // so the DESCRIBE batch below reuses this same instance).
+      const wh = getClient();
       try {
-        const state = await getWarehouseState(client, warehouseId);
+        const state = await getWarehouseState(wh, warehouseId);
         decision = decidePreflight(state, mode);
         if (decision === "fatal") {
           fatalMessage = `warehouse ${warehouseId} is ${state}`;
@@ -606,8 +619,8 @@ export async function generateQueriesFromDescribe(
           // poll to RUNNING. treatStoppedAsTransient rides out the stale
           // pre-start STOPPED/STOPPING reading the start hasn't propagated past
           // yet — only DELETED/DELETING (or the deadline) ends the wait early.
-          await startWarehouse(client, warehouseId);
-          const final = await waitUntilRunning(client, warehouseId, {
+          await startWarehouse(wh, warehouseId);
+          const final = await waitUntilRunning(wh, warehouseId, {
             maxMs: PREFLIGHT_WAIT_MAX_MS,
             treatStoppedAsTransient: true,
           });
@@ -619,7 +632,7 @@ export async function generateQueriesFromDescribe(
           }
         }
         if (decision === "waitThenProceed") {
-          const final = await waitUntilRunning(client, warehouseId, {
+          const final = await waitUntilRunning(wh, warehouseId, {
             maxMs: PREFLIGHT_WAIT_MAX_MS,
           });
           if (final === "RUNNING") {
@@ -670,6 +683,9 @@ export async function generateQueriesFromDescribe(
     } else {
       let completed = 0;
       const total = uncachedQueries.length;
+      // Reuse the memoized client: in blocking mode the preflight above already
+      // built it; this is the same instance, never a second one.
+      const wh = getClient();
       spinner.start(
         `Describing ${total} ${total === 1 ? "query" : "queries"} (0/${total})`,
       );
@@ -681,7 +697,7 @@ export async function generateQueriesFromDescribe(
         sqlHash,
         cleanedSql,
       }: (typeof uncachedQueries)[number]): Promise<DescribeResult> => {
-        const result = (await client.statementExecution.executeStatement({
+        const result = (await wh.statementExecution.executeStatement({
           statement: `DESCRIBE QUERY ${cleanedSql}`,
           warehouse_id: warehouseId,
         })) as DatabricksStatementExecutionResponse;
diff --git a/packages/appkit/src/type-generator/tests/generate-queries.test.ts b/packages/appkit/src/type-generator/tests/generate-queries.test.ts
index 4ebcaac2..68ef5556 100644
--- a/packages/appkit/src/type-generator/tests/generate-queries.test.ts
+++ b/packages/appkit/src/type-generator/tests/generate-queries.test.ts
@@ -10,6 +10,9 @@ const mocks = vi.hoisted(() => ({
   getWarehouse: vi.fn(() => ({ state: "RUNNING" })),
   // warehouses.start — only the blocking startWaitProceed path calls this.
   startWarehouse: vi.fn(),
+  // Counts `new WorkspaceClient({})` constructions so the lazy-client tests can
+  // assert non-blocking / all-cached runs build zero clients.
+  workspaceClientCtor: vi.fn(),
   spinnerStop: vi.fn(),
   spinnerPrintDetail: vi.fn(),
   loadCache: vi.fn(() => ({ version: "2", queries: {} })),
@@ -24,10 +27,15 @@ vi.mock("node:fs/promises", () => ({
 }));
 
 vi.mock("@databricks/sdk-experimental", () => ({
-  WorkspaceClient: vi.fn(() => ({
-    statementExecution: { executeStatement: mocks.executeStatement },
-    warehouses: { get: mocks.getWarehouse, start: mocks.startWarehouse },
-  })),
+  WorkspaceClient: vi.fn(() => {
+    // Record the construction so lazy-client tests can assert zero builds on the
+    // non-blocking / all-cached paths.
+    mocks.workspaceClientCtor();
+    return {
+      statementExecution: { executeStatement: mocks.executeStatement },
+      warehouses: { get: mocks.getWarehouse, start: mocks.startWarehouse },
+    };
+  }),
 }));
 
 vi.mock("../spinner", () => ({
@@ -767,6 +775,9 @@ describe("generateQueriesFromDescribe", () => {
       // ZERO warehouse round-trips: no probe (getWarehouse) and no DESCRIBE.
       expect(mocks.getWarehouse).not.toHaveBeenCalled();
       expect(mocks.executeStatement).not.toHaveBeenCalled();
+      // Lazy client (F2): a non-blocking run short-circuits before any warehouse
+      // work, so no WorkspaceClient is ever constructed.
+      expect(mocks.workspaceClientCtor).not.toHaveBeenCalled();
       expect(fatalErrors).toEqual([]);
       expect(syntaxErrors).toEqual([]);
       expect(schemas[0].type).toContain("result: unknown");
@@ -865,6 +876,8 @@ describe("generateQueriesFromDescribe", () => {
       // ZERO warehouse round-trips: no probe (getWarehouse) and no DESCRIBE.
       expect(mocks.getWarehouse).not.toHaveBeenCalled();
       expect(mocks.executeStatement).not.toHaveBeenCalled();
+      // Lazy client (F2): non-blocking builds no WorkspaceClient at all.
+      expect(mocks.workspaceClientCtor).not.toHaveBeenCalled();
       // Best-available types: no cache seeded → every query degrades to unknown.
       expect(schemas).toHaveLength(2);
       expect(schemas[0].name).toBe("a");
@@ -902,5 +915,47 @@ describe("generateQueriesFromDescribe", () => {
       expect(syntaxErrors).toEqual([]);
       expect(fatalErrors).toEqual([]);
     });
+
+    test("blocking mode, all queries cached — builds zero WorkspaceClient", async () => {
+      // Lazy client (F2): even in blocking mode, if every query is a cache HIT
+      // there are no uncached queries, so the preflight/describe block is skipped
+      // entirely and no warehouse client is constructed.
+      const sql = "SELECT id FROM users";
+      mocks.readdir.mockResolvedValue(["users.sql"]);
+      mocks.readFile.mockResolvedValue(sql);
+      mocks.loadCache.mockReturnValueOnce({
+        version: CACHE_VERSION,
+        queries: {
+          users: { hash: hashSQL(sql), type: CACHED_GOOD_TYPE, retry: false },
+        },
+      });
+
+      const { schemas } = await describeQueries("/queries", "wh-123");
+
+      expect(mocks.workspaceClientCtor).not.toHaveBeenCalled();
+      expect(mocks.getWarehouse).not.toHaveBeenCalled();
+      expect(mocks.executeStatement).not.toHaveBeenCalled();
+      expect(schemas[0].type).toBe(CACHED_GOOD_TYPE);
+    });
+
+    test("blocking mode, uncached query — builds exactly one WorkspaceClient (preflight + describe share it)", async () => {
+      // Lazy client (F2): the blocking path still works — it constructs a single
+      // client that the preflight probe and the DESCRIBE batch both reuse (memoized),
+      // never two.
+      mocks.readdir.mockResolvedValue(["a.sql"]);
+      mocks.readFile.mockResolvedValue("SELECT id FROM a");
+      mocks.getWarehouse.mockReturnValue({ state: "RUNNING" });
+      mocks.executeStatement.mockResolvedValue(
+        succeededResult([["id", "INT", null]]),
+      );
+
+      const { schemas } = await describeQueries("/queries", "wh-123");
+
+      // One construction shared across probe + describe (not one each).
+      expect(mocks.workspaceClientCtor).toHaveBeenCalledTimes(1);
+      expect(mocks.getWarehouse).toHaveBeenCalledTimes(1);
+      expect(mocks.executeStatement).toHaveBeenCalledTimes(1);
+      expect(schemas[0].type).toContain("id: number");
+    });
   });
 });
diff --git a/packages/appkit/src/type-generator/tests/vite-plugin.test.ts b/packages/appkit/src/type-generator/tests/vite-plugin.test.ts
index 16153257..8dd92ded 100644
--- a/packages/appkit/src/type-generator/tests/vite-plugin.test.ts
+++ b/packages/appkit/src/type-generator/tests/vite-plugin.test.ts
@@ -9,6 +9,9 @@ const mocks = vi.hoisted(() => ({
   getWarehouseState: vi.fn(),
   startWarehouse: vi.fn(),
   waitUntilRunning: vi.fn(),
+  // Counts `new WorkspaceClient({})` constructions so the per-save perf tests can
+  // assert the watch builds exactly one client (not one per rapid save).
+  workspaceClientCtor: vi.fn(),
 }));
 
 // Mock the module vite-plugin.ts pulls generateFromEntryPoint from. The error
@@ -31,9 +34,14 @@ vi.mock("../warehouse-status", () => ({
 }));
 
 // armWarehouseWatch constructs `new WorkspaceClient({})`. Stub the SDK so that
-// constructor is inert in unit tests.
+// constructor is inert in unit tests, but route each construction through a spy
+// so the per-save perf tests can count how many clients the watch builds.
 vi.mock("@databricks/sdk-experimental", () => ({
-  WorkspaceClient: class {},
+  WorkspaceClient: class {
+    constructor() {
+      mocks.workspaceClientCtor();
+    }
+  },
 }));
 
 const { appKitTypesPlugin } = await import("../vite-plugin");
@@ -484,3 +492,180 @@ describe("appKitTypesPlugin — background warehouse watch", () => {
     expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1);
   });
 });
+
+describe("appKitTypesPlugin — per-save perf collapse (F2)", () => {
+  const savedNodeEnv = process.env.NODE_ENV;
+  const savedWarehouseId = process.env.DATABRICKS_WAREHOUSE_ID;
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+    mocks.generateFromEntryPoint.mockResolvedValue(undefined);
+    mocks.startWarehouse.mockResolvedValue(undefined);
+    process.env.NODE_ENV = "development";
+    process.env.DATABRICKS_WAREHOUSE_ID = "wh-test";
+  });
+
+  afterEach(() => {
+    if (savedNodeEnv === undefined) delete process.env.NODE_ENV;
+    else process.env.NODE_ENV = savedNodeEnv;
+
+    if (savedWarehouseId === undefined)
+      delete process.env.DATABRICKS_WAREHOUSE_ID;
+    else process.env.DATABRICKS_WAREHOUSE_ID = savedWarehouseId;
+  });
+
+  test("a single .sql save builds one WorkspaceClient and issues one warehouses.get", async () => {
+    // RUNNING so the watch probes once, then immediately regenerates (its wait
+    // returns on the first poll). We isolate a single save by letting the initial
+    // build's watch fully settle first, then measuring the delta for one save.
+    mocks.getWarehouseState.mockResolvedValue("RUNNING" as WarehouseState);
+    mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState);
+
+    const plugin = makeConfiguredPlugin();
+    const { server, watcher } = makeFakeServer();
+    getHook<ConfigureServerHook>(plugin, "configureServer")(server);
+    const buildStart = getHook<BuildStartHook>(plugin, "buildStart");
+
+    // Initial build arms + completes watch #1.
+    await buildStart();
+    await flush();
+
+    // Measure only the single save below.
+    mocks.workspaceClientCtor.mockClear();
+    mocks.getWarehouseState.mockClear();
+
+    watcher.emit(
+      "change",
+      path.join(process.cwd(), "config", "queries", "q.sql"),
+    );
+    await flush();
+
+    // Exactly one client construction and one status RPC for the save — the
+    // F2 acceptance criterion.
+    expect(mocks.workspaceClientCtor).toHaveBeenCalledTimes(1);
+    expect(mocks.getWarehouseState).toHaveBeenCalledTimes(1);
+  });
+
+  test("five rapid saves do not fan out — one waiter, one client, one get", async () => {
+    // Hold the watch's wait pending so all five saves land while watch #1 is
+    // still in flight. One-waiter re-arm must coalesce them onto that single
+    // watch instead of spawning a client + probe per save.
+    mocks.getWarehouseState.mockResolvedValue("STARTING" as WarehouseState);
+    const wait = deferred();
+    mocks.waitUntilRunning.mockReturnValue(
+      wait.promise.then(() => "RUNNING" as WarehouseState),
+    );
+
+    const plugin = makeConfiguredPlugin();
+    const { server, watcher } = makeFakeServer();
+    getHook<ConfigureServerHook>(plugin, "configureServer")(server);
+    const buildStart = getHook<BuildStartHook>(plugin, "buildStart");
+
+    // buildStart arms watch #1 (now parked on the pending wait).
+    await buildStart();
+    await flush();
+    expect(mocks.workspaceClientCtor).toHaveBeenCalledTimes(1);
+    expect(mocks.getWarehouseState).toHaveBeenCalledTimes(1);
+
+    // Five rapid .sql saves while watch #1 is still waiting.
+    const sqlFile = path.join(process.cwd(), "config", "queries", "q.sql");
+    for (let i = 0; i < 5; i++) watcher.emit("change", sqlFile);
+    await flush();
+
+    // No fan-out: still exactly one client + one status RPC despite six triggers.
+    expect(mocks.workspaceClientCtor).toHaveBeenCalledTimes(1);
+    expect(mocks.getWarehouseState).toHaveBeenCalledTimes(1);
+    // And only one warehouse wait is in flight, not five.
+    expect(mocks.waitUntilRunning).toHaveBeenCalledTimes(1);
+
+    // Let the watch finish so the trailing regenerate fires and no timer leaks.
+    wait.resolve();
+    await flush();
+  });
+
+  test("after a watch settles, the next save arms a fresh watch (latch released)", async () => {
+    // The one-waiter latch must release on completion, or saves after the first
+    // watch would be permanently ignored.
+    mocks.getWarehouseState.mockResolvedValue("RUNNING" as WarehouseState);
+    mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState);
+
+    const plugin = makeConfiguredPlugin();
+    const { server, watcher } = makeFakeServer();
+    getHook<ConfigureServerHook>(plugin, "configureServer")(server);
+    const buildStart = getHook<BuildStartHook>(plugin, "buildStart");
+
+    await buildStart();
+    await flush();
+    const ctorAfterBuild = mocks.workspaceClientCtor.mock.calls.length;
+
+    // A save after the first watch settled must arm a new watch (one more client
+    // + one more probe).
+    watcher.emit(
+      "change",
+      path.join(process.cwd(), "config", "queries", "q.sql"),
+    );
+    await flush();
+
+    expect(mocks.workspaceClientCtor.mock.calls.length).toBe(
+      ctorAfterBuild + 1,
+    );
+  });
+
+  test("a non-blocking save during a blocking warm-up does not downgrade the pending describe", async () => {
+    // Sticky mode: while the background blocking regenerate is queued behind an
+    // in-flight foreground run, a coalesced non-blocking .sql save must NOT
+    // downgrade the trailing run to non-blocking — real (described) types still
+    // land.
+    //
+    // We drive the real path: the buildStart foreground runs non-blocking and is
+    // held in flight (deferred); the armed watch reaches RUNNING and enqueues a
+    // blocking trailing run; then a non-blocking .sql save lands. The save's
+    // re-armed watch sees DELETED (the 2nd getWarehouseState) so it does NOT
+    // inject another blocking trigger — proving the blocking trailing run
+    // survives purely because of sticky escalation, not a later re-escalation.
+    mocks.getWarehouseState
+      .mockResolvedValueOnce("RUNNING" as WarehouseState) // watch #1 → blocking regen
+      .mockResolvedValue("DELETED" as WarehouseState); // watch #2 (save) → no-op
+    mocks.waitUntilRunning.mockResolvedValue("RUNNING" as WarehouseState);
+
+    const first = deferred();
+    const trailing = deferred();
+    mocks.generateFromEntryPoint
+      .mockReturnValueOnce(first.promise) // initial foreground (in flight)
+      .mockReturnValueOnce(trailing.promise); // the single trailing run
+
+    const plugin = makeConfiguredPlugin();
+    const { server, watcher } = makeFakeServer();
+    getHook<ConfigureServerHook>(plugin, "configureServer")(server);
+    const buildStart = getHook<BuildStartHook>(plugin, "buildStart");
+
+    // Initial foreground non-blocking generate is now in flight (deferred), and
+    // watch #1 (RUNNING) has enqueued a blocking trailing run behind it.
+    await buildStart();
+    await flush();
+    expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1);
+
+    // A non-blocking .sql save lands while still in flight — must NOT downgrade
+    // the queued blocking trailing run.
+    watcher.emit(
+      "change",
+      path.join(process.cwd(), "config", "queries", "q.sql"),
+    );
+    await flush();
+    // Still only the one in-flight run; nothing started concurrently.
+    expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(1);
+
+    // Release the in-flight run → the single trailing run fires, and it must be
+    // blocking (escalated), not downgraded to non-blocking by the later save.
+    first.resolve();
+    await flush();
+    expect(mocks.generateFromEntryPoint).toHaveBeenCalledTimes(2);
+    expect(mocks.generateFromEntryPoint).toHaveBeenNthCalledWith(
+      2,
+      expect.objectContaining({ mode: "blocking" }),
+    );
+
+    trailing.resolve();
+    await flush();
+  });
+});
diff --git a/packages/appkit/src/type-generator/vite-plugin.ts b/packages/appkit/src/type-generator/vite-plugin.ts
index c869acdf..72c2f9bc 100644
--- a/packages/appkit/src/type-generator/vite-plugin.ts
+++ b/packages/appkit/src/type-generator/vite-plugin.ts
@@ -64,9 +64,14 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin {
   let pendingMode: PreflightMode = "non-blocking";
 
   // The currently-armed DEV background warehouse watch, if any. Aborting it
-  // stops a pending waitUntilRunning (server shutdown, or a newer arm replacing
-  // an older one).
+  // stops a pending waitUntilRunning (server shutdown).
   let watchController: AbortController | null = null;
+  // Whether a warehouse watch is currently in flight. Used for one-waiter
+  // re-arm: rapid `.sql` saves must NOT each abort+recreate a watch (that fans
+  // out into one WorkspaceClient + one warehouses.get PER save). Instead, while
+  // a watch is running, new saves coalesce onto it — its trailing blocking
+  // regenerate re-reads the query folder, so it already covers the latest edits.
+  let watchInFlight = false;
 
   /**
    * Generate types once in the given preflight {@link PreflightMode}. Never
@@ -129,13 +134,29 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin {
    * (latest-wins, including the mode: a blocking watch trigger that arrives mid
    * non-blocking foreground run still describes when its trailing run fires).
    *
-   * @param mode - preflight policy for this run. Recorded into `pendingMode`,
-   *   which the drain reads for each generate (latest trigger wins).
+   * @param mode - preflight policy for this run. Folded into `pendingMode`,
+   *   which the drain reads for each generate. The fold ESCALATES only: once a
+   *   blocking run is pending, a coalesced non-blocking trigger can't downgrade
+   *   it (see below).
    * @returns A promise that resolves when this trigger's work (including any
    *   trailing run it scheduled) has completed.
    */
   function runGenerate(mode: PreflightMode): Promise<void> {
-    pendingMode = mode;
+    // Sticky/escalate-only mode fold (NOT latest-wins): a "blocking" request
+    // outranks "non-blocking". The dev hot path fires a non-blocking foreground
+    // run on every `.sql` save AND arms a blocking background warm-up; without
+    // this, a save's non-blocking trigger landing while the blocking warm-up is
+    // queued would downgrade the trailing run back to non-blocking and the real
+    // (described) types would never land. Escalating means the trailing run
+    // still describes. Blocking → blocking; non-blocking only sticks if nothing
+    // blocking is already pending.
+    if (mode === "blocking") {
+      pendingMode = "blocking";
+    } else if (!inFlight) {
+      // Idle: a lone non-blocking trigger sets the baseline for its own drain.
+      // (When a run is in flight we leave a possibly-blocking pendingMode intact.)
+      pendingMode = "non-blocking";
+    }
 
     if (inFlight) {
       // A run is active: remember that another trigger arrived and ride out the
@@ -190,10 +211,14 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin {
    *  - DELETED / DELETING → return (a deleted warehouse can't be started, and
    *    blocking typegen would treat it as fatal); leave the degraded types.
    *
-   * No-op in production or without a warehouse id. Replaces any previously-armed
-   * watch (aborting it first). Fully self-contained: it never throws into the
-   * caller and never re-arms itself. The whole lifecycle is abortable via the
-   * shared {@link watchController} — its signal is threaded into
+   * No-op in production or without a warehouse id. ONE-WAITER re-arm: if a watch
+   * is already in flight, this is a no-op — the running watch's trailing blocking
+   * regenerate re-reads the query folder and so already covers any `.sql` edits
+   * that landed while it was waiting. This is what keeps rapid saves from fanning
+   * out into one WorkspaceClient + one warehouses.get apiece; instead they
+   * coalesce onto the single in-flight watch. Fully self-contained: it never
+   * throws into the caller and never re-arms itself. The lifecycle is abortable
+   * via the shared {@link watchController} — its signal is threaded into
    * `waitUntilRunning`, so a dev-server shutdown cancels a pending wait — and the
    * regenerate routes through {@link runGenerate} so it can't race-write the
    * .d.ts with the foreground degrade or a `.sql` re-trigger.
@@ -208,10 +233,15 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin {
     const warehouseId = process.env.DATABRICKS_WAREHOUSE_ID || "";
     if (!warehouseId) return;
 
-    // Supersede any in-flight watch so we never run two concurrently.
-    watchController?.abort();
+    // One-waiter: a watch is already running — let it cover this save too (its
+    // trailing blocking regenerate re-reads the folder). Recreating here would
+    // fan out a fresh client + probe per save, which is exactly the churn F2
+    // removes.
+    if (watchInFlight) return;
+
     const controller = new AbortController();
     watchController = controller;
+    watchInFlight = true;
     const { signal } = controller;
 
     void (async () => {
@@ -268,6 +298,15 @@ export function appKitTypesPlugin(options?: AppKitTypesPluginOptions): Plugin {
       } catch {
         // Detached background task: any failure (timeout, abort, connectivity,
         // auth) is non-fatal — the developer still has degraded/cached types.
+      } finally {
+        // Release the one-waiter latch so the NEXT save can arm a fresh watch.
+        // Only clear if we're still the current watch: a shutdown abort replaces
+        // nothing (no new watch arms while in flight), so an unconditional clear
+        // is safe here, but guarding on identity keeps it correct if that ever
+        // changes.
+        if (watchController === controller) {
+          watchInFlight = false;
+        }
       }
     })();
   }