diff --git a/.agents/skills/graphile-search/SKILL.md b/.agents/skills/graphile-search/SKILL.md index 005b66ec40..715e50f0e8 100644 --- a/.agents/skills/graphile-search/SKILL.md +++ b/.agents/skills/graphile-search/SKILL.md @@ -78,7 +78,7 @@ UnifiedSearchPreset({ enableSearchScore: true, // expose searchScore (0..1) on search-enabled tables enableUnifiedSearch: true, // expose unifiedSearch composite filter - // Weights for composite searchScore + // Weights for composite searchScore (used in weighted RRF contributions) searchScoreWeights: { tsv: 0.3, bm25: 0.4, @@ -86,6 +86,9 @@ UnifiedSearchPreset({ vector: 0.1, }, + // RRF smoothing constant (default 60; higher values flatten the gap between ranks, lower values let top ranks dominate) + rrfK: 60, + // Scalar naming fullTextScalarName: 'FullText', // GraphQL scalar name for tsvector columns tsConfig: 'english', // PostgreSQL text search configuration @@ -116,7 +119,7 @@ type Article { } ``` -Computed by normalizing all active search signals to 0..1 and averaging them. Returns `null` when no search filters are active. +Computed via **Reciprocal Rank Fusion (RRF)**: each active adapter ranks its results independently, then `searchScore = Σ(weight_i / (rrfK + rank_i)) / max_possible`, where `max_possible = Σ(weight_i / (rrfK + 1))` is the score of a row ranked #1 by every active adapter. Returns `null` when no search filters are active. Otherwise the value is always in [0, 1], and 1.0 means the row is ranked #1 by every active adapter. ### Per-Adapter Filter Fields (on connection filters) @@ -224,9 +227,9 @@ After running `cnc codegen`, the generated SDK client exposes search filters, sc - **Chunk-aware search** — `includeChunks` toggle for RAG tables with `@hasChunks`, transparent parent + chunk distance - **Multi-strategy patterns** — fuzzy fallback, autocomplete pipeline, semantic + keyword hybrid -## Score Semantics +## Score Semantics & RRF Fusion -Each adapter declares how its scores behave for normalization in `searchScore`: +Each adapter declares how its scores behave: | Adapter | Metric | Lower is Better? 
| Range | |---------|--------|-------------------|-------| @@ -235,7 +238,17 @@ Each adapter declares how its scores behave for normalization in `searchScore`: | trgm | `similarity` | No (higher = better) | [0, 1] | | pgvector | `distance` | Yes (closer = better) | Unbounded | -Bounded ranges use linear normalization. Unbounded ranges use sigmoid normalization (`1 / (1 + |score|)`). +**Composite `searchScore` uses Reciprocal Rank Fusion (RRF)** — not score normalization. Each adapter's results are ranked by `ROW_NUMBER()` window functions in SQL (`rank_i` is a row's 1-based position in adapter i's ranking), and the composite score, divided by the best possible value (rank 1 in every active adapter), is: + +``` +searchScore = Σ(weight_i / (rrfK + rank_i)) / Σ(weight_i / (rrfK + 1)) +``` + +This avoids the problem of BM25/pgvector producing unbounded scores that can't be meaningfully normalized. RRF only uses rank positions, making cross-algorithm fusion fair regardless of score scale. + +- `rrfK` (default 60) controls how strongly top-ranked items dominate. Lower values amplify the rank-1 advantage. +- `weights` from `@searchConfig` (falling back to the preset's `searchScoreWeights`) scale each adapter's RRF contribution (weighted RRF). +- `boost_recent` / `boost_recency_decay` apply exponential decay as a post-RRF multiplier. ## Common Pitfalls diff --git a/graphile/graphile-search/src/__tests__/rrf-scoring.test.ts b/graphile/graphile-search/src/__tests__/rrf-scoring.test.ts new file mode 100644 index 0000000000..14ecaac2f3 --- /dev/null +++ b/graphile/graphile-search/src/__tests__/rrf-scoring.test.ts @@ -0,0 +1,1073 @@ +/** + * RRF (Reciprocal Rank Fusion) scoring tests. 
+ * + * Verifies that searchScore uses rank-based fusion across adapters: + * - Single adapter scenarios (BM25 only, tsvector only, trgm only, pgvector only) + * - Multi-adapter combinations (BM25+tsvector, BM25+pgvector, all 4) + * - Chunk-aware tables (parent + chunks with BM25/pgvector) + * - Custom @searchConfig weights + * - Recency boost + RRF + * - Invariants: searchScore always [0,1], SEARCH_SCORE_DESC correct ordering + */ + +import { join } from 'path'; +import { getConnections, seed } from 'graphile-test'; +import type { GraphQLResponse } from 'graphile-test'; +import type { PgTestClient } from 'pgsql-test'; +import { ConnectionFilterPreset } from 'graphile-connection-filter'; +import { Bm25CodecPlugin } from '../codecs/bm25-codec'; +import { VectorCodecPlugin } from '../codecs/vector-codec'; +import { TsvectorCodecPlugin } from '../codecs/tsvector-codec'; +import { createUnifiedSearchPlugin } from '../plugin'; +import { createTsvectorAdapter } from '../adapters/tsvector'; +import { createBm25Adapter } from '../adapters/bm25'; +import { createTrgmAdapter } from '../adapters/trgm'; +import { createPgvectorAdapter } from '../adapters/pgvector'; +import type { GraphileConfig } from 'graphile-config'; + +// ─── Smart Tags Plugin ─────────────────────────────────────────────────────── + +function makeTestSmartTagsPlugin( + tagsByTable: Record> +): GraphileConfig.Plugin { + return { + name: 'TestSmartTagsPlugin', + version: '1.0.0', + schema: { + hooks: { + init: { + before: ['UnifiedSearchPlugin'], + callback(_, build) { + for (const codec of Object.values(build.input.pgRegistry.pgCodecs)) { + const c = codec as any; + if (!c.attributes || !c.name) continue; + const tags = tagsByTable[c.name]; + if (!tags) continue; + if (!c.extensions) c.extensions = {}; + if (!c.extensions.tags) c.extensions.tags = {}; + Object.assign(c.extensions.tags, tags); + } + return _; + }, + }, + }, + }, + }; +} + +// ─── Result types 
──────────────────────────────────────────────────────────── + +interface DocumentNode { + rowId: number; + title: string; + body?: string; + tsvRank: number | null; + bodyBm25Score: number | null; + titleTrgmSimilarity: number | null; + embeddingVectorDistance: number | null; + searchScore: number | null; +} + +interface AllDocumentsResult { + allDocuments: { + nodes: DocumentNode[]; + }; +} + +interface ArticleNode { + rowId: number; + title: string; + tsvRank: number | null; + bodyBm25Score: number | null; + embeddingVectorDistance: number | null; + searchScore: number | null; +} + +interface AllArticlesResult { + allArticles: { + nodes: ArticleNode[]; + }; +} + +interface PostNode { + rowId: number; + title: string; + tsvRank: number | null; + embeddingVectorDistance: number | null; + searchScore: number | null; +} + +interface AllPostsResult { + allPosts: { + nodes: PostNode[]; + }; +} + +interface PostChunkNode { + rowId: number; + content: string; + searchScore: number | null; +} + +interface AllPostsChunksResult { + allPostsChunks: { + nodes: PostChunkNode[]; + }; +} + +type QueryFn = ( + query: string, + variables?: Record +) => Promise>; + +// ─── Test Suite: Single Adapter RRF Scenarios ──────────────────────────────── + +describe('RRF scoring — single adapter scenarios', () => { + let db: PgTestClient; + let teardown: () => Promise; + let query: QueryFn; + + beforeAll(async () => { + const unifiedPlugin = createUnifiedSearchPlugin({ + adapters: [ + createTsvectorAdapter(), + createBm25Adapter(), + createTrgmAdapter({ defaultThreshold: 0.1 }), + createPgvectorAdapter(), + ], + enableSearchScore: true, + enableUnifiedSearch: true, + rrfK: 60, + }); + + const testPreset = { + extends: [ConnectionFilterPreset()], + plugins: [ + TsvectorCodecPlugin, + Bm25CodecPlugin, + VectorCodecPlugin, + unifiedPlugin, + ], + }; + + const connections = await getConnections({ + schemas: ['unified_search_test'], + preset: testPreset, + useRoot: true, + authRole: 'postgres', 
+ }, [ + seed.sqlfile([join(__dirname, './setup.sql')]) + ]); + + db = connections.db; + teardown = connections.teardown; + query = connections.query; + + await db.client.query('BEGIN'); + }); + + afterAll(async () => { + if (db) { + try { await db.client.query('ROLLBACK'); } catch {} + } + if (teardown) await teardown(); + }); + + beforeEach(async () => { await db.beforeEach(); }); + afterEach(async () => { await db.afterEach(); }); + + it('tsvector only — searchScore is 0..1 and correctly ranked', async () => { + const result = await query(` + query { + allDocuments(where: { + tsvTsv: "machine learning" + }) { + nodes { + rowId + title + tsvRank + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes ?? []; + expect(nodes.length).toBeGreaterThan(0); + + for (const node of nodes) { + expect(typeof node.searchScore).toBe('number'); + expect(node.searchScore).toBeGreaterThanOrEqual(0); + expect(node.searchScore).toBeLessThanOrEqual(1); + } + + // The best match (highest tsvRank) should also have highest searchScore + const sorted = [...nodes].sort((a, b) => (b.searchScore ?? 0) - (a.searchScore ?? 0)); + const topResult = sorted[0]; + expect(topResult.searchScore).toBe(1); // Rank 1 with single adapter → normalized to 1.0 + }); + + it('BM25 only — searchScore is 0..1 and best match gets score 1.0', async () => { + const result = await query(` + query { + allDocuments(where: { + bm25Body: { query: "machine learning intelligence" } + }) { + nodes { + rowId + title + bodyBm25Score + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes ?? 
[]; + expect(nodes.length).toBeGreaterThan(0); + + for (const node of nodes) { + expect(typeof node.searchScore).toBe('number'); + expect(node.searchScore).toBeGreaterThanOrEqual(0); + expect(node.searchScore).toBeLessThanOrEqual(1); + } + + // Rank 1 document should get score 1.0 (single adapter, rank 1 = max) + const sorted = [...nodes].sort((a, b) => (b.searchScore ?? 0) - (a.searchScore ?? 0)); + expect(sorted[0].searchScore).toBe(1); + }); + + it('trgm only — searchScore is 0..1', async () => { + const result = await query(` + query { + allDocuments(where: { + trgmTitle: { value: "Machine Learn", threshold: 0.1 } + }) { + nodes { + rowId + title + titleTrgmSimilarity + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes ?? []; + expect(nodes.length).toBeGreaterThan(0); + + for (const node of nodes) { + expect(typeof node.searchScore).toBe('number'); + expect(node.searchScore).toBeGreaterThanOrEqual(0); + expect(node.searchScore).toBeLessThanOrEqual(1); + } + }); + + it('pgvector only — searchScore is 0..1', async () => { + const result = await query(` + query { + allDocuments(where: { + vectorEmbedding: { vector: [1, 0, 0], metric: COSINE } + }) { + nodes { + rowId + title + embeddingVectorDistance + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes ?? []; + expect(nodes.length).toBeGreaterThan(0); + + for (const node of nodes) { + expect(typeof node.searchScore).toBe('number'); + expect(node.searchScore).toBeGreaterThanOrEqual(0); + expect(node.searchScore).toBeLessThanOrEqual(1); + } + }); + + it('searchScore is null when no search filters active', async () => { + const result = await query(` + query { + allDocuments(first: 2) { + nodes { + rowId + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes ?? 
[]; + for (const node of nodes) { + expect(node.searchScore).toBeNull(); + } + }); +}); + +// ─── Test Suite: Multi-Adapter RRF Combinations ────────────────────────────── + +describe('RRF scoring — multi-adapter combinations', () => { + let db: PgTestClient; + let teardown: () => Promise; + let query: QueryFn; + + beforeAll(async () => { + const unifiedPlugin = createUnifiedSearchPlugin({ + adapters: [ + createTsvectorAdapter(), + createBm25Adapter(), + createTrgmAdapter({ defaultThreshold: 0.1 }), + createPgvectorAdapter(), + ], + enableSearchScore: true, + enableUnifiedSearch: true, + rrfK: 60, + }); + + const testPreset = { + extends: [ConnectionFilterPreset()], + plugins: [ + TsvectorCodecPlugin, + Bm25CodecPlugin, + VectorCodecPlugin, + unifiedPlugin, + ], + }; + + const connections = await getConnections({ + schemas: ['unified_search_test'], + preset: testPreset, + useRoot: true, + authRole: 'postgres', + }, [ + seed.sqlfile([join(__dirname, './setup.sql')]) + ]); + + db = connections.db; + teardown = connections.teardown; + query = connections.query; + + await db.client.query('BEGIN'); + }); + + afterAll(async () => { + if (db) { + try { await db.client.query('ROLLBACK'); } catch {} + } + if (teardown) await teardown(); + }); + + beforeEach(async () => { await db.beforeEach(); }); + afterEach(async () => { await db.afterEach(); }); + + it('BM25 + tsvector (via unifiedSearch) — searchScore combines ranks from both', async () => { + const result = await query(` + query { + allDocuments(where: { + unifiedSearch: "machine learning" + }) { + nodes { + rowId + title + tsvRank + bodyBm25Score + titleTrgmSimilarity + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes ?? 
[]; + expect(nodes.length).toBeGreaterThan(0); + + for (const node of nodes) { + expect(typeof node.searchScore).toBe('number'); + expect(node.searchScore).toBeGreaterThanOrEqual(0); + expect(node.searchScore).toBeLessThanOrEqual(1); + } + + // Document ranked #1 by multiple adapters should score higher than one + // ranked lower by all adapters + const sortedByScore = [...nodes].sort( + (a, b) => (b.searchScore ?? 0) - (a.searchScore ?? 0) + ); + // The top scoring doc should be one with "machine learning" in title AND body + expect(sortedByScore[0].title.toLowerCase()).toContain('machine learning'); + }); + + it('BM25 + pgvector (separate filters) — RRF fuses ranks from both', async () => { + const result = await query(` + query { + allDocuments(where: { + bm25Body: { query: "machine learning" } + vectorEmbedding: { vector: [1, 0, 0], metric: COSINE } + }) { + nodes { + rowId + title + bodyBm25Score + embeddingVectorDistance + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes ?? []; + expect(nodes.length).toBeGreaterThan(0); + + for (const node of nodes) { + expect(typeof node.searchScore).toBe('number'); + expect(node.searchScore).toBeGreaterThanOrEqual(0); + expect(node.searchScore).toBeLessThanOrEqual(1); + // Both scores should be populated when both filters are active + expect(node.bodyBm25Score).not.toBeNull(); + expect(node.embeddingVectorDistance).not.toBeNull(); + } + }); + + it('all 4 adapters — unifiedSearch + pgvector combines all ranks', async () => { + const result = await query(` + query { + allDocuments(where: { + unifiedSearch: "machine learning" + vectorEmbedding: { vector: [1, 0, 0], metric: COSINE } + }) { + nodes { + rowId + title + tsvRank + bodyBm25Score + titleTrgmSimilarity + embeddingVectorDistance + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes ?? 
[]; + expect(nodes.length).toBeGreaterThan(0); + + for (const node of nodes) { + expect(typeof node.searchScore).toBe('number'); + expect(node.searchScore).toBeGreaterThanOrEqual(0); + expect(node.searchScore).toBeLessThanOrEqual(1); + } + + // Document 1 ("Introduction to Machine Learning") should rank highest: + // - Matches "machine learning" in title/body (BM25, tsv, trgm) + // - Has embedding [1,0,0] (exact match for vector filter) + const doc1 = nodes.find((n) => n.rowId === 1); + if (doc1) { + expect(doc1.searchScore).toBeGreaterThan(0.5); + } + }); + + it('SEARCH_SCORE_DESC ordering returns results in roughly descending score order', async () => { + const result = await query(` + query { + allDocuments( + where: { unifiedSearch: "machine learning" } + orderBy: [SEARCH_SCORE_DESC] + ) { + nodes { + rowId + title + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes ?? []; + expect(nodes.length).toBeGreaterThan(1); + + // Verify all scores are valid + for (const node of nodes) { + expect(typeof node.searchScore).toBe('number'); + expect(node.searchScore).toBeGreaterThanOrEqual(0); + expect(node.searchScore).toBeLessThanOrEqual(1); + } + + // The first result should have the highest (or near-highest) score + const maxScore = Math.max(...nodes.map((n) => n.searchScore ?? 0)); + // Allow small floating point tolerance for ordering + expect(nodes[0].searchScore).toBeGreaterThanOrEqual(maxScore - 0.05); + }); + + it('document ranked #1 by multiple adapters scores higher than one ranked lower by all', async () => { + const result = await query(` + query { + allDocuments(where: { + unifiedSearch: "machine learning" + }) { + nodes { + rowId + title + tsvRank + bodyBm25Score + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes ?? 
[]; + expect(nodes.length).toBeGreaterThan(1); + + // Doc 1 should be #1 for BM25 and tsvector (most relevant to "machine learning") + // and should score higher than docs that only partially match + const doc1 = nodes.find((n) => n.rowId === 1); + const otherDocs = nodes.filter((n) => n.rowId !== 1); + + if (doc1 && otherDocs.length > 0) { + const maxOtherScore = Math.max(...otherDocs.map((d) => d.searchScore ?? 0)); + expect(doc1.searchScore).toBeGreaterThanOrEqual(maxOtherScore); + } + }); +}); + +// ─── Test Suite: Chunk-Aware Tables ────────────────────────────────────────── + +describe('RRF scoring — chunk-aware tables', () => { + let db: PgTestClient; + let teardown: () => Promise; + let query: QueryFn; + + beforeAll(async () => { + const unifiedPlugin = createUnifiedSearchPlugin({ + adapters: [ + createTsvectorAdapter(), + createBm25Adapter(), + createTrgmAdapter({ defaultThreshold: 0.1 }), + createPgvectorAdapter(), + ], + enableSearchScore: true, + enableUnifiedSearch: true, + rrfK: 60, + }); + + const testPreset = { + extends: [ConnectionFilterPreset()], + plugins: [ + TsvectorCodecPlugin, + Bm25CodecPlugin, + VectorCodecPlugin, + unifiedPlugin, + ], + }; + + const connections = await getConnections({ + schemas: ['unified_search_test'], + preset: testPreset, + useRoot: true, + authRole: 'postgres', + }, [ + seed.sqlfile([join(__dirname, './setup.sql')]) + ]); + + db = connections.db; + teardown = connections.teardown; + query = connections.query; + + await db.client.query('BEGIN'); + }); + + afterAll(async () => { + if (db) { + try { await db.client.query('ROLLBACK'); } catch {} + } + if (teardown) await teardown(); + }); + + beforeEach(async () => { await db.beforeEach(); }); + afterEach(async () => { await db.afterEach(); }); + + it('parent posts table — pgvector search produces valid RRF scores', async () => { + const result = await query(` + query { + allPosts(where: { + vectorEmbedding: { vector: [0.5, 0.5, 0], metric: COSINE } + }) { + nodes { + 
rowId + title + embeddingVectorDistance + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allPosts?.nodes ?? []; + expect(nodes.length).toBeGreaterThan(0); + + for (const node of nodes) { + expect(typeof node.searchScore).toBe('number'); + expect(node.searchScore).toBeGreaterThanOrEqual(0); + expect(node.searchScore).toBeLessThanOrEqual(1); + } + }); + + it('chunks table — BM25 search on chunk content produces valid RRF scores', async () => { + const result = await query(` + query { + allPostsChunks(where: { + bm25Content: { query: "quantum computing" } + }) { + nodes { + rowId + content + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allPostsChunks?.nodes ?? []; + expect(nodes.length).toBeGreaterThan(0); + + for (const node of nodes) { + expect(typeof node.searchScore).toBe('number'); + expect(node.searchScore).toBeGreaterThanOrEqual(0); + expect(node.searchScore).toBeLessThanOrEqual(1); + } + }); + + it('chunks table — pgvector search on chunk embeddings produces valid RRF scores', async () => { + const result = await query(` + query { + allPostsChunks(where: { + vectorEmbedding: { vector: [0.95, 0.05, 0], metric: COSINE } + }) { + nodes { + rowId + content + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allPostsChunks?.nodes ?? 
[]; + expect(nodes.length).toBeGreaterThan(0); + + for (const node of nodes) { + expect(typeof node.searchScore).toBe('number'); + expect(node.searchScore).toBeGreaterThanOrEqual(0); + expect(node.searchScore).toBeLessThanOrEqual(1); + } + }); + + it('chunks table — BM25 + pgvector combined on chunks', async () => { + const result = await query(` + query { + allPostsChunks(where: { + bm25Content: { query: "quantum computing" } + vectorEmbedding: { vector: [0.95, 0.05, 0], metric: COSINE } + }) { + nodes { + rowId + content + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allPostsChunks?.nodes ?? []; + // May be empty if no chunk matches both — that's valid (AND semantics) + for (const node of nodes) { + expect(typeof node.searchScore).toBe('number'); + expect(node.searchScore).toBeGreaterThanOrEqual(0); + expect(node.searchScore).toBeLessThanOrEqual(1); + } + }); +}); + +// ─── Test Suite: @searchConfig Weights + RRF ───────────────────────────────── + +describe('RRF scoring — custom @searchConfig weights', () => { + let db: PgTestClient; + let teardown: () => Promise; + let query: QueryFn; + + beforeAll(async () => { + const unifiedPlugin = createUnifiedSearchPlugin({ + adapters: [ + createTsvectorAdapter(), + createBm25Adapter(), + createTrgmAdapter({ defaultThreshold: 0.1 }), + createPgvectorAdapter(), + ], + enableSearchScore: true, + enableUnifiedSearch: true, + rrfK: 60, + }); + + // Inject @searchConfig on articles with custom weights + const smartTagsPlugin = makeTestSmartTagsPlugin({ + articles: { + searchConfig: { + weights: { tsv: 3.0, bm25: 1.0, vector: 0.5 }, + }, + }, + }); + + const testPreset = { + extends: [ConnectionFilterPreset()], + plugins: [ + TsvectorCodecPlugin, + Bm25CodecPlugin, + VectorCodecPlugin, + smartTagsPlugin, + unifiedPlugin, + ], + }; + + const connections = await getConnections({ + schemas: ['unified_search_test'], + preset: testPreset, + useRoot: true, + authRole: 
'postgres', + }, [ + seed.sqlfile([join(__dirname, './setup.sql')]) + ]); + + db = connections.db; + teardown = connections.teardown; + query = connections.query; + + await db.client.query('BEGIN'); + }); + + afterAll(async () => { + if (db) { + try { await db.client.query('ROLLBACK'); } catch {} + } + if (teardown) await teardown(); + }); + + beforeEach(async () => { await db.beforeEach(); }); + afterEach(async () => { await db.afterEach(); }); + + it('custom weights influence RRF contribution — higher weighted adapter has more impact', async () => { + const result = await query(` + query { + allArticles(where: { + tsvTsv: "database" + }) { + nodes { + rowId + title + tsvRank + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allArticles?.nodes ?? []; + expect(nodes.length).toBeGreaterThan(0); + + // searchScore should be valid 0..1 + for (const node of nodes) { + expect(typeof node.searchScore).toBe('number'); + expect(node.searchScore).toBeGreaterThanOrEqual(0); + expect(node.searchScore).toBeLessThanOrEqual(1); + } + }); + + it('weighted RRF still produces score 1.0 for rank-1 document (single adapter active)', async () => { + const result = await query(` + query { + allArticles(where: { + tsvTsv: "postgresql" + }) { + nodes { + rowId + title + tsvRank + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allArticles?.nodes ?? []; + expect(nodes.length).toBeGreaterThan(0); + + // Rank 1 doc gets max RRF contribution; with single adapter active + // searchScore = weight/(k+1) / weight/(k+1) = 1.0 + const sorted = [...nodes].sort((a, b) => (b.searchScore ?? 0) - (a.searchScore ?? 
0)); + expect(sorted[0].searchScore).toBe(1); + }); + + +}); + +// ─── Test Suite: Recency Boost + RRF ───────────────────────────────────────── + +describe('RRF scoring — recency boost', () => { + let db: PgTestClient; + let teardown: () => Promise; + let query: QueryFn; + + beforeAll(async () => { + const unifiedPlugin = createUnifiedSearchPlugin({ + adapters: [ + createTsvectorAdapter(), + createBm25Adapter(), + createTrgmAdapter({ defaultThreshold: 0.1 }), + createPgvectorAdapter(), + ], + enableSearchScore: true, + enableUnifiedSearch: true, + rrfK: 60, + }); + + // articles table with recency boost enabled + const smartTagsPlugin = makeTestSmartTagsPlugin({ + articles: { + searchConfig: { + boost_recent: true, + boost_recency_field: 'updated_at', + boost_recency_decay: 0.99, + }, + }, + }); + + const testPreset = { + extends: [ConnectionFilterPreset()], + plugins: [ + TsvectorCodecPlugin, + Bm25CodecPlugin, + VectorCodecPlugin, + smartTagsPlugin, + unifiedPlugin, + ], + }; + + const connections = await getConnections({ + schemas: ['unified_search_test'], + preset: testPreset, + useRoot: true, + authRole: 'postgres', + }, [ + seed.sqlfile([join(__dirname, './setup.sql')]) + ]); + + db = connections.db; + teardown = connections.teardown; + query = connections.query; + + await db.client.query('BEGIN'); + }); + + afterAll(async () => { + if (db) { + try { await db.client.query('ROLLBACK'); } catch {} + } + if (teardown) await teardown(); + }); + + beforeEach(async () => { await db.beforeEach(); }); + afterEach(async () => { await db.afterEach(); }); + + it('recency boost applied to RRF scores — newer documents get higher scores', async () => { + const result = await query(` + query { + allArticles(where: { + unifiedSearch: "database" + }) { + nodes { + rowId + title + tsvRank + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allArticles?.nodes ?? 
[]; + expect(nodes.length).toBeGreaterThan(0); + + // All scores should still be valid 0..1 with recency boost + for (const node of nodes) { + expect(typeof node.searchScore).toBe('number'); + expect(node.searchScore).toBeGreaterThanOrEqual(0); + expect(node.searchScore).toBeLessThanOrEqual(1); + } + }); + + it('recency boost reduces older document scores relative to newer ones', async () => { + // Article 1: 2025-01-01 (oldest) + // Article 3: 2026-01-01 (newest) + // Both match "database" via tsvector + const result = await query(` + query { + allArticles(where: { + tsvTsv: "database" + }) { + nodes { + rowId + title + tsvRank + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allArticles?.nodes ?? []; + + const article1 = nodes.find((n) => n.rowId === 1); + const article3 = nodes.find((n) => n.rowId === 3); + + // If both match, the newer one (article 3) should have a higher score + // due to recency boost (decay applied to older articles) + if (article1 && article3) { + // Article 3 is newer so decay has less effect → higher score + expect(article3.searchScore).toBeGreaterThan(article1.searchScore!); + } + }); +}); + +// ─── Test Suite: Custom rrfK parameter ─────────────────────────────────────── + +describe('RRF scoring — custom rrfK parameter', () => { + let db: PgTestClient; + let teardown: () => Promise; + let query: QueryFn; + + beforeAll(async () => { + // Use a small rrfK to make rank differences more pronounced + const unifiedPlugin = createUnifiedSearchPlugin({ + adapters: [ + createTsvectorAdapter(), + createBm25Adapter(), + createTrgmAdapter({ defaultThreshold: 0.1 }), + createPgvectorAdapter(), + ], + enableSearchScore: true, + enableUnifiedSearch: true, + rrfK: 10, // Small k makes top-ranked items dominate more + }); + + const testPreset = { + extends: [ConnectionFilterPreset()], + plugins: [ + TsvectorCodecPlugin, + Bm25CodecPlugin, + VectorCodecPlugin, + unifiedPlugin, + ], + }; + + const 
connections = await getConnections({ + schemas: ['unified_search_test'], + preset: testPreset, + useRoot: true, + authRole: 'postgres', + }, [ + seed.sqlfile([join(__dirname, './setup.sql')]) + ]); + + db = connections.db; + teardown = connections.teardown; + query = connections.query; + + await db.client.query('BEGIN'); + }); + + afterAll(async () => { + if (db) { + try { await db.client.query('ROLLBACK'); } catch {} + } + if (teardown) await teardown(); + }); + + beforeEach(async () => { await db.beforeEach(); }); + afterEach(async () => { await db.afterEach(); }); + + it('small rrfK (10) still produces valid 0..1 scores', async () => { + const result = await query(` + query { + allDocuments(where: { + unifiedSearch: "machine learning" + }) { + nodes { + rowId + title + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes ?? []; + expect(nodes.length).toBeGreaterThan(0); + + for (const node of nodes) { + expect(typeof node.searchScore).toBe('number'); + expect(node.searchScore).toBeGreaterThanOrEqual(0); + expect(node.searchScore).toBeLessThanOrEqual(1); + } + }); + + it('with small rrfK, score difference between rank 1 and rank 2 is larger', async () => { + const result = await query(` + query { + allDocuments(where: { + tsvTsv: "learning" + }) { + nodes { + rowId + title + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes ?? []; + expect(nodes.length).toBeGreaterThan(1); + + const sorted = [...nodes].sort((a, b) => (b.searchScore ?? 0) - (a.searchScore ?? 
0)); + const firstScore = sorted[0].searchScore!; + const secondScore = sorted[1].searchScore!; + + // With k=10: rank1 → 1/(10+1) = 0.0909, rank2 → 1/(10+2) = 0.0833 + // Normalized: rank1 → 1.0, rank2 → 0.0833/0.0909 ≈ 0.917 + // The gap should be noticeable + expect(firstScore).toBeGreaterThan(secondScore); + expect(firstScore - secondScore).toBeGreaterThan(0.05); + }); +}); diff --git a/graphile/graphile-search/src/__tests__/search-config-integration.test.ts b/graphile/graphile-search/src/__tests__/search-config-integration.test.ts index bb12ca9e7d..d63a5dc8ab 100644 --- a/graphile/graphile-search/src/__tests__/search-config-integration.test.ts +++ b/graphile/graphile-search/src/__tests__/search-config-integration.test.ts @@ -274,93 +274,7 @@ describe('@searchConfig integration tests', () => { }); }); -// ─── Test Suite: @searchConfig with sigmoid normalization ──────────────────── -describe('@searchConfig with sigmoid normalization', () => { - let teardown: () => Promise; - let query: QueryFn; - - beforeAll(async () => { - const unifiedPlugin = createUnifiedSearchPlugin({ - adapters: [ - createTsvectorAdapter(), - createBm25Adapter(), - createTrgmAdapter({ defaultThreshold: 0.1 }), - createPgvectorAdapter(), - ], - enableSearchScore: true, - enableUnifiedSearch: true, - }); - - // Inject @searchConfig with sigmoid normalization - const smartTagsPlugin = makeTestSmartTagsPlugin({ - articles: { - searchConfig: { - normalization: 'sigmoid', - }, - }, - }); - - const testPreset = { - extends: [ConnectionFilterPreset()], - plugins: [ - TsvectorCodecPlugin, - Bm25CodecPlugin, - VectorCodecPlugin, - smartTagsPlugin, - unifiedPlugin, - ], - }; - - const connections = await getConnections( - { - schemas: ['unified_search_test'], - preset: testPreset, - useRoot: true, - authRole: 'postgres', - }, - [seed.sqlfile([join(__dirname, './setup.sql')])] - ); - - teardown = connections.teardown; - query = connections.query; - }); - - afterAll(async () => { - if (teardown) { - 
await teardown(); - } - }); - - it('produces valid scores with sigmoid normalization forced', async () => { - const result = await query(` - query { - allArticles(where: { - tsvTsv: "database" - }) { - nodes { - rowId - title - tsvRank - searchScore - } - } - } - `); - - expect(result.errors).toBeUndefined(); - const nodes = result.data?.allArticles?.nodes; - expect(nodes).toBeDefined(); - expect(nodes!.length).toBeGreaterThan(0); - - for (const node of nodes!) { - expect(typeof node.searchScore).toBe('number'); - // Sigmoid normalization always produces values in (0, 1) - expect(node.searchScore).toBeGreaterThan(0); - expect(node.searchScore).toBeLessThan(1); - } - }); -}); // ─── Test Suite: @hasChunks chunk-aware querying ───────────────────────────── diff --git a/graphile/graphile-search/src/plugin.ts b/graphile/graphile-search/src/plugin.ts index a35f6a2321..a54757d266 100644 --- a/graphile/graphile-search/src/plugin.ts +++ b/graphile/graphile-search/src/plugin.ts @@ -72,7 +72,6 @@ interface SearchScoreDetails { */ interface SearchConfig { weights?: Record; - normalization?: 'linear' | 'sigmoid'; boost_recent?: boolean; boost_recency_field?: string; boost_recency_decay?: number; @@ -100,34 +99,25 @@ function getSearchConfig(codec: PgCodecWithAttributes): SearchConfig | undefined } /** - * Normalize a raw score to 0..1 using the specified strategy. - * - * When strategy is 'sigmoid', sigmoid normalization is used for ALL adapters - * (both bounded and unbounded). When strategy is 'linear' (default), - * known-range adapters use linear normalization and unbounded adapters - * use sigmoid normalization as fallback. + * Map a raw score to 0..1 for effective rank estimation in the RRF fallback path. + * Used only when a rank window function is not available for an adapter. 
*/ function normalizeScore( score: number, lowerIsBetter: boolean, range: [number, number] | null, - strategy: 'linear' | 'sigmoid' = 'linear', ): number { let normalized: number; - if (range && strategy === 'linear') { - // Known range + linear strategy: linear normalization + if (range) { const [min, max] = range; normalized = lowerIsBetter ? 1 - (score - min) / (max - min) : (score - min) / (max - min); } else { - // Unbounded range, or explicit sigmoid strategy: sigmoid normalization if (lowerIsBetter) { - // BM25: negative scores, more negative = better normalized = 1 / (1 + Math.abs(score)); } else { - // Higher-is-better: map via sigmoid normalized = score / (1 + score); } } @@ -177,7 +167,7 @@ interface AdapterColumnCache { export function createUnifiedSearchPlugin( options: UnifiedSearchOptions ): GraphileConfig.Plugin { - const { adapters, enableSearchScore = true, enableUnifiedSearch = true } = options; + const { adapters, enableSearchScore = true, enableUnifiedSearch = true, rrfK = 60 } = options; // Per-codec cache of discovered columns, keyed by codec name const codecCache = new Map(); @@ -462,7 +452,7 @@ export function createUnifiedSearchPlugin( } } - // ── Composite searchScore field ── + // ── Composite searchScore field (RRF — Reciprocal Rank Fusion) ── if (enableSearchScore && adapterColumns.length > 0) { // Collect all meta keys for all adapters/columns so the // composite field can read them at execution time @@ -494,13 +484,12 @@ export function createUnifiedSearchPlugin( // Resolve effective weights: per-table > global > equal (undefined) const effectiveWeights = tableSearchConfig?.weights ?? options.searchScoreWeights; - // Resolve normalization strategy: per-table > default 'linear' - const normalizationStrategy = tableSearchConfig?.normalization ?? 'linear'; // Recency boost config from per-table smart tag let boostRecent = tableSearchConfig?.boost_recent ?? false; const boostRecencyField = tableSearchConfig?.boost_recency_field ?? 
'updated_at'; const boostRecencyDecay = tableSearchConfig?.boost_recency_decay ?? 0.95; + // Phase I: Validate that the recency field actually exists on the table. // If it doesn't, disable recency boost gracefully instead of crashing at query time. if (boostRecent && boostRecencyField && !codec.attributes[boostRecencyField]) { @@ -522,7 +511,7 @@ export function createUnifiedSearchPlugin( () => ({ description: 'Composite search relevance score (0..1, higher = more relevant). ' + - 'Computed by normalizing and averaging all active search signals. ' + + 'Computed using Reciprocal Rank Fusion (RRF) across all active search signals. ' + 'Supports per-table weight customization via @searchConfig smart tag. ' + 'Returns null when no search filters are active.', type: GraphQLFloat, @@ -551,56 +540,111 @@ export function createUnifiedSearchPlugin( // Capture the index in a local const for the lambda closure const capturedRecencyIndex = recencySelectIndex; + // For RRF we also need rank expressions. Inject ROW_NUMBER() + // window functions for each adapter score into the SELECT. + // These will be populated at filter-apply time via meta. + // We store a meta key suffix "__rank" alongside the score. + const rankMetaKeys = allMetaKeys.map( + (mk) => `${mk.metaKey}__rank` + ); + const $rankMetaSteps = rankMetaKeys.map( + (key) => $select.getMeta(key) + ); + return lambda( - [...$metaSteps, $row], + [...$metaSteps, ...$rankMetaSteps, $row], (args: readonly any[]) => { const row = args[args.length - 1]; if (row == null) return null; - let weightedSum = 0; - let totalWeight = 0; + const numAdapters = allMetaKeys.length; + let rrfSum = 0; + let maxPossibleRrf = 0; + let hasAnyScore = false; // Read recency value from the injected SELECT column const recencyValue = (boostRecent && capturedRecencyIndex != null) ? 
row[capturedRecencyIndex] : null; - for (let i = 0; i < allMetaKeys.length; i++) { - const details = args[i] as SearchScoreDetails | null; - if (details == null || details.selectIndex == null) continue; - - const rawValue = row[details.selectIndex]; - if (rawValue == null) continue; - - const score = TYPES.float.fromPg(rawValue as string); - if (typeof score !== 'number' || isNaN(score)) continue; + for (let i = 0; i < numAdapters; i++) { + const scoreDetails = args[i] as SearchScoreDetails | null; + const rankDetails = args[numAdapters + i] as SearchScoreDetails | null; const mk = allMetaKeys[i]; const weight = effectiveWeights?.[mk.adapterName] ?? 1; - // Normalize using the resolved strategy - let normalized = normalizeScore( - score, - mk.lowerIsBetter, - mk.range, - normalizationStrategy, - ); - - // Apply recency boost if configured - if (boostRecent && recencyValue != null) { - normalized = applyRecencyBoost( - normalized, - recencyValue, - boostRecencyDecay, - ); + // Determine if this adapter is active (has meta set by a filter) + const adapterHasMeta = (rankDetails != null && rankDetails.selectIndex != null) + || (scoreDetails != null && scoreDetails.selectIndex != null); + + if (!adapterHasMeta) continue; + + // Only include active adapters in normalization denominator + maxPossibleRrf += weight / (rrfK + 1); + + // Try to use rank-based RRF (preferred) + if (rankDetails != null && rankDetails.selectIndex != null) { + const rawRank = row[rankDetails.selectIndex]; + if (rawRank != null) { + const rank = TYPES.float.fromPg(rawRank as string); + if (typeof rank === 'number' && !isNaN(rank) && rank > 0) { + hasAnyScore = true; + let contribution = weight / (rrfK + rank); + + // Apply recency boost if configured + if (boostRecent && recencyValue != null) { + contribution = applyRecencyBoost( + contribution, + recencyValue, + boostRecencyDecay, + ); + } + + rrfSum += contribution; + continue; + } + } } - weightedSum += normalized * weight; - totalWeight += 
weight; + // Fallback: if rank is not available but score exists, + // use score-based rank estimation. + if (scoreDetails != null && scoreDetails.selectIndex != null) { + const rawValue = row[scoreDetails.selectIndex]; + if (rawValue != null) { + const score = TYPES.float.fromPg(rawValue as string); + if (typeof score === 'number' && !isNaN(score)) { + hasAnyScore = true; + const normalizedScore = normalizeScore( + score, + mk.lowerIsBetter, + mk.range, + ); + // Map normalized score to an effective rank (linear in 1 - score): + // score=1.0 → rank=1, score=0.5 → rank=rrfK+1, score=0 → rank=2*rrfK+1 + const effectiveRank = Math.max(1, Math.round( + 1 + (1 - normalizedScore) * (rrfK * 2) + )); + let contribution = weight / (rrfK + effectiveRank); + + if (boostRecent && recencyValue != null) { + contribution = applyRecencyBoost( + contribution, + recencyValue, + boostRecencyDecay, + ); + } + + rrfSum += contribution; + } + } + } } - if (totalWeight === 0) return null; - return weightedSum / totalWeight; + if (!hasAnyScore || maxPossibleRrf === 0) return null; + + // Normalize to 0..1 by dividing by max possible RRF score + return Math.min(1, rrfSum / maxPossibleRrf); } ); }, @@ -835,6 +879,15 @@ export function createUnifiedSearchPlugin( selectIndex: scoreIndex, } as SearchScoreDetails); + // Add rank (ROW_NUMBER window function) for RRF scoring + const rankMetaKey = `${scoreMetaKey}__rank`; + const orderDirection = adapter.scoreSemantics.lowerIsBetter ? 'ASC' : 'DESC'; + const rankSql = sql`(ROW_NUMBER() OVER (ORDER BY ${sql.parens(result.scoreExpression)} ${orderDirection === 'ASC' ? sql.fragment`ASC` : sql.fragment`DESC`} NULLS LAST))::text`; + const rankIndex = qb.selectAndReturnIndex(rankSql); + qb.setMeta(rankMetaKey, { + selectIndex: rankIndex, + } as SearchScoreDetails); + // ORDER BY: read the direction stored by the orderBy // enum (which ran first) via the shared alias key.
const orderKey = `unified_order_${adapter.name}_${baseFieldName}`; @@ -927,6 +980,15 @@ export function createUnifiedSearchPlugin( selectIndex: scoreIndex, } as SearchScoreDetails); + // Add rank (ROW_NUMBER window function) for RRF scoring + const rankMetaKey = `${scoreMetaKey}__rank`; + const orderDirection = adapter.scoreSemantics.lowerIsBetter ? 'ASC' : 'DESC'; + const rankSql = sql`(ROW_NUMBER() OVER (ORDER BY ${sql.parens(result.scoreExpression)} ${orderDirection === 'ASC' ? sql.fragment`ASC` : sql.fragment`DESC`} NULLS LAST))::text`; + const rankIndex = qb.selectAndReturnIndex(rankSql); + qb.setMeta(rankMetaKey, { + selectIndex: rankIndex, + } as SearchScoreDetails); + // ORDER BY: read the direction stored by the orderBy // enum (which ran first) via the shared alias key. const orderKey = `unified_order_${adapter.name}_${baseFieldName}`; diff --git a/graphile/graphile-search/src/preset.ts b/graphile/graphile-search/src/preset.ts index 9a51894853..5ad42bec48 100644 --- a/graphile/graphile-search/src/preset.ts +++ b/graphile/graphile-search/src/preset.ts @@ -91,6 +91,13 @@ export interface UnifiedSearchPresetOptions { * @default 'english' */ tsConfig?: string; + + /** + * RRF (Reciprocal Rank Fusion) smoothing constant for composite searchScore. + * Higher values make scoring more democratic across rank positions. 
+ * @default 60 + */ + rrfK?: number; } /** @@ -109,6 +116,7 @@ export function UnifiedSearchPreset( searchScoreWeights, fullTextScalarName = 'FullText', tsConfig = 'english', + rrfK, } = options; const adapters = []; @@ -138,6 +146,7 @@ export function UnifiedSearchPreset( enableSearchScore, enableUnifiedSearch, searchScoreWeights, + rrfK, }; // Collect codec plugins based on which adapters are enabled diff --git a/graphile/graphile-search/src/types.ts b/graphile/graphile-search/src/types.ts index f99f1377fe..87407ed0af 100644 --- a/graphile/graphile-search/src/types.ts +++ b/graphile/graphile-search/src/types.ts @@ -226,4 +226,15 @@ export interface UnifiedSearchOptions { * @example { bm25: 0.5, trgm: 0.3, tsv: 0.2 } */ searchScoreWeights?: Record; + + /** + * RRF (Reciprocal Rank Fusion) smoothing constant. Controls how much + * top-ranked items dominate the composite score. Higher values make the + * scoring more democratic across rank positions. + * + * The RRF contribution of each adapter is: weight / (rrfK + rank) + * + * @default 60 + */ + rrfK?: number; } diff --git a/graphile/graphile-settings/__tests__/preset-integration.test.ts b/graphile/graphile-settings/__tests__/preset-integration.test.ts index 91f8064bd0..99b0be867e 100644 --- a/graphile/graphile-settings/__tests__/preset-integration.test.ts +++ b/graphile/graphile-settings/__tests__/preset-integration.test.ts @@ -878,7 +878,7 @@ describe('Kitchen sink (multi-plugin queries)', () => { * * generates: * - * ORDER BY paradedb.score(id) ASC, + * ORDER BY (body <@> to_bm25query('park green', '"schema"."idx"')) ASC, * similarity(name, 'park') DESC * * Each scoring plugin (tsvector, BM25, pg_trgm) registers its own enum @@ -947,7 +947,7 @@ describe('Kitchen sink (multi-plugin queries)', () => { tsvTsv: "park" # 2. 
BM25 relevance search (Bm25SearchPlugin via pg_textsearch) - # WHERE body @@@ paradedb.parse('park green') + # WHERE (body <@> to_bm25query('park green', '"schema"."idx"')) < threshold # (BM25 filter apply runs first in the schema → primary ORDER BY) bm25Body: { query: "park green" } @@ -978,7 +978,7 @@ describe('Kitchen sink (multi-plugin queries)', () => { # Tiebreaker: pg_trgm similarity score (best fuzzy match first) # # Generates SQL: - # ORDER BY paradedb.score(id) ASC, + # ORDER BY (body <@> to_bm25query('park green', '"schema"."idx"')) ASC, # similarity(name, 'park') DESC # # Each plugin registers its own enum values on LocationOrderBy: diff --git a/graphql/orm-test/__fixtures__/seed/mega-seed.sql b/graphql/orm-test/__fixtures__/seed/mega-seed.sql index 7b936e3ccc..02e4ba596c 100644 --- a/graphql/orm-test/__fixtures__/seed/mega-seed.sql +++ b/graphql/orm-test/__fixtures__/seed/mega-seed.sql @@ -186,6 +186,11 @@ INSERT INTO mega_test.location_amenities (location_id, amenity_id) VALUES (5, 3), -- High Line Park: Restrooms (6, 1), (6, 3), (6, 4); -- Met Museum: WiFi, Restrooms, Gift Shop +-- Ensure BM25 index is fully built before tests run. +-- pg_textsearch's bm25 index can have a brief lag after INSERT; VACUUM forces +-- a full index pass so queries immediately return correct results. 
+VACUUM ANALYZE mega_test.locations; + -- Reset sequences SELECT setval('mega_test.categories_id_seq', 3); SELECT setval('mega_test.locations_id_seq', 7); diff --git a/graphql/orm-test/__tests__/mega-query.test.ts b/graphql/orm-test/__tests__/mega-query.test.ts index 176255d088..9289bcc344 100644 --- a/graphql/orm-test/__tests__/mega-query.test.ts +++ b/graphql/orm-test/__tests__/mega-query.test.ts @@ -819,10 +819,10 @@ describe('Mega query integration (ORM)', () => { expect(nodes).toHaveLength(4); for (const node of nodes) { - // searchScore is a composite of the individual signals + // searchScore is a composite via RRF — 0..1 (rank-1 doc can be exactly 1.0) expect(typeof node.searchScore).toBe('number'); expect(node.searchScore).toBeGreaterThan(0); - expect(node.searchScore).toBeLessThan(1); + expect(node.searchScore).toBeLessThanOrEqual(1); // All three individual signals are populated expect(node.tsvRank).toBeGreaterThan(0); @@ -921,10 +921,10 @@ describe('Mega query integration (ORM)', () => { expect(typeof node.nameTrgmSimilarity).toBe('number'); expect(node.nameTrgmSimilarity).toBeGreaterThan(0.2); - // searchScore \u2014 composite signal combining all active search signals + // searchScore — composite via RRF, 0..1 (rank-1 can be exactly 1.0) expect(typeof node.searchScore).toBe('number'); expect(node.searchScore).toBeGreaterThan(0); - expect(node.searchScore).toBeLessThan(1); + expect(node.searchScore).toBeLessThanOrEqual(1); // pgvector embedding (float array) expect(Array.isArray(node.embedding)).toBe(true); diff --git a/packages/node-type-registry/src/data/search-unified.ts b/packages/node-type-registry/src/data/search-unified.ts index d433fcfbd1..9794dd8996 100644 --- a/packages/node-type-registry/src/data/search-unified.ts +++ b/packages/node-type-registry/src/data/search-unified.ts @@ -201,15 +201,7 @@ export const SearchUnified: NodeTypeDefinition = { type: 'object', description: 'Per-algorithm weights: {tsv: 1.5, bm25: 1.0, pgvector: 0.8,
trgm: 0.3}' }, - normalization: { - type: 'string', - enum: [ - 'linear', - 'sigmoid' - ], - description: 'Score normalization strategy', - default: 'linear' - }, + boost_recent: { type: 'boolean', description: 'Enable recency boost for search results',