From 75360370dfd61fb60bdf5b83e1b0ff122fe376d7 Mon Sep 17 00:00:00 2001
From: jazairi <16103405+jazairi@users.noreply.github.com>
Date: Fri, 12 Jun 2026 13:16:33 -0700
Subject: [PATCH] Add filter support to semantic search

Why these changes are being introduced:

Semantic searches ignore filters, meaning that when semantic mode is
enabled in the UI, the tabs do not filter as intended.

Relevant ticket(s):

- [USE-593](https://mitlibraries.atlassian.net/browse/USE-593)

How this addresses that need:

SemanticQueryBuilder now applies FilterBuilder filters to semantic
queries, making it consistent with lexical and hybrid search.

Side effects of this change:

We should consider adding regression test coverage in the TIMDEX UI
repo.
---
 app/models/hybrid_query_builder.rb         | 12 ++-
 app/models/semantic_query_builder.rb       | 23 +++++-
 test/models/semantic_query_builder_test.rb | 86 +++++++++++++++++++++-
 3 files changed, 116 insertions(+), 5 deletions(-)

diff --git a/app/models/hybrid_query_builder.rb b/app/models/hybrid_query_builder.rb
index 6cf408b..e50f81f 100644
--- a/app/models/hybrid_query_builder.rb
+++ b/app/models/hybrid_query_builder.rb
@@ -46,9 +46,19 @@ def combine_queries(semantic_query, lexical_query)
                        lexical_query
                      end
 
+    # Remove filters from semantic branch since they'll be applied at top level
+    # to avoid redundant filter clauses (non-bool queries pass through untouched)
+    semantic_without_filters = if semantic_query.is_a?(Hash) && semantic_query[:bool].is_a?(Hash)
+                                 {
+                                   bool: semantic_query[:bool].except(:filter)
+                                 }
+                               else
+                                 semantic_query
+                               end
+
     hybrid_bool = {
       should: [
-        semantic_query,
+        semantic_without_filters,
         lexical_search
       ]
     }
diff --git a/app/models/semantic_query_builder.rb b/app/models/semantic_query_builder.rb
index bf884aa..dafffe3 100644
--- a/app/models/semantic_query_builder.rb
+++ b/app/models/semantic_query_builder.rb
@@ -5,11 +5,28 @@ class LambdaError < StandardError; end
   def build(params, fulltext: false)
     query_text = params[:q].to_s.strip
 
-    # If no query text provided, return a match_all query (consistent with keyword search behavior)
-    return { match_all: {} } if query_text.blank?
+    # If no query text provided, return a match_all query with filters applied
+    # (consistent with keyword search behavior and enabling tab filtering with empty search)
+    if query_text.blank?
+      filters = FilterBuilder.new.build(params)
+      return { bool: { filter: filters } } if filters.present?
+
+      return { match_all: {} }
+    end
 
     lambda_response = invoke_semantic_builder(query_text)
-    parse_lambda_response(lambda_response)
+    semantic_query = parse_lambda_response(lambda_response)
+
+    # Validate the query structure has a bool clause before applying filters
+    unless semantic_query.is_a?(Hash) && semantic_query[:bool].is_a?(Hash)
+      raise LambdaError, "Invalid semantic query structure: expected bool clause, got #{semantic_query.inspect}"
+    end
+
+    # Merge filters into the semantic query, keeping any filter clause the lambda returned
+    filters = FilterBuilder.new.build(params)
+    semantic_query[:bool][:filter] = Array.wrap(semantic_query[:bool][:filter]) + Array.wrap(filters)
+
+    semantic_query
   end
 
   private
diff --git a/test/models/semantic_query_builder_test.rb b/test/models/semantic_query_builder_test.rb
index f11ef85..756511f 100644
--- a/test/models/semantic_query_builder_test.rb
+++ b/test/models/semantic_query_builder_test.rb
@@ -34,6 +34,16 @@ def setup_mock_lambda(response_data)
     assert_equal({ match_all: {} }, result)
   end
 
+  test 'applies filters to blank query' do
+    params = { q: '', source_filter: ['aspace'] }
+    result = @builder.build(params)
+
+    # When query is blank but filters are specified, should return bool query with filter clause
+    assert result.key?(:bool)
+    assert result[:bool].key?(:filter)
+    assert result[:bool][:filter].present?
+  end
+
   test 'builds semantic query from lambda response' do
     query_text = 'hello world'
     mock_response = {
@@ -57,7 +67,8 @@ def setup_mock_lambda(response_data)
         should: [
           { rank_feature: { field: 'embedding_full_record.hello', boost: 6.94 } },
           { rank_feature: { field: 'embedding_full_record.world', boost: 3.42 } }
-        ]
+        ],
+        filter: []
       }
     }
 
@@ -114,4 +125,77 @@ def setup_mock_lambda(response_data)
       @builder.build(params)
     end
   end
+
+  test 'preserves source_filter in semantic queries' do
+    query_text = 'test search'
+    mock_response = {
+      'query' => {
+        'bool' => {
+          'should' => [
+            { 'rank_feature' => { 'field' => 'embedding_full_record.test', 'boost' => 5.0 } }
+          ]
+        }
+      }
+    }
+
+    setup_mock_lambda(mock_response)
+
+    params = { q: query_text, source_filter: ['aspace'] }
+    result = @builder.build(params)
+
+    # Verify filter clause was added to the semantic query
+    assert_includes result[:bool].keys, :filter
+    assert result[:bool][:filter].present?
+
+    # Verify the filter contains the source filter
+    filter_terms = result[:bool][:filter].flat_map { |f| f[:bool][:should].first[:term][:source] }
+    assert_includes filter_terms, 'aspace'
+  end
+
+  test 'preserves content_type_filter in semantic queries' do
+    query_text = 'test search'
+    mock_response = {
+      'query' => {
+        'bool' => {
+          'should' => [
+            { 'rank_feature' => { 'field' => 'embedding_full_record.test', 'boost' => 5.0 } }
+          ]
+        }
+      }
+    }
+
+    setup_mock_lambda(mock_response)
+
+    params = { q: query_text, content_type_filter: %w[article book] }
+    result = @builder.build(params)
+
+    # Verify filter clause was added to the semantic query
+    assert_includes result[:bool].keys, :filter
+    assert result[:bool][:filter].present?
+
+    # Verify the filter contains multiple content type filters
+    assert_equal 2, result[:bool][:filter].length
+  end
+
+  test 'applies empty filters array when no filters specified' do
+    query_text = 'test search'
+    mock_response = {
+      'query' => {
+        'bool' => {
+          'should' => [
+            { 'rank_feature' => { 'field' => 'embedding_full_record.test', 'boost' => 5.0 } }
+          ]
+        }
+      }
+    }
+
+    setup_mock_lambda(mock_response)
+
+    params = { q: query_text }
+    result = @builder.build(params)
+
+    # Verify filter clause exists but is empty array
+    assert_includes result[:bool].keys, :filter
+    assert_equal [], result[:bool][:filter]
+  end
 end