diff --git a/Cargo.toml b/Cargo.toml index 2c95688..1148842 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ members = [ ] [workspace.package] -version = "1.20.5" +version = "1.21.0" edition = "2024" authors = ["Terraphim Team "] documentation = "https://terraphim.ai" diff --git a/crates/terraphim_grep/Cargo.toml b/crates/terraphim_grep/Cargo.toml index b3ee0f2..1116a1e 100644 --- a/crates/terraphim_grep/Cargo.toml +++ b/crates/terraphim_grep/Cargo.toml @@ -28,7 +28,7 @@ log.workspace = true terraphim_types = { version = "1.15.0" } terraphim_rolegraph = { version = "1.15.0" } terraphim_automata = { version = "1.19.2" } -terraphim_service = { version = "1.20.4", optional = true, registry = "terraphim" } +terraphim_service = { version = "1.20.5", optional = true, registry = "terraphim" } terraphim_config = { version = "1.15.0" } fff-search = { version = "0.8.4", optional = true } @@ -41,8 +41,11 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] } clap = { version = "4", features = ["derive"] } [features] -default = ["llm"] +default = ["llm", "code-search"] llm = ["dep:terraphim_service"] +# Enable fast file-finder code search. This is enabled by default so that +# terraphim-grep can fall back to plain enhanced grep when no knowledge graph +# thesaurus is configured. code-search = ["dep:fff-search"] # Enable OpenRouter provider support (required for live OpenRouter tests against free models) openrouter = ["llm", "terraphim_service/openrouter"] diff --git a/crates/terraphim_grep/src/lib.rs b/crates/terraphim_grep/src/lib.rs index 796d717..fdf0057 100644 --- a/crates/terraphim_grep/src/lib.rs +++ b/crates/terraphim_grep/src/lib.rs @@ -390,4 +390,47 @@ mod tests { ); assert!(result.answer.is_none(), "no LLM -> no synthesised answer"); } + + /// When no thesaurus is available, the searcher must still run the `fff-search` code path + /// and return results with empty concepts. This is the "enhanced grep" failover mode. + #[cfg(feature = "code-search")] + #[tokio::test] + async fn search_without_thesaurus_uses_fff_mode() { + let tmp = tempfile::TempDir::new().expect("tempdir"); + for i in 0..3 { + let path = tmp.path().join(format!("file_{i}.rs")); + std::fs::write(&path, format!("fn target_{i}() {{ /* target */ }}\n")).unwrap(); + } + + // Empty thesaurus => no KG configuration. + let thesaurus = Thesaurus::new("test-role".to_string()); + assert!(thesaurus.is_empty()); + + let hybrid = HybridSearcher::new("test-role".to_string(), thesaurus) + .expect("build hybrid searcher") + .with_search_path(tmp.path().to_path_buf()); + let grep = TerraphimGrep::new(Arc::new(hybrid), Arc::new(SufficiencyJudge::default())); + + let result = grep + .search( + "target", + GrepOptions { + haystack: Haystack::Code, + max_results: 50, + ..GrepOptions::default() + }, + ) + .await + .expect("search should succeed without thesaurus"); + + assert!( + !result.chunks.is_empty(), + "expected fff-search to return chunks without KG" + ); + assert!( + result.concepts.is_empty(), + "expected no KG concepts without thesaurus" + ); + assert_eq!(result.stats.kg_hits, 0); + } } diff --git a/crates/terraphim_grep/src/main.rs b/crates/terraphim_grep/src/main.rs index fab7a4d..9632818 100644 --- a/crates/terraphim_grep/src/main.rs +++ b/crates/terraphim_grep/src/main.rs @@ -1,4 +1,4 @@ -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::sync::Arc; use anyhow::{Context, Result}; @@ -7,6 +7,7 @@ use terraphim_automata::AutomataPath; use terraphim_grep::{ GrepOptions, GrepResult, Haystack, HybridSearcher, SufficiencyJudge, TerraphimGrep, }; +use terraphim_types::Thesaurus; use tracing_subscriber::{EnvFilter, fmt, prelude::*}; #[derive(Parser, Debug)] @@ -181,6 +182,29 @@ fn find_default_thesaurus(role_name: &str) -> Option { None } +/// Build a thesaurus for the requested role. +/// +/// Resolution order: +/// 1. If `--thesaurus ` is provided, load it. +/// 2. Otherwise try `find_default_thesaurus` (project config or filesystem heuristic). +/// 3. If none of the above succeeds, return an empty thesaurus so the CLI can fall back to +/// `fff-search` enhanced grep without a knowledge graph. +async fn resolve_thesaurus(role_name: &str, explicit: Option<&Path>) -> Result { + if let Some(path) = explicit { + let automata_path = AutomataPath::from_local(path); + return terraphim_automata::load_thesaurus(&automata_path) + .await + .with_context(|| format!("Failed to load thesaurus from {:?}", path)); + } + if let Some(path) = find_default_thesaurus(role_name) { + let automata_path = AutomataPath::from_local(&path); + return terraphim_automata::load_thesaurus(&automata_path) + .await + .with_context(|| format!("Failed to load thesaurus from {:?}", path)); + } + Ok(Thesaurus::new(role_name.to_string())) +} + /// Build an `LlmClient` for the requested role. /// /// Resolution order: @@ -312,20 +336,17 @@ async fn main() -> Result<()> { project_config.as_ref().map(|(_, config)| config), )?; - let thesaurus_path = args - .thesaurus - .or_else(|| find_default_thesaurus(&role_name)) - .context( - "No thesaurus specified and could not find default. Use --thesaurus to specify path.", - )?; - - // Load thesaurus - let automata_path = AutomataPath::from_local(&thesaurus_path); - let thesaurus = terraphim_automata::load_thesaurus(&automata_path) - .await - .with_context(|| format!("Failed to load thesaurus from {:?}", thesaurus_path))?; - - tracing::debug!("Loaded thesaurus with {} entries", thesaurus.len()); + // Load thesaurus, falling back to an empty one when no project thesaurus exists. + // This lets terraphim-grep behave like an enhanced fff-search grep without a KG. + let thesaurus = resolve_thesaurus(&role_name, args.thesaurus.as_deref()).await?; + if thesaurus.is_empty() { + tracing::info!( + "No thesaurus found for role '{}'; running in fff-search enhanced grep mode", + role_name + ); + } else { + tracing::debug!("Loaded thesaurus with {} entries", thesaurus.len()); + } // Determine search path let search_path = args diff --git a/crates/terraphim_grep/tests/no_thesaurus_cli.rs b/crates/terraphim_grep/tests/no_thesaurus_cli.rs new file mode 100644 index 0000000..46227f9 --- /dev/null +++ b/crates/terraphim_grep/tests/no_thesaurus_cli.rs @@ -0,0 +1,60 @@ +//! Integration test: terraphim-grep works without a knowledge-graph thesaurus. +//! +//! Verifies that the CLI falls back to `fff-search` enhanced grep mode when no +//! thesaurus is available, returning valid JSON results with empty concepts. + +use std::process::Command; + +#[test] +fn cli_runs_without_thesaurus() { + let tmp = tempfile::TempDir::new().expect("tempdir"); + let file_path = tmp.path().join("sample.rs"); + std::fs::write(&file_path, "fn search_target() { /* found */ }\n").unwrap(); + + let bin = env!("CARGO_BIN_EXE_terraphim-grep"); + + let output = Command::new(bin) + .args([ + "search_target", + "--json", + "--haystack", + "code", + "--paths", + tmp.path().to_str().unwrap(), + ]) + .output() + .expect("failed to run terraphim-grep"); + + let stderr = String::from_utf8_lossy(&output.stderr); + let stdout = String::from_utf8_lossy(&output.stdout); + + assert!( + output.status.success(), + "terraphim-grep should succeed without a thesaurus\nstdout: {stdout}\nstderr: {stderr}" + ); + + let result: serde_json::Value = + serde_json::from_str(&stdout).expect("stdout should be valid JSON"); + + assert!( + result.get("chunks").is_some(), + "JSON result should contain chunks" + ); + let chunks = result["chunks"].as_array().expect("chunks is an array"); + assert!( + !chunks.is_empty(), + "expected at least one fff-search chunk without thesaurus" + ); + + let concepts = result["concepts"].as_array().expect("concepts is an array"); + assert!( + concepts.is_empty(), + "expected empty KG concepts without thesaurus" + ); + + assert_eq!( + result["stats"]["kg_hits"].as_u64(), + Some(0), + "kg_hits should be zero" + ); +}