diff --git a/surf-core/src/anchor.rs b/surf-core/src/anchor.rs
index 289deeb..d7fd8ba 100644
--- a/surf-core/src/anchor.rs
+++ b/surf-core/src/anchor.rs
@@ -144,4 +144,44 @@ mod tests {
         );
         assert_eq!(parse_anchor("> rotate"), Err(AnchorParseError::EmptyFile));
     }
+
+    // --- Path edge cases (#45) ----------------------------------------------
+    //
+    // The grammar (§6.3) mandates forward slashes in the file path. The two tests below pin the
+    // *current* behaviour: a path containing a backslash — alone or mixed with `/` — is accepted
+    // verbatim and the backslash is retained in `Anchor::file`, so the file later fails to match
+    // on disk: a silent non-match rather than a clear parse error.
+
+    /// A backslash as the only separator parses `Ok` and is kept as-is in `Anchor::file`.
+    #[test]
+    fn backslash_path_is_currently_accepted_verbatim() {
+        let a = parse_anchor("auth\\service.ts > Foo").unwrap();
+        assert_eq!(a.file, "auth\\service.ts");
+        assert_eq!(a.segments, vec![seg("Foo", None)]);
+    }
+
+    /// A path mixing `/` and `\` parses `Ok` too, with both separators kept as-is.
+    #[test]
+    fn mixed_separator_path_is_currently_accepted_verbatim() {
+        // The input must contain *both* separators; a backslash-only path would merely repeat
+        // `backslash_path_is_currently_accepted_verbatim`.
+        let a = parse_anchor("src/auth\\service.py > rotate").unwrap();
+        assert_eq!(a.file, "src/auth\\service.py");
+        assert_eq!(a.segments, vec![seg("rotate", None)]);
+    }
+
+    // Target behaviour for the follow-up (#144): a backslash in the file path is a clear parse
+    // error (a dedicated variant, e.g. `BadSeparator`), not a silently-retained separator. Flip
+    // these to assert on the new variant and drop the `#[ignore]` when the fix lands.
+    #[test]
+    #[ignore = "#144: backslash paths should error with a dedicated variant, not parse Ok"]
+    fn backslash_path_should_be_a_parse_error() {
+        assert!(parse_anchor("auth\\service.ts > Foo").is_err());
+    }
+
+    #[test]
+    #[ignore = "#144: mixed-separator paths should error with a dedicated variant, not parse Ok"]
+    fn mixed_separator_path_should_be_a_parse_error() {
+        assert!(parse_anchor("src/auth\\service.py > rotate").is_err());
+    }
 }
diff --git a/surf-core/tests/golden_hash.rs b/surf-core/tests/golden_hash.rs
index 0ef75d4..77c4fa6 100644
--- a/surf-core/tests/golden_hash.rs
+++ b/surf-core/tests/golden_hash.rs
@@ -94,6 +94,59 @@ fn golden_hashes_are_stable_per_language() {
     assert_eq!(format_stamp(Recipe::V1, "f1075e760a17"), "f1075e760a17");
 }
 
+#[test]
+fn golden_unicode_identifier_hashes_are_stable() {
+    // Non-ASCII symbol names and bodies across the four families (#45). Pinning these as goldens
+    // turns any future locale/encoding sensitivity in canonicalization into a loud diff. Each
+    // snippet carries a comment + non-canonical whitespace, so it also re-asserts the
+    // "comments + formatting ignored" guarantee for Unicode source. All are member-access-free,
+    // so v1 and v2 agree byte-for-byte.
+    let rust = "pub fn café(δ: i64) -> i64 {\n // accent\n δ\n}\n";
+    assert_eq!(
+        raw(rust, Lang::Rust, "x.rs > café", Recipe::V1),
+        "9c1a869d1c60"
+    );
+    assert_eq!(
+        raw(rust, Lang::Rust, "x.rs > café", Recipe::V2),
+        "9c1a869d1c60"
+    );
+
+    let ts = "export function café(δ: string): string {\n return δ; // u\n}\n";
+    assert_eq!(
+        raw(ts, Lang::TypeScript, "x.ts > café", Recipe::V1),
+        "f7607eacbd73"
+    );
+    assert_eq!(
+        raw(ts, Lang::TypeScript, "x.ts > café", Recipe::V2),
+        "f7607eacbd73"
+    );
+
+    let py = "def café(δ):\n # accent\n return δ\n";
+    assert_eq!(
+        raw(py, Lang::Python, "x.py > café", Recipe::V1),
+        "bc2439d5f488"
+    );
+    assert_eq!(
+        raw(py, Lang::Python, "x.py > café", Recipe::V2),
+        "bc2439d5f488"
+    );
+
+    let go = "func Café(δ int) int {\n\t// u\n\treturn δ\n}\n";
+    assert_eq!(raw(go, Lang::Go, "x.go > Café", Recipe::V1), "9a101a4d062f");
+    assert_eq!(raw(go, Lang::Go, "x.go > Café", Recipe::V2), "9a101a4d062f");
+}
+
+#[test]
+fn unicode_identifier_hashes_are_recomputation_stable() {
+    // Re-running the same hash yields the same value — the determinism half of the guarantee,
+    // independent of the pinned goldens above.
+    let py = "def café(δ):\n # accent\n return δ\n";
+    assert_eq!(
+        raw(py, Lang::Python, "x.py > café", Recipe::V2),
+        raw(py, Lang::Python, "x.py > café", Recipe::V2)
+    );
+}
+
 #[test]
 fn golden_member_access_hashes_differ_by_recipe() {
     // Symbols whose only interesting content is a member access: v1 and v2 diverge, and both
diff --git a/surf-core/tests/resolve.rs b/surf-core/tests/resolve.rs
index a789bb9..e80c3b5 100644
--- a/surf-core/tests/resolve.rs
+++ b/surf-core/tests/resolve.rs
@@ -238,6 +238,26 @@ fn span_lines_are_one_based() {
     assert!(s.start_line >= 1 && s.end_line >= s.start_line);
 }
 
+// --- Unicode identifiers (#45) ----------------------------------------------
+
+#[test]
+fn unicode_identifier_resolves_across_families() {
+    // Non-ASCII symbol names must resolve to the Unicode-named body, with no locale/encoding
+    // sensitivity. Every fixture's body contains `return δ`, so a span that covered only the
+    // signature line would fail the check. Pairs with the pinned hashes in golden_hash.rs.
+    let rs = "pub fn café(δ: i64) -> i64 {\n return δ;\n}\n";
+    assert!(snippet(rs, span(rs, Lang::Rust, "x.rs > café")).contains("return δ"));
+
+    let ts = "export function café(δ: string): string {\n return δ;\n}\n";
+    assert!(snippet(ts, span(ts, Lang::TypeScript, "x.ts > café")).contains("return δ"));
+
+    let py = "def café(δ):\n return δ\n";
+    assert!(snippet(py, span(py, Lang::Python, "x.py > café")).contains("return δ"));
+
+    let go = "func Café(δ int) int {\n\treturn δ\n}\n";
+    assert!(snippet(go, span(go, Lang::Go, "x.go > Café")).contains("return δ"));
+}
+
 // --- Python module-level if/try blocks (#81) --------------------------------
 
 const GUARDED_PY: &str = r#"