gabb_cli/languages/
mod.rs

1pub mod kotlin;
2pub mod rust;
3pub mod typescript;
4
/// One import binding: a name visible in the importing file, tied to the
/// symbol it refers to in another file.
///
/// Collected during two-phase indexing so that cross-file references can be
/// resolved once all files are known.
#[derive(Debug, Clone)]
pub struct ImportBindingInfo {
    /// Name as written in the importing file; differs from `original_name`
    /// when the import is aliased.
    pub local_name: String,
    /// Canonical, already-resolved path of the file the symbol comes from.
    pub source_file: String,
    /// Name under which the source file exports the symbol (pre-alias).
    pub original_name: String,
}
16
/// Minimum symbol size in bytes required before a content hash is computed.
/// Smaller symbols (getters, trivial functions) are skipped to reduce noise.
///
/// `compute_content_hash` applies this threshold twice: to the raw byte
/// range and again to the whitespace-normalized form.
const MIN_HASH_SIZE: usize = 50;
20
21/// Compute a normalized content hash for duplicate detection.
22/// Returns None if the content is too small (< MIN_HASH_SIZE bytes).
23///
24/// Normalization:
25/// - Strips leading/trailing whitespace
26/// - Collapses all internal whitespace to single spaces
27/// - Uses blake3 for fast, high-quality hashing
28/// - Returns first 16 hex chars (64 bits) - sufficient for grouping
29pub fn compute_content_hash(source: &[u8], start: usize, end: usize) -> Option<String> {
30    if end <= start || end > source.len() {
31        return None;
32    }
33
34    let body = &source[start..end];
35    if body.len() < MIN_HASH_SIZE {
36        return None;
37    }
38
39    // Normalize: collapse whitespace, trim
40    let normalized = normalize_whitespace(body);
41    if normalized.len() < MIN_HASH_SIZE {
42        return None;
43    }
44
45    let hash = blake3::hash(&normalized);
46    Some(hash.to_hex()[..16].to_string())
47}
48
/// Canonicalize whitespace so that formatting differences do not affect hashing.
///
/// The input is split on ASCII whitespace bytes; the non-empty pieces are
/// re-joined with exactly one space between them. As a result:
/// - every run of whitespace becomes a single `b' '`
/// - leading and trailing whitespace disappears
/// - all non-whitespace bytes are copied through unchanged
fn normalize_whitespace(source: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(source.len());

    let tokens = source
        .split(u8::is_ascii_whitespace)
        .filter(|token| !token.is_empty());

    for token in tokens {
        // Separator goes before every token except the first.
        if !out.is_empty() {
            out.push(b' ');
        }
        out.extend_from_slice(token);
    }

    out
}
75
/// Unit tests for content hashing and whitespace normalization.
#[cfg(test)]
mod tests {
    use super::*;

    /// Sources that differ only in whitespace layout must hash identically.
    #[test]
    fn test_compute_content_hash_normalizes_whitespace() {
        let source1 = b"function foo() {\n    return 42;\n} // some padding to meet min size requirement here";
        let source2 =
            b"function foo() { return 42; } // some padding to meet min size requirement here";

        let hash1 = compute_content_hash(source1, 0, source1.len());
        let hash2 = compute_content_hash(source2, 0, source2.len());

        assert!(hash1.is_some());
        assert!(hash2.is_some());
        assert_eq!(
            hash1, hash2,
            "Different whitespace should produce same hash"
        );
    }

    /// Content below `MIN_HASH_SIZE` is not hashed at all.
    #[test]
    fn test_compute_content_hash_skips_small_content() {
        let source = b"fn x() {}";
        let hash = compute_content_hash(source, 0, source.len());
        assert!(hash.is_none(), "Small content should not be hashed");
    }

    /// Structurally similar but textually different sources must not collide.
    #[test]
    fn test_compute_content_hash_different_content() {
        let source1 =
            b"function calculateTotal(items) { return items.reduce((a, b) => a + b, 0); }";
        let source2 =
            b"function calculateSum(values) { return values.reduce((x, y) => x + y, 0); }";

        let hash1 = compute_content_hash(source1, 0, source1.len());
        let hash2 = compute_content_hash(source2, 0, source2.len());

        assert!(hash1.is_some());
        assert!(hash2.is_some());
        assert_ne!(
            hash1, hash2,
            "Different content should produce different hashes"
        );
    }

    /// Empty, reversed, and out-of-bounds ranges yield `None` instead of panicking.
    #[test]
    fn test_compute_content_hash_rejects_invalid_range() {
        let source =
            b"function calculateTotal(items) { return items.reduce((a, b) => a + b, 0); }";

        assert!(compute_content_hash(source, 5, 5).is_none());
        assert!(compute_content_hash(source, 10, 5).is_none());
        assert!(compute_content_hash(source, 0, source.len() + 1).is_none());
    }

    /// A range that is large only because of whitespace falls below the
    /// threshold once normalized and must be skipped.
    #[test]
    fn test_compute_content_hash_skips_whitespace_padded_content() {
        let mut source = b"fn x() {}".to_vec();
        source.extend_from_slice(&vec![b' '; MIN_HASH_SIZE]);
        assert!(source.len() >= MIN_HASH_SIZE);

        assert!(compute_content_hash(&source, 0, source.len()).is_none());
    }

    /// The returned hash is exactly 16 hexadecimal characters.
    #[test]
    fn test_compute_content_hash_output_shape() {
        let source =
            b"function calculateTotal(items) { return items.reduce((a, b) => a + b, 0); }";
        let hash = compute_content_hash(source, 0, source.len())
            .expect("content above the minimum size should be hashed");

        assert_eq!(hash.len(), 16);
        assert!(hash.chars().all(|c| c.is_ascii_hexdigit()));
    }

    /// Whitespace runs collapse to one space and the ends are trimmed.
    #[test]
    fn test_normalize_whitespace_collapses_and_trims() {
        assert_eq!(normalize_whitespace(b"  a \n\t b  "), b"a b".to_vec());
        assert_eq!(normalize_whitespace(b""), Vec::<u8>::new());
        assert_eq!(normalize_whitespace(b" \n\t "), Vec::<u8>::new());
    }
}