Skip to main content

mana_core/
util.rs

//! Utility functions for unit ID validation and natural ordering, status parsing,
//! title slugs, duplicate-title detection, and atomic file writes.
2
3use crate::unit::Status;
4use anyhow::{Context, Result};
5use std::path::Path;
6use std::str::FromStr;
7
8/// Validate a unit ID to prevent path traversal attacks.
9///
10/// Valid IDs match the pattern: ^[a-zA-Z0-9._-]+$
11/// This prevents directory escape attacks like "../../../etc/passwd".
12///
13/// # Examples
14/// - "1" ✓ (valid)
15/// - "3.2.1" ✓ (valid)
16/// - "my-task" ✓ (valid)
17/// - "task_v1.0" ✓ (valid)
18/// - "../etc/passwd" ✗ (invalid)
19/// - "task/../escape" ✗ (invalid)
20pub fn validate_unit_id(id: &str) -> Result<()> {
21    if id.is_empty() {
22        return Err(anyhow::anyhow!("Unit ID cannot be empty"));
23    }
24
25    if id.len() > 255 {
26        return Err(anyhow::anyhow!("Unit ID too long (max 255 characters)"));
27    }
28
29    // Check that ID only contains safe characters: alphanumeric, dots, underscores, hyphens
30    if !id
31        .chars()
32        .all(|c| c.is_ascii_alphanumeric() || c == '.' || c == '_' || c == '-')
33    {
34        return Err(anyhow::anyhow!(
35            "Invalid unit ID '{}': must contain only alphanumeric characters, dots, underscores, and hyphens",
36            id
37        ));
38    }
39
40    // Ensure no path traversal sequences
41    if id.contains("..") {
42        return Err(anyhow::anyhow!(
43            "Invalid unit ID '{}': cannot contain '..' (path traversal protection)",
44            id
45        ));
46    }
47
48    Ok(())
49}
50
/// One piece of a dot-separated unit ID.
///
/// Ordering rules: two numeric segments compare by value, two alpha segments compare
/// as strings, and any numeric segment sorts before any alpha segment.
#[derive(Debug, Clone, PartialEq, Eq)]
enum IdSegment {
    /// A segment that parsed as an unsigned integer.
    Num(u64),
    /// Any other segment, kept verbatim.
    Alpha(String),
}

impl Ord for IdSegment {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        use std::cmp::Ordering::{Greater, Less};

        match (self, other) {
            // Same kind on both sides: defer to the payload's own ordering.
            (Self::Num(lhs), Self::Num(rhs)) => lhs.cmp(rhs),
            (Self::Alpha(lhs), Self::Alpha(rhs)) => lhs.cmp(rhs),
            // Mixed kinds: numbers always come first.
            (Self::Num(_), Self::Alpha(_)) => Less,
            (Self::Alpha(_), Self::Num(_)) => Greater,
        }
    }
}

impl PartialOrd for IdSegment {
    // The order is total, so the partial comparison simply wraps `Ord::cmp`.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
76
77/// Compare two unit IDs using natural ordering.
78/// Parses IDs as dot-separated segments and compares them.
79/// Numeric segments are compared numerically, alpha segments lexicographically.
80/// Numeric segments sort before alpha segments.
81///
82/// # Examples
83/// - "1" < "2" (numeric comparison)
84/// - "1" < "10" (numeric comparison, not string comparison)
85/// - "3.1" < "3.2" (multi-level comparison)
86/// - "abc" < "def" (alpha comparison)
87/// - "1" < "abc" (numeric sorts before alpha)
88pub fn natural_cmp(a: &str, b: &str) -> std::cmp::Ordering {
89    let sa = parse_id_segments(a);
90    let sb = parse_id_segments(b);
91    sa.cmp(&sb)
92}
93
94/// Parse a dot-separated ID into segments.
95///
96/// Each segment is parsed as numeric (u64) if possible, otherwise kept as a string.
97/// Used for natural ID comparison.
98///
99/// # Examples
100/// - "1" → [Num(1)]
101/// - "3.1" → [Num(3), Num(1)]
102/// - "my-task" → [Alpha("my-task")]
103/// - "1.abc.2" → [Num(1), Alpha("abc"), Num(2)]
104fn parse_id_segments(id: &str) -> Vec<IdSegment> {
105    id.split('.')
106        .map(|seg| match seg.parse::<u64>() {
107            Ok(n) => IdSegment::Num(n),
108            Err(_) => IdSegment::Alpha(seg.to_string()),
109        })
110        .collect()
111}
112
113/// Convert a status string to a Status enum, or None if invalid.
114///
115/// Valid inputs: "open", "in_progress", "closed"
116pub fn parse_status(s: &str) -> Option<Status> {
117    match s {
118        "open" => Some(Status::Open),
119        "in_progress" => Some(Status::InProgress),
120        "closed" => Some(Status::Closed),
121        _ => None,
122    }
123}
124
125/// Implement FromStr for Status to support standard parsing.
126impl FromStr for Status {
127    type Err = String;
128
129    fn from_str(s: &str) -> Result<Self, Self::Err> {
130        parse_status(s).ok_or_else(|| format!("Invalid status: {}", s))
131    }
132}
133
/// Turn a unit title into a kebab-case slug suitable for a filename.
///
/// The result contains only lowercase ASCII letters, ASCII digits and single hyphens,
/// never starts or ends with a hyphen, and is at most 50 characters long.
///
/// How the slug is produced:
/// 1. The title is trimmed and lowercased.
/// 2. ASCII letters and digits are kept as they are.
/// 3. Whitespace and hyphens become a hyphen, with runs collapsed to one and
///    leading hyphens dropped.
/// 4. Every other character is discarded.
/// 5. The slug is cut to 50 characters and any trailing hyphen is removed.
/// 6. If nothing is left, "unnamed" is returned.
///
/// # Examples
/// - "My Task" → "my-task"
/// - "Build API v2.0" → "build-api-v20"
/// - "Foo   Bar" → "foo-bar"
/// - "Implement `mana show` to render Markdown" → "implement-mana-show-to-render-markdown"
/// - "Update Unit parser to read .md + YAML frontmatter" → "update-unit-parser-to-read-md-yaml-frontmatter"
/// - "My-Task!!!" → "my-task"
/// - "   Spaces   " → "spaces"
/// - "" (empty) → "unnamed"
/// - "a" (single char) → "a"
pub fn title_to_slug(title: &str) -> String {
    const MAX_SLUG_LEN: usize = 50;

    let mut slug = String::with_capacity(title.len());
    for ch in title.trim().to_lowercase().chars() {
        if ch.is_ascii_alphanumeric() {
            slug.push(ch);
        } else if (ch == '-' || ch.is_whitespace()) && !slug.is_empty() && !slug.ends_with('-') {
            // A separator is only emitted after real content and never twice in a row,
            // which collapses runs and suppresses leading hyphens in one go.
            slug.push('-');
        }
    }

    // The slug is pure ASCII here, so a byte index is always a char boundary.
    slug.truncate(MAX_SLUG_LEN);
    let kept = slug.trim_end_matches('-').len();
    slug.truncate(kept);

    if slug.is_empty() {
        String::from("unnamed")
    } else {
        slug
    }
}
205
/// Reduce a title to its distinct, meaningful words for similarity comparison.
///
/// The title is lowercased and split on every character that is not an ASCII letter
/// or digit, so punctuation and whitespace act as separators. Words of a single
/// character and the common stop words listed below are dropped.
///
/// Each remaining word appears exactly once in the result, in order of first
/// occurrence. Returning distinct words is what keeps a word-overlap ratio built on
/// this list within `0.0..=1.0` when a title repeats a word.
fn normalize_title_words(title: &str) -> Vec<String> {
    const STOP_WORDS: &[&str] = &[
        "a", "an", "the", "to", "in", "on", "of", "for", "and", "or", "is", "it", "by", "at", "be",
        "do", "up", "as", "so", "if", "no", "not", "but", "all", "can", "had", "has", "was", "are",
        "its", "may", "our", "out", "own", "too", "use", "via", "way", "yet", "with", "from",
        "that", "this", "into", "when", "will", "been", "have", "each", "make", "than", "them",
        "then", "some",
    ];

    let lowered = title.to_lowercase();
    // Tracks words already emitted; `insert` returns false for a repeat.
    let mut seen = std::collections::HashSet::new();

    lowered
        .split(|c: char| !c.is_ascii_alphanumeric())
        // The length check also discards the empty pieces produced by adjacent separators.
        .filter(|word| word.len() > 1 && !STOP_WORDS.contains(word))
        .filter(|word| seen.insert(*word))
        .map(str::to_owned)
        .collect()
}
227
228/// Compute word-overlap similarity between two titles.
229///
230/// Returns a value between 0.0 (no overlap) and 1.0 (identical words).
231/// Uses Jaccard-like similarity: |intersection| / |smaller set|.
232/// Dividing by the smaller set means "Fix auth" matches "Fix authentication timeout handling"
233/// at a high score even though one title has more words.
234pub fn title_similarity(a: &str, b: &str) -> f64 {
235    let words_a = normalize_title_words(a);
236    let words_b = normalize_title_words(b);
237
238    if words_a.is_empty() || words_b.is_empty() {
239        return 0.0;
240    }
241
242    let intersection = words_a.iter().filter(|w| words_b.contains(w)).count();
243    let min_len = words_a.len().min(words_b.len());
244
245    intersection as f64 / min_len as f64
246}
247
/// One hit produced by duplicate detection: an existing unit whose title resembles
/// the title being checked.
#[derive(Clone, Debug)]
pub struct SimilarUnit {
    /// ID of the existing unit.
    pub id: String,
    /// Title of the existing unit.
    pub title: String,
    /// Similarity score of that title against the checked title.
    pub score: f64,
}
255
256/// Find open/in-progress units with titles similar to the given title.
257///
258/// Returns units whose title similarity exceeds the threshold (default 0.7).
259/// Only checks units with status Open or InProgress.
260pub fn find_similar_titles(
261    index: &crate::index::Index,
262    new_title: &str,
263    threshold: f64,
264) -> Vec<SimilarUnit> {
265    let mut matches = Vec::new();
266
267    for entry in &index.units {
268        if entry.status != Status::Open && entry.status != Status::InProgress {
269            continue;
270        }
271
272        let score = title_similarity(new_title, &entry.title);
273        if score >= threshold {
274            matches.push(SimilarUnit {
275                id: entry.id.clone(),
276                title: entry.title.clone(),
277                score,
278            });
279        }
280    }
281
282    // Sort by score descending
283    matches.sort_by(|a, b| {
284        b.score
285            .partial_cmp(&a.score)
286            .unwrap_or(std::cmp::Ordering::Equal)
287    });
288    matches
289}
290
/// Default similarity threshold for duplicate detection (70% word overlap).
///
/// Same value as the default mentioned in the documentation of
/// `find_similar_titles`, which keeps a unit when its score is `>=` the threshold
/// it is given.
pub const DEFAULT_SIMILARITY_THRESHOLD: f64 = 0.7;
293
294/// Write contents to a file atomically using write-to-temp + rename.
295///
296/// Writes to a temporary file in the same directory as `path`, then renames
297/// it to the target. `rename()` is atomic on POSIX when source and destination
298/// are on the same filesystem (guaranteed here since we use the same directory).
299/// The temp file is cleaned up on error.
300pub fn atomic_write(path: &Path, contents: &str) -> Result<()> {
301    let tmp_path = path.with_extension(format!("tmp.{}", std::process::id()));
302
303    // Write to temp file; clean up on failure
304    if let Err(e) = std::fs::write(&tmp_path, contents) {
305        let _ = std::fs::remove_file(&tmp_path);
306        return Err(e)
307            .with_context(|| format!("Failed to write temp file: {}", tmp_path.display()));
308    }
309
310    // Atomic rename; clean up temp on failure
311    if let Err(e) = std::fs::rename(&tmp_path, path) {
312        let _ = std::fs::remove_file(&tmp_path);
313        return Err(e).with_context(|| {
314            format!(
315                "Failed to rename {} -> {}",
316                tmp_path.display(),
317                path.display()
318            )
319        });
320    }
321
322    Ok(())
323}
324
#[cfg(test)]
mod tests {
    use super::*;
    use std::cmp::Ordering;

    // ---------- shared helpers ----------

    /// Assert that `title` is slugified to exactly `expected`.
    fn assert_slug(title: &str, expected: &str) {
        assert_eq!(title_to_slug(title), expected, "slug for {:?}", title);
    }

    /// Assert the natural ordering of `a` relative to `b`.
    fn assert_order(a: &str, b: &str, expected: Ordering) {
        assert_eq!(natural_cmp(a, b), expected, "natural_cmp({:?}, {:?})", a, b);
    }

    /// Build a list of purely numeric segments.
    fn nums(values: &[u64]) -> Vec<IdSegment> {
        values.iter().copied().map(IdSegment::Num).collect()
    }

    /// Build a single alpha segment.
    fn alpha(text: &str) -> IdSegment {
        IdSegment::Alpha(text.to_string())
    }

    /// Assert that every ID in `ids` passes validation.
    fn assert_all_valid(ids: &[&str]) {
        for id in ids {
            assert!(validate_unit_id(id).is_ok(), "expected {:?} to be accepted", id);
        }
    }

    /// Assert that every ID in `ids` fails validation.
    fn assert_all_invalid(ids: &[&str]) {
        for id in ids {
            assert!(validate_unit_id(id).is_err(), "expected {:?} to be rejected", id);
        }
    }

    /// Assert that a similarity score equals `expected` up to floating-point noise.
    fn assert_score(score: f64, expected: f64) {
        assert!(
            (score - expected).abs() < f64::EPSILON,
            "expected score {}, got {}",
            expected,
            score
        );
    }

    /// Build an index entry with the given identity and status; every other field
    /// takes the neutral value used throughout the similarity tests.
    fn entry(id: &str, title: &str, status: Status) -> crate::index::IndexEntry {
        use chrono::Utc;

        crate::index::IndexEntry {
            id: id.to_string(),
            title: title.to_string(),
            status,
            priority: 2,
            parent: None,
            dependencies: vec![],
            labels: vec![],
            assignee: None,
            updated_at: Utc::now(),
            produces: vec![],
            requires: vec![],
            has_verify: false,
            verify: None,
            created_at: Utc::now(),
            claimed_by: None,
            attempts: 0,
            paths: vec![],
            feature: false,
            has_decisions: false,
        }
    }

    /// Wrap entries in an index.
    fn index_of(units: Vec<crate::index::IndexEntry>) -> crate::index::Index {
        crate::index::Index { units }
    }

    // ---------- title_to_slug tests ----------

    #[test]
    fn title_to_slug_simple_case() {
        assert_slug("My Task", "my-task");
    }

    #[test]
    fn title_to_slug_with_numbers_and_dots() {
        assert_slug("Build API v2.0", "build-api-v20");
    }

    #[test]
    fn title_to_slug_multiple_spaces() {
        assert_slug("Foo   Bar", "foo-bar");
    }

    #[test]
    fn title_to_slug_with_backticks() {
        assert_slug(
            "Implement `mana show` to render Markdown",
            "implement-mana-show-to-render-markdown",
        );
    }

    #[test]
    fn title_to_slug_with_special_chars() {
        assert_slug(
            "Update Unit parser to read .md + YAML frontmatter",
            "update-unit-parser-to-read-md-yaml-frontmatter",
        );
    }

    #[test]
    fn title_to_slug_with_exclamation() {
        assert_slug("My-Task!!!", "my-task");
    }

    #[test]
    fn title_to_slug_leading_trailing_spaces() {
        assert_slug("   Spaces   ", "spaces");
    }

    #[test]
    fn title_to_slug_empty_string() {
        assert_slug("", "unnamed");
    }

    #[test]
    fn title_to_slug_single_character() {
        assert_slug("a", "a");
        assert_slug("Z", "z");
    }

    #[test]
    fn title_to_slug_only_spaces() {
        assert_slug("   ", "unnamed");
    }

    #[test]
    fn title_to_slug_only_special_chars() {
        assert_slug("!!!@@@###", "unnamed");
    }

    #[test]
    fn title_to_slug_truncate_50_chars() {
        let slug = title_to_slug(&"a".repeat(60));
        assert_eq!(slug, "a".repeat(50));
        assert_eq!(slug.len(), 50);
    }

    #[test]
    fn title_to_slug_truncate_with_hyphens() {
        // Twenty words joined by hyphens is far longer than the limit.
        let slug = title_to_slug(&"word ".repeat(20));
        assert!(slug.len() <= 50);
    }

    #[test]
    fn title_to_slug_mixed_case() {
        assert_slug("ThIs Is A MiXeD CaSe TiTle", "this-is-a-mixed-case-title");
    }

    #[test]
    fn title_to_slug_numbers_preserved() {
        assert_slug("Task 123 Version 4.5.6", "task-123-version-456");
    }

    #[test]
    fn title_to_slug_consecutive_hyphens() {
        assert_slug("foo---bar", "foo-bar");
        assert_slug("foo - - bar", "foo-bar");
    }

    #[test]
    fn title_to_slug_unicode_removed() {
        // Non-ASCII letters are not ASCII alphanumeric, so they are dropped.
        assert_slug("café", "caf");
        assert_slug("naïve", "nave");
    }

    #[test]
    fn title_to_slug_all_whitespace_types() {
        assert_slug("foo\tbar\nbaz", "foo-bar-baz");
    }

    #[test]
    fn title_to_slug_exactly_50_chars() {
        let at_limit = "a".repeat(50);
        assert_slug(&at_limit, &at_limit);
    }

    // ---------- natural_cmp tests ----------

    #[test]
    fn natural_cmp_single_digit() {
        assert_order("1", "2", Ordering::Less);
        assert_order("2", "1", Ordering::Greater);
        assert_order("1", "1", Ordering::Equal);
    }

    #[test]
    fn natural_cmp_multi_digit() {
        assert_order("1", "10", Ordering::Less);
        assert_order("10", "1", Ordering::Greater);
        assert_order("10", "10", Ordering::Equal);
    }

    #[test]
    fn natural_cmp_multi_level() {
        assert_order("3.1", "3.2", Ordering::Less);
        assert_order("3.2", "3.1", Ordering::Greater);
        assert_order("3.1", "3.1", Ordering::Equal);
    }

    #[test]
    fn natural_cmp_three_level() {
        assert_order("3.2.1", "3.2.2", Ordering::Less);
        assert_order("3.2.2", "3.2.1", Ordering::Greater);
        assert_order("3.2.1", "3.2.1", Ordering::Equal);
    }

    #[test]
    fn natural_cmp_different_prefix() {
        assert_order("2.1", "3.1", Ordering::Less);
        assert_order("10.5", "9.99", Ordering::Greater);
    }

    // ---------- parse_id_segments tests ----------

    #[test]
    fn parse_id_segments_single() {
        assert_eq!(parse_id_segments("1"), nums(&[1]));
        assert_eq!(parse_id_segments("42"), nums(&[42]));
    }

    #[test]
    fn parse_id_segments_multi_level() {
        assert_eq!(parse_id_segments("1.2"), nums(&[1, 2]));
        assert_eq!(parse_id_segments("3.2.1"), nums(&[3, 2, 1]));
    }

    #[test]
    fn parse_id_segments_leading_zeros() {
        // Leading zeros are read as decimal, not octal.
        assert_eq!(parse_id_segments("01"), nums(&[1]));
        assert_eq!(parse_id_segments("03.02"), nums(&[3, 2]));
    }

    #[test]
    fn parse_id_segments_alpha() {
        assert_eq!(parse_id_segments("abc"), vec![alpha("abc")]);
        assert_eq!(
            parse_id_segments("1.abc.2"),
            vec![IdSegment::Num(1), alpha("abc"), IdSegment::Num(2)]
        );
    }

    #[test]
    fn natural_cmp_alpha_ids() {
        // Alpha IDs must not all compare equal.
        assert_order("abc", "def", Ordering::Less);
        assert_order("def", "abc", Ordering::Greater);
        assert_order("abc", "abc", Ordering::Equal);
    }

    #[test]
    fn natural_cmp_numeric_before_alpha() {
        assert_order("1", "abc", Ordering::Less);
        assert_order("abc", "1", Ordering::Greater);
    }

    #[test]
    fn natural_cmp_mixed_segments() {
        // Third segment differs.
        assert_order("1.abc.2", "1.abc.3", Ordering::Less);
        // Second segment differs.
        assert_order("1.abc", "1.def", Ordering::Less);
    }

    // ---------- parse_status tests ----------

    #[test]
    fn parse_status_valid_open() {
        assert_eq!(parse_status("open"), Some(Status::Open));
    }

    #[test]
    fn parse_status_valid_in_progress() {
        assert_eq!(parse_status("in_progress"), Some(Status::InProgress));
    }

    #[test]
    fn parse_status_valid_closed() {
        assert_eq!(parse_status("closed"), Some(Status::Closed));
    }

    #[test]
    fn parse_status_invalid() {
        for text in ["invalid", "", "OPEN", "Closed"].iter() {
            assert_eq!(parse_status(text), None, "input {:?}", text);
        }
    }

    #[test]
    fn parse_status_whitespace() {
        for text in ["open ", " open"].iter() {
            assert_eq!(parse_status(text), None, "input {:?}", text);
        }
    }

    // ---------- Status::FromStr tests ----------

    #[test]
    fn status_from_str_open() {
        let outcome = "open".parse::<Status>();
        assert_eq!(outcome, Ok(Status::Open));
    }

    #[test]
    fn status_from_str_in_progress() {
        let outcome = "in_progress".parse::<Status>();
        assert_eq!(outcome, Ok(Status::InProgress));
    }

    #[test]
    fn status_from_str_closed() {
        let outcome = "closed".parse::<Status>();
        assert_eq!(outcome, Ok(Status::Closed));
    }

    #[test]
    fn status_from_str_invalid() {
        for text in ["invalid", ""].iter() {
            assert!(text.parse::<Status>().is_err(), "input {:?}", text);
        }
    }

    // ---------- validate_unit_id tests ----------

    #[test]
    fn validate_unit_id_simple_numeric() {
        assert_all_valid(&["1", "42", "999"]);
    }

    #[test]
    fn validate_unit_id_dotted() {
        assert_all_valid(&["3.1", "3.2.1", "1.2.3.4.5"]);
    }

    #[test]
    fn validate_unit_id_with_underscores() {
        assert_all_valid(&["task_1", "my_task_v1"]);
    }

    #[test]
    fn validate_unit_id_with_hyphens() {
        assert_all_valid(&["my-task", "task-v1-0"]);
    }

    #[test]
    fn validate_unit_id_alphanumeric() {
        assert_all_valid(&["abc123def", "Task1"]);
    }

    #[test]
    fn validate_unit_id_empty_fails() {
        assert_all_invalid(&[""]);
    }

    #[test]
    fn validate_unit_id_path_traversal_fails() {
        assert_all_invalid(&["../etc/passwd", "..", "foo/../bar", "task..escape"]);
    }

    #[test]
    fn validate_unit_id_absolute_path_fails() {
        assert_all_invalid(&["/etc/passwd"]);
    }

    #[test]
    fn validate_unit_id_spaces_fail() {
        assert_all_invalid(&["my task", " 1", "1 "]);
    }

    #[test]
    fn validate_unit_id_special_chars_fail() {
        assert_all_invalid(&[
            "task@home",
            "task#1",
            "task$money",
            "task%complete",
            "task&friend",
            "task*star",
            "task(paren",
            "task)close",
            "task+plus",
            "task=equals",
            "task[bracket",
            "task]close",
            "task{brace",
            "task}close",
            "task|pipe",
            "task;semicolon",
            "task:colon",
            "task\"quote",
            "task'apostrophe",
            "task<less",
            "task>greater",
            "task,comma",
            "task?question",
        ]);
    }

    #[test]
    fn validate_unit_id_too_long() {
        let over_limit = "a".repeat(256);
        assert!(validate_unit_id(&over_limit).is_err());

        let at_limit = "a".repeat(255);
        assert!(validate_unit_id(&at_limit).is_ok());
    }

    // ---------- atomic_write tests ----------

    #[test]
    fn test_atomic_write_creates_file_with_correct_contents() {
        let scratch = tempfile::tempdir().unwrap();
        let target = scratch.path().join("test.yaml");

        atomic_write(&target, "hello: world\n").unwrap();

        assert_eq!(std::fs::read_to_string(&target).unwrap(), "hello: world\n");
    }

    #[test]
    fn test_atomic_write_overwrites_existing_file() {
        let scratch = tempfile::tempdir().unwrap();
        let target = scratch.path().join("test.yaml");

        std::fs::write(&target, "old content").unwrap();
        atomic_write(&target, "new content").unwrap();

        assert_eq!(std::fs::read_to_string(&target).unwrap(), "new content");
    }

    #[test]
    fn test_atomic_write_no_temp_file_left_behind() {
        let scratch = tempfile::tempdir().unwrap();
        let target = scratch.path().join("test.yaml");

        atomic_write(&target, "data").unwrap();

        let remaining: Vec<_> = std::fs::read_dir(scratch.path())
            .unwrap()
            .filter_map(|item| item.ok())
            .collect();
        assert_eq!(remaining.len(), 1, "only the target file should exist");
        assert_eq!(remaining[0].file_name().to_str().unwrap(), "test.yaml");
    }

    // ---------- title_similarity tests ----------

    #[test]
    fn similarity_identical_titles() {
        assert_score(title_similarity("Fix auth timeout", "Fix auth timeout"), 1.0);
    }

    #[test]
    fn similarity_close_titles() {
        // Normalized: ["fix", "auth", "timeout"] vs
        // ["fix", "authentication", "timeout", "handling"].
        // "auth" != "authentication", so two words are shared; 2 / 3 ≈ 0.67.
        let score = title_similarity("Fix auth timeout", "Fix authentication timeout handling");
        assert!(score > 0.5, "Expected > 0.5, got {}", score);
    }

    #[test]
    fn similarity_very_different_titles() {
        let score = title_similarity("Fix auth timeout", "Add database migration");
        assert!(score < 0.3, "Expected < 0.3, got {}", score);
    }

    #[test]
    fn similarity_empty_title() {
        assert_score(title_similarity("", "Something"), 0.0);
        assert_score(title_similarity("Something", ""), 0.0);
    }

    #[test]
    fn similarity_case_insensitive() {
        assert_score(title_similarity("Fix Auth Timeout", "fix auth timeout"), 1.0);
    }

    #[test]
    fn similarity_ignores_stop_words() {
        // Both titles normalize to ["add", "new", "feature"].
        assert_score(title_similarity("Add a new feature", "Add the new feature"), 1.0);
    }

    #[test]
    fn similarity_strips_punctuation() {
        assert_score(title_similarity("Fix: auth timeout!", "Fix auth timeout"), 1.0);
    }

    #[test]
    fn similarity_subset_match_scores_high() {
        // Two shared words out of a smaller set of two gives 1.0.
        assert_score(title_similarity("Fix auth", "Fix auth timeout"), 1.0);
    }

    // ---------- find_similar_titles tests ----------

    #[test]
    fn find_similar_returns_matches_above_threshold() {
        let index = index_of(vec![
            entry("1", "Fix auth timeout", Status::Open),
            entry("2", "Add database migration", Status::Open),
        ]);

        let matches = find_similar_titles(&index, "Fix auth timeout handling", 0.7);
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].id, "1");
    }

    #[test]
    fn find_similar_skips_closed_units() {
        let index = index_of(vec![entry("1", "Fix auth timeout", Status::Closed)]);

        let matches = find_similar_titles(&index, "Fix auth timeout", 0.7);
        assert!(matches.is_empty());
    }

    #[test]
    fn find_similar_returns_empty_when_no_match() {
        let index = index_of(vec![entry("1", "Fix auth timeout", Status::Open)]);

        let matches = find_similar_titles(&index, "Add database migration", 0.7);
        assert!(matches.is_empty());
    }
}
923}