Skip to main content

hyalo_cli/commands/
lint.rs

1/// `hyalo lint` — validate frontmatter properties against the `.hyalo.toml` schema.
2///
3/// Reads each file's frontmatter, applies the type-specific schema (or the
4/// default schema if `type` is absent), and reports violations at two severity
5/// levels:
6///
7///   - **error**  — schema violation (missing required field, wrong value type,
8///     invalid enum value, failed pattern match)
9///   - **warn**   — soft issue (no `type` property, no `tags` property, property
10///     not declared in schema)
11///
12/// Exit code: 0 = clean, 1 = errors found, 2 = internal error.
13use std::collections::HashMap;
14use std::path::Path;
15
16use anyhow::{Context, Result};
17use indexmap::IndexMap;
18use regex::Regex;
19use serde::Serialize;
20use serde_json::Value;
21
22use hyalo_core::filename_template::FilenameTemplate;
23use hyalo_core::frontmatter::{read_frontmatter, write_frontmatter};
24use hyalo_core::schema::{self, PropertyConstraint, SchemaConfig, TypeSchema};
25use hyalo_core::util::is_iso8601_date;
26
27use crate::output::{CommandOutcome, Format, format_success};
28
29// ---------------------------------------------------------------------------
30// Types
31// ---------------------------------------------------------------------------
32
33/// Severity of a single lint violation.
34#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
35#[serde(rename_all = "snake_case")]
36pub enum Severity {
37    Error,
38    Warn,
39}
40
41impl std::fmt::Display for Severity {
42    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
43        match self {
44            Self::Error => f.write_str("error"),
45            Self::Warn => f.write_str("warn"),
46        }
47    }
48}
49
50/// A single lint violation found in a file.
51#[derive(Debug, Clone, Serialize)]
52pub struct Violation {
53    pub severity: Severity,
54    pub message: String,
55}
56
57/// Lint results for a single file.
58#[derive(Debug, Serialize)]
59pub struct FileLintResult {
60    pub file: String,
61    pub violations: Vec<Violation>,
62}
63
64/// A single auto-fix that was (or would be) applied.
65#[derive(Debug, Clone, Serialize)]
66pub struct FixAction {
67    /// Kind of fix: "insert-default", "fix-enum-typo", "normalize-date", "infer-type".
68    pub kind: String,
69    /// Frontmatter property affected.
70    pub property: String,
71    /// Old value (if any) — omitted for inserted properties.
72    #[serde(skip_serializing_if = "Option::is_none")]
73    pub old: Option<String>,
74    /// New value applied (or previewed with --dry-run).
75    pub new: String,
76}
77
78/// Aggregated lint output.
79///
80/// The `files` field is renamed from the internal `results` to avoid a
81/// confusing `results.results` nesting once the CLI envelope wraps the payload.
82#[derive(Debug, Serialize)]
83pub struct LintOutput {
84    pub files: Vec<FileLintResult>,
85    /// Total number of violations found across all files.
86    pub total: usize,
87    /// Number of error-severity violations across all files (not limited by `--limit`).
88    pub errors: usize,
89    /// Number of warn-severity violations across all files (not limited by `--limit`).
90    pub warnings: usize,
91    /// Number of files with at least one violation (not limited by `--limit`).
92    pub files_with_issues: usize,
93    /// Number of files that were checked.
94    pub files_checked: usize,
95    /// Fixes that were applied (or previewed) per file. Omitted when no
96    /// `--fix` run produced any changes.
97    #[serde(skip_serializing_if = "Vec::is_empty")]
98    pub fixes: Vec<FileFixResult>,
99    /// `true` when `--dry-run` was passed and fixes were not written.
100    #[serde(skip_serializing_if = "std::ops::Not::not")]
101    pub dry_run: bool,
102    /// `true` when `--limit` truncated the file list.
103    #[serde(skip_serializing_if = "std::ops::Not::not")]
104    pub limited: bool,
105}
106
107/// Fixes applied to a single file.
108#[derive(Debug, Clone, Serialize)]
109pub struct FileFixResult {
110    pub file: String,
111    pub actions: Vec<FixAction>,
112}
113
/// Plain tallies handed back to callers such as `hyalo summary`.
///
/// `Default` yields all-zero counts.
#[derive(Debug, Clone, Default)]
pub struct LintCounts {
    /// Number of error-severity violations.
    pub errors: usize,
    /// Number of warn-severity violations.
    pub warnings: usize,
    /// Number of files that have one or more violations.
    pub files_with_issues: usize,
}
122
123// ---------------------------------------------------------------------------
124// Public API
125// ---------------------------------------------------------------------------
126
127/// Run `hyalo lint` against a list of `(full_path, rel_path)` file pairs.
128///
129/// Returns the formatted output and the set of counts; the caller is
130/// responsible for translating counts into an exit code.
131pub fn lint_files(
132    files: &[(std::path::PathBuf, String)],
133    schema: &SchemaConfig,
134) -> Result<(CommandOutcome, LintCounts)> {
135    lint_files_with_options(files, schema, FixMode::Off, None)
136}
137
138/// Prepend an additional `FileLintResult` (e.g. `.hyalo.toml` view violations)
139/// to the outcome produced by [`lint_files_with_options`]. Adjusts the totals
140/// and the `files_with_issues` counter in the serialized payload to stay
141/// consistent with the new entry.
142pub fn prepend_file_result(
143    outcome: CommandOutcome,
144    extra: &FileLintResult,
145) -> Result<CommandOutcome> {
146    let (payload, total) = match outcome {
147        CommandOutcome::Success { output, total } => (output, total),
148        other => return Ok(other),
149    };
150
151    let mut value: serde_json::Value =
152        serde_json::from_str(&payload).context("failed to re-parse lint output JSON")?;
153
154    if let Some(obj) = value.as_object_mut() {
155        let extra_errors = extra
156            .violations
157            .iter()
158            .filter(|v| matches!(v.severity, Severity::Error))
159            .count();
160        let extra_warnings = extra.violations.len() - extra_errors;
161
162        if let Some(files) = obj.get_mut("files").and_then(|f| f.as_array_mut()) {
163            let extra_value = serde_json::to_value(extra)
164                .context("failed to serialize .hyalo.toml lint result")?;
165            files.insert(0, extra_value);
166        }
167        if let Some(n) = obj.get_mut("total").and_then(|v| v.as_u64()) {
168            obj.insert(
169                "total".to_string(),
170                serde_json::Value::from(n + extra.violations.len() as u64),
171            );
172        }
173        if let Some(n) = obj.get_mut("errors").and_then(|v| v.as_u64()) {
174            obj.insert(
175                "errors".to_string(),
176                serde_json::Value::from(n + extra_errors as u64),
177            );
178        }
179        if let Some(n) = obj.get_mut("warnings").and_then(|v| v.as_u64()) {
180            obj.insert(
181                "warnings".to_string(),
182                serde_json::Value::from(n + extra_warnings as u64),
183            );
184        }
185        if let Some(n) = obj.get_mut("files_with_issues").and_then(|v| v.as_u64()) {
186            obj.insert(
187                "files_with_issues".to_string(),
188                serde_json::Value::from(n + 1),
189            );
190        }
191    }
192
193    let new_payload = format_success(Format::Json, &value);
194    // The outcome's `total` (used by `--count`) tracks files-with-issues —
195    // bump it by 1 when the prepended pseudo-file has at least one violation,
196    // so `--count` stays in sync with `files_with_issues` in the JSON payload.
197    let extra_counts_toward_total = !extra.violations.is_empty();
198    Ok(match total {
199        Some(t) => CommandOutcome::success_with_total(
200            new_payload,
201            if extra_counts_toward_total { t + 1 } else { t },
202        ),
203        None => CommandOutcome::success(new_payload),
204    })
205}
206
207/// Validate `.hyalo.toml` view definitions and return a pseudo-file lint
208/// result when at least one view looks suspicious.
209///
210/// Current checks:
211/// - Views whose only narrowing mechanism is `fields = ["backlinks"]` or
212///   similar — `fields` controls display columns, not filtering, so such a
213///   view matches every file. The likely intent is `orphan = true`.
214///
215/// Returns `None` when there is nothing to report.
216pub fn validate_views(dir: &Path) -> Option<FileLintResult> {
217    // Keys that actually *narrow* the result set.
218    const NARROWING_KEYS: &[&str] = &[
219        "pattern",
220        "regexp",
221        "properties",
222        "tag",
223        "task",
224        "sections",
225        "file",
226        "glob",
227        "broken_links",
228        "orphan",
229        "dead_end",
230        "title",
231        "language",
232    ];
233
234    let toml_path = dir.join(".hyalo.toml");
235    let contents = std::fs::read_to_string(&toml_path).ok()?;
236    let table: toml::Table = toml::from_str(&contents).ok()?;
237    let Some(toml::Value::Table(views_table)) = table.get("views") else {
238        return None;
239    };
240
241    let mut violations: Vec<Violation> = Vec::new();
242    for (name, value) in views_table {
243        let Some(view_tbl) = value.as_table() else {
244            continue;
245        };
246
247        let has_narrowing = view_tbl.iter().any(|(k, v)| {
248            if !NARROWING_KEYS.contains(&k.as_str()) {
249                return false;
250            }
251            // Treat `orphan = false` / `dead_end = false` as non-narrowing.
252            if matches!(k.as_str(), "orphan" | "dead_end" | "broken_links") {
253                return matches!(v, toml::Value::Boolean(true));
254            }
255            // List-typed narrowing keys with empty values don't narrow either.
256            if let toml::Value::Array(a) = v {
257                return !a.is_empty();
258            }
259            true
260        });
261
262        let has_fields = view_tbl.contains_key("fields");
263
264        if !has_narrowing && has_fields {
265            violations.push(Violation {
266                severity: Severity::Warn,
267                message: format!(
268                    "view '{name}' has no narrowing filter — `fields` controls display columns only, \
269                     not filtering. Did you mean `orphan = true` or `dead_end = true`?"
270                ),
271            });
272        } else if !has_narrowing {
273            violations.push(Violation {
274                severity: Severity::Warn,
275                message: format!(
276                    "view '{name}' has no narrowing filter — add at least one of: \
277                     tag, properties, task, orphan, dead_end, broken_links, glob, file, title"
278                ),
279            });
280        }
281    }
282
283    if violations.is_empty() {
284        None
285    } else {
286        Some(FileLintResult {
287            file: ".hyalo.toml".to_string(),
288            violations,
289        })
290    }
291}
292
/// Selects how `lint_files_with_options` treats repairable violations.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FixMode {
    /// Lint only; nothing is fixed.
    Off,
    /// Compute fixes and persist them to the files on disk.
    Apply,
    /// Compute the fixes that would be made, but leave every file untouched.
    DryRun,
}
303
304/// Run lint with the given fix mode.
305///
306/// When `fix` is `Apply`, repairable violations are written back to each file
307/// before the final counts are computed, so the returned counts reflect only
308/// the violations that *remain* after fixing. With `DryRun`, counts reflect
309/// the post-fix state but files are untouched.
310pub fn lint_files_with_options(
311    files: &[(std::path::PathBuf, String)],
312    schema: &SchemaConfig,
313    fix: FixMode,
314    limit: Option<usize>,
315) -> Result<(CommandOutcome, LintCounts)> {
316    let mut results: Vec<FileLintResult> = Vec::new();
317    let mut counts = LintCounts::default();
318    let mut fix_results: Vec<FileFixResult> = Vec::new();
319
320    for (full_path, rel_path) in files {
321        let (file_result, file_fixes) = lint_file_with_fix(full_path, rel_path, schema, fix)?;
322        for v in &file_result.violations {
323            match v.severity {
324                Severity::Error => counts.errors += 1,
325                Severity::Warn => counts.warnings += 1,
326            }
327        }
328        if !file_result.violations.is_empty() {
329            counts.files_with_issues += 1;
330        }
331        if !file_fixes.actions.is_empty() {
332            fix_results.push(file_fixes);
333        }
334        if !file_result.violations.is_empty() {
335            results.push(file_result);
336        }
337    }
338
339    let files_checked = files.len();
340    let total = counts.errors + counts.warnings;
341    let limited = limit.is_some_and(|n| results.len() > n);
342    if let Some(n) = limit {
343        results.truncate(n);
344    }
345    let output = LintOutput {
346        files: results,
347        total,
348        errors: counts.errors,
349        warnings: counts.warnings,
350        files_with_issues: counts.files_with_issues,
351        files_checked,
352        fixes: fix_results,
353        dry_run: matches!(fix, FixMode::DryRun),
354        limited,
355    };
356
357    let val = serde_json::to_value(&output).context("failed to serialize lint output")?;
358    // Use success_with_total so that `--count` returns the number of files with issues.
359    let outcome = CommandOutcome::success_with_total(
360        format_success(Format::Json, &val),
361        counts.files_with_issues as u64,
362    );
363
364    Ok((outcome, counts))
365}
366
367/// Compute lint counts for `hyalo summary` without formatting output.
368pub fn lint_counts_only(
369    files: &[(std::path::PathBuf, String)],
370    schema: &SchemaConfig,
371) -> Result<LintCounts> {
372    let mut counts = LintCounts::default();
373    for (full_path, rel_path) in files {
374        let file_result = lint_file(full_path, rel_path, schema)?;
375        for v in &file_result.violations {
376            match v.severity {
377                Severity::Error => counts.errors += 1,
378                Severity::Warn => counts.warnings += 1,
379            }
380        }
381        if !file_result.violations.is_empty() {
382            counts.files_with_issues += 1;
383        }
384    }
385    Ok(counts)
386}
387
388/// Compute lint counts from pre-indexed `IndexEntry` properties.
389///
390/// Used by `hyalo summary` to avoid re-reading files from disk.
391/// The `index_entries` iterator yields `(rel_path, properties, has_tags)` tuples.
392pub fn lint_counts_from_properties<'a>(
393    entries: impl Iterator<Item = (&'a str, &'a IndexMap<String, Value>, bool)>,
394    schema: &SchemaConfig,
395) -> LintCounts {
396    let mut counts = LintCounts::default();
397    for (rel_path, properties, has_tags) in entries {
398        let violations = validate_properties(rel_path, properties, has_tags, schema);
399        for v in &violations {
400            match v.severity {
401                Severity::Error => counts.errors += 1,
402                Severity::Warn => counts.warnings += 1,
403            }
404        }
405        if !violations.is_empty() {
406            counts.files_with_issues += 1;
407        }
408    }
409    counts
410}
411
412// ---------------------------------------------------------------------------
413// Per-file validation
414// ---------------------------------------------------------------------------
415
416fn lint_file(full_path: &Path, rel_path: &str, schema: &SchemaConfig) -> Result<FileLintResult> {
417    let (result, _) = lint_file_with_fix(full_path, rel_path, schema, FixMode::Off)?;
418    Ok(result)
419}
420
421/// Lint a single file, optionally applying auto-fixes.
422fn lint_file_with_fix(
423    full_path: &Path,
424    rel_path: &str,
425    schema: &SchemaConfig,
426    fix: FixMode,
427) -> Result<(FileLintResult, FileFixResult)> {
428    let properties = match read_frontmatter(full_path) {
429        Ok(props) => props,
430        Err(e) if hyalo_core::frontmatter::is_parse_error(&e) => {
431            // Malformed frontmatter — report as a single error violation.
432            return Ok((
433                FileLintResult {
434                    file: rel_path.to_owned(),
435                    violations: vec![Violation {
436                        severity: Severity::Error,
437                        message: format!("{}: {e}", crate::hints::PARSE_ERROR_PREFIX),
438                    }],
439                },
440                FileFixResult {
441                    file: rel_path.to_owned(),
442                    actions: Vec::new(),
443                },
444            ));
445        }
446        Err(e) => return Err(e).context(format!("reading {rel_path}")),
447    };
448
449    // Apply fixes in memory (or dry-run) before final validation.
450    let (final_props, actions) = if matches!(fix, FixMode::Apply | FixMode::DryRun) {
451        let mut mutable = properties.clone();
452        let actions = apply_fixes(rel_path, &mut mutable, schema);
453        if matches!(fix, FixMode::Apply) && !actions.is_empty() {
454            write_frontmatter(full_path, &mutable)
455                .with_context(|| format!("writing fixed frontmatter to {rel_path}"))?;
456        }
457        (mutable, actions)
458    } else {
459        (properties, Vec::new())
460    };
461
462    let has_tags = final_props.contains_key("tags");
463    let violations = validate_properties(rel_path, &final_props, has_tags, schema);
464    Ok((
465        FileLintResult {
466            file: rel_path.to_owned(),
467            violations,
468        },
469        FileFixResult {
470            file: rel_path.to_owned(),
471            actions,
472        },
473    ))
474}
475
476// ---------------------------------------------------------------------------
477// Auto-fix
478// ---------------------------------------------------------------------------
479
/// Largest Levenshtein distance at which an invalid enum value is still
/// treated as a typo of a declared value and auto-corrected.
///
/// Small enough that single-letter slips (e.g. "planed" → "planned") get
/// fixed, while unrelated values (e.g. "wip" vs. "in-progress") are left
/// untouched.
const ENUM_TYPO_MAX_DISTANCE: usize = 2;
484
485/// Compute and apply in-memory auto-fixes to `props`. Returns the list of
486/// actions that were taken. Caller is responsible for persisting `props` to
487/// disk when appropriate.
488fn apply_fixes(
489    rel_path: &str,
490    props: &mut IndexMap<String, Value>,
491    schema: &SchemaConfig,
492) -> Vec<FixAction> {
493    let mut actions: Vec<FixAction> = Vec::new();
494
495    // Step 1: infer `type` from filename-template if missing.
496    if !props.contains_key("type")
497        && let Some(inferred) = infer_type_from_path(rel_path, schema)
498    {
499        // Insert `type` at the front of the map so downstream logic picks it up.
500        props.shift_insert(0, "type".to_owned(), Value::String(inferred.clone()));
501        actions.push(FixAction {
502            kind: "infer-type".to_owned(),
503            property: "type".to_owned(),
504            old: None,
505            new: inferred,
506        });
507    }
508
509    // Determine the effective schema after any type inference.
510    let doc_type: Option<String> = props.get("type").and_then(|v| match v {
511        Value::String(s) => Some(s.clone()),
512        _ => None,
513    });
514    let effective_schema: TypeSchema = match &doc_type {
515        Some(t) => schema.merged_schema_for_type(t),
516        None => schema.default_schema().clone(),
517    };
518
519    // Step 2: insert defaults for missing properties.
520    // Iterate in the schema's `required` order first, then any remaining defaults,
521    // so the resulting frontmatter is ordered deterministically.
522    let mut inserted: std::collections::HashSet<String> = std::collections::HashSet::new();
523    for req in &effective_schema.required {
524        if !props.contains_key(req.as_str())
525            && let Some(raw) = effective_schema.defaults.get(req.as_str())
526        {
527            let value = schema::expand_default(raw);
528            props.insert(req.clone(), Value::String(value.clone()));
529            inserted.insert(req.clone());
530            actions.push(FixAction {
531                kind: "insert-default".to_owned(),
532                property: req.clone(),
533                old: None,
534                new: value,
535            });
536        }
537    }
538    // Also honour defaults for properties not listed in `required`.
539    for (name, raw) in &effective_schema.defaults {
540        if inserted.contains(name) || props.contains_key(name.as_str()) {
541            continue;
542        }
543        let value = schema::expand_default(raw);
544        props.insert(name.clone(), Value::String(value.clone()));
545        actions.push(FixAction {
546            kind: "insert-default".to_owned(),
547            property: name.clone(),
548            old: None,
549            new: value,
550        });
551    }
552
553    // Step 3: per-property fixes (enum typos, date normalization).
554    let prop_names: Vec<String> = props.keys().cloned().collect();
555    for name in prop_names {
556        let Some(constraint) = effective_schema.properties.get(name.as_str()) else {
557            continue;
558        };
559        // Snapshot the current value to avoid double-borrowing `props`.
560        let Some(current) = props.get(name.as_str()).cloned() else {
561            continue;
562        };
563        match constraint {
564            PropertyConstraint::Enum { values } => {
565                let Value::String(s) = &current else { continue };
566                if values.iter().any(|v| v == s) {
567                    continue;
568                }
569                if let Some((suggestion, dist)) = values
570                    .iter()
571                    .map(|v| (v, strsim::levenshtein(s, v.as_str())))
572                    .min_by_key(|(_, d)| *d)
573                    && dist <= ENUM_TYPO_MAX_DISTANCE
574                {
575                    let old = s.clone();
576                    let new_value = suggestion.clone();
577                    props.insert(name.clone(), Value::String(new_value.clone()));
578                    actions.push(FixAction {
579                        kind: "fix-enum-typo".to_owned(),
580                        property: name.clone(),
581                        old: Some(old),
582                        new: new_value,
583                    });
584                }
585            }
586            PropertyConstraint::Date => {
587                let Value::String(s) = &current else { continue };
588                if is_iso8601_date(s) {
589                    continue;
590                }
591                if let Some(normalized) = normalize_date(s) {
592                    let old = s.clone();
593                    props.insert(name.clone(), Value::String(normalized.clone()));
594                    actions.push(FixAction {
595                        kind: "normalize-date".to_owned(),
596                        property: name.clone(),
597                        old: Some(old),
598                        new: normalized,
599                    });
600                }
601            }
602            _ => {}
603        }
604    }
605
606    // Step 4: split comma-joined tags (e.g. ["cli,ux"] -> ["cli", "ux"]).
607    if let Some(Value::Array(items)) = props.get("tags") {
608        let needs_fix = items
609            .iter()
610            .any(|v| matches!(v, Value::String(s) if s.contains(',')));
611        if needs_fix {
612            let old_tags: Vec<Value> = items.clone();
613            let new_tags: Vec<Value> = old_tags
614                .iter()
615                .flat_map(|v| match v {
616                    Value::String(s) if s.contains(',') => s
617                        .split(',')
618                        .map(str::trim)
619                        .filter(|p| !p.is_empty())
620                        .map(|p| Value::String(p.to_owned()))
621                        .collect::<Vec<_>>(),
622                    other => vec![other.clone()],
623                })
624                .collect();
625            let old_str = old_tags
626                .iter()
627                .filter_map(|v| v.as_str())
628                .collect::<Vec<_>>()
629                .join(", ");
630            let new_str = new_tags
631                .iter()
632                .filter_map(|v| v.as_str())
633                .collect::<Vec<_>>()
634                .join(", ");
635            props.insert("tags".to_owned(), Value::Array(new_tags));
636            actions.push(FixAction {
637                kind: "split-comma-tags".to_owned(),
638                property: "tags".to_owned(),
639                old: Some(old_str),
640                new: new_str,
641            });
642        }
643    }
644
645    actions
646}
647
648/// Try to infer a `type` value for a file at `rel_path` by matching it against
649/// every `[schema.types.*].filename-template`. Returns `None` if zero or more
650/// than one type matches (ambiguous).
651fn infer_type_from_path(rel_path: &str, schema: &SchemaConfig) -> Option<String> {
652    let mut matches: Vec<String> = Vec::new();
653    for (type_name, ts) in &schema.types {
654        let Some(template_str) = &ts.filename_template else {
655            continue;
656        };
657        let Ok(template) = FilenameTemplate::parse(template_str) else {
658            continue;
659        };
660        if template.matches(rel_path) {
661            matches.push(type_name.clone());
662        }
663    }
664    if matches.len() == 1 {
665        matches.pop()
666    } else {
667        None
668    }
669}
670
/// Rewrite a loosely formatted date as `YYYY-MM-DD`.
///
/// The input must consist of exactly three `-`-separated runs of ASCII
/// digits: a four-digit year, then a one- or two-digit month and day. Month
/// and day must denote a real calendar date (leap years included). Anything
/// else — natural-language dates, other separators, out-of-range values —
/// yields `None` and is left to be reported as a violation.
fn normalize_date(s: &str) -> Option<String> {
    // Exactly three fields: a fourth `next()` must come back empty.
    let mut fields = s.split('-');
    let (Some(year), Some(month), Some(day), None) =
        (fields.next(), fields.next(), fields.next(), fields.next())
    else {
        return None;
    };

    let all_digits = |field: &str| field.bytes().all(|b| b.is_ascii_digit());
    if year.len() != 4 || !all_digits(year) {
        return None;
    }
    for field in [month, day] {
        if !(1..=2).contains(&field.len()) || !all_digits(field) {
            return None;
        }
    }

    let year_num: i32 = year.parse().ok()?;
    let month_num: u32 = month.parse().ok()?;
    let day_num: u32 = day.parse().ok()?;

    let is_leap = (year_num % 4 == 0 && year_num % 100 != 0) || year_num % 400 == 0;
    let days_in_month = match month_num {
        2 if is_leap => 29,
        2 => 28,
        4 | 6 | 9 | 11 => 30,
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        // Month 0 or anything above 12.
        _ => return None,
    };
    if day_num == 0 || day_num > days_in_month {
        return None;
    }

    // The year is echoed verbatim; month and day are zero-padded.
    Some(format!("{year}-{month_num:02}-{day_num:02}"))
}
714
715/// Core property validation logic.
716///
717/// Separated so it can be used both by the disk-reading path (`lint_file`) and
718/// the index-based path (`lint_counts_from_properties`).
719fn validate_properties(
720    _rel_path: &str,
721    properties: &IndexMap<String, Value>,
722    has_tags: bool,
723    schema: &SchemaConfig,
724) -> Vec<Violation> {
725    let mut violations: Vec<Violation> = Vec::new();
726
727    // Determine the document type.
728    let type_value = properties.get("type");
729    let doc_type: Option<String> = type_value.and_then(|v| match v {
730        Value::String(s) => Some(s.clone()),
731        _ => None,
732    });
733
734    // If `type` is present but not a string, report an error. A non-string `type`
735    // still satisfies a bare `required = ["type"]` check, so without this error
736    // invalid type values would slip through silently.
737    if let Some(v) = type_value
738        && doc_type.is_none()
739    {
740        violations.push(Violation {
741            severity: Severity::Error,
742            message: format!("property \"type\" expected string, got {v}"),
743        });
744    }
745
746    // Warn when no `type` property is present.
747    if type_value.is_none() && !schema.is_empty() {
748        violations.push(Violation {
749            severity: Severity::Warn,
750            message: "no 'type' property — validating against default schema only".to_owned(),
751        });
752    }
753
754    // Determine the effective schema for this file.
755    let effective_schema: TypeSchema = match &doc_type {
756        Some(t) => schema.merged_schema_for_type(t),
757        None => schema.default_schema().clone(),
758    };
759
760    // Check required properties.
761    for req in &effective_schema.required {
762        if !properties.contains_key(req.as_str()) {
763            let type_hint = doc_type
764                .as_deref()
765                .map(|t| format!(" (type: {t})"))
766                .unwrap_or_default();
767            violations.push(Violation {
768                severity: Severity::Error,
769                message: format!("missing required property \"{req}\"{type_hint}"),
770            });
771        }
772    }
773
774    // Warn when no `tags` property is present and the schema has at least one type defined.
775    if !has_tags && !schema.types.is_empty() {
776        violations.push(Violation {
777            severity: Severity::Warn,
778            message: "no tags defined".to_owned(),
779        });
780    }
781
782    // Build a per-call regex cache so the same pattern isn't recompiled across
783    // properties (this matters in `hyalo summary`, which runs lint over the full
784    // index).
785    let mut regex_cache: HashMap<String, Result<Regex, String>> = HashMap::new();
786
787    // Type-specific property constraint validation.
788    for (name, value) in properties {
789        // `tags` is validated against its declared constraint if present, but we
790        // never emit an "undeclared property" warning for it (it has its own
791        // "no tags defined" warning above).
792        if name == "tags" {
793            if let Some(constraint) = effective_schema.properties.get(name.as_str())
794                && let Some(v) = validate_constraint(name, value, constraint, &mut regex_cache)
795            {
796                violations.push(v);
797            }
798            // Check for comma-joined tags (e.g. "cli,ux" instead of ["cli", "ux"]).
799            if let Value::Array(items) = value {
800                for item in items {
801                    if let Value::String(tag) = item
802                        && tag.contains(',')
803                    {
804                        violations.push(Violation {
805                            severity: Severity::Warn,
806                            message: format!(
807                                "tag \"{tag}\" appears to be comma-joined -- should be separate list items"
808                            ),
809                        });
810                    }
811                }
812            }
813            continue;
814        }
815        // Never warn about "type" (type discriminator) or properties listed in `required`
816        // — they're implicitly accepted even if not in the `properties` map.
817        let implicitly_accepted = name == "type" || effective_schema.required.contains(name);
818
819        if let Some(constraint) = effective_schema.properties.get(name.as_str()) {
820            if let Some(v) = validate_constraint(name, value, constraint, &mut regex_cache) {
821                violations.push(v);
822            }
823        } else if !effective_schema.properties.is_empty() && !implicitly_accepted {
824            // Property not declared in schema — warn only when the schema declares
825            // some properties. Schemas that only specify `required` remain
826            // intentionally permissive about extra fields.
827            violations.push(Violation {
828                severity: Severity::Warn,
829                message: format!("property \"{name}\" is not declared in schema"),
830            });
831        }
832    }
833
834    violations
835}
836
837// ---------------------------------------------------------------------------
838// Constraint validators
839// ---------------------------------------------------------------------------
840
841fn validate_constraint(
842    name: &str,
843    value: &Value,
844    constraint: &PropertyConstraint,
845    regex_cache: &mut HashMap<String, Result<Regex, String>>,
846) -> Option<Violation> {
847    match constraint {
848        PropertyConstraint::String { pattern } => {
849            let Some(s) = value_as_str(value) else {
850                return Some(Violation {
851                    severity: Severity::Error,
852                    message: format!("property \"{name}\" expected string, got {value}"),
853                });
854            };
855            if let Some(pat) = pattern {
856                // Compile (or look up) the regex once per pattern per call.
857                let entry = regex_cache
858                    .entry(pat.clone())
859                    .or_insert_with(|| Regex::new(pat).map_err(|e| e.to_string()));
860                match entry {
861                    Ok(re) => {
862                        if !re.is_match(s) {
863                            return Some(Violation {
864                                severity: Severity::Error,
865                                message: format!(
866                                    "property \"{name}\" value {s:?} does not match pattern {pat:?}"
867                                ),
868                            });
869                        }
870                    }
871                    Err(e) => {
872                        return Some(Violation {
873                            severity: Severity::Error,
874                            message: format!("property \"{name}\": invalid pattern {pat:?}: {e}"),
875                        });
876                    }
877                }
878            }
879            None
880        }
881        PropertyConstraint::Date => {
882            let Some(s) = value_as_str(value) else {
883                return Some(Violation {
884                    severity: Severity::Error,
885                    message: format!("property \"{name}\" expected date (YYYY-MM-DD), got {value}"),
886                });
887            };
888            if !is_iso8601_date(s) {
889                return Some(Violation {
890                    severity: Severity::Error,
891                    message: format!("property \"{name}\" expected date (YYYY-MM-DD), got \"{s}\""),
892                });
893            }
894            None
895        }
896        PropertyConstraint::Number => {
897            if !matches!(value, Value::Number(_)) {
898                return Some(Violation {
899                    severity: Severity::Error,
900                    message: format!("property \"{name}\" expected number, got {value}"),
901                });
902            }
903            None
904        }
905        PropertyConstraint::Boolean => {
906            if !matches!(value, Value::Bool(_)) {
907                return Some(Violation {
908                    severity: Severity::Error,
909                    message: format!("property \"{name}\" expected boolean, got {value}"),
910                });
911            }
912            None
913        }
914        PropertyConstraint::List => {
915            if !matches!(value, Value::Array(_)) {
916                return Some(Violation {
917                    severity: Severity::Error,
918                    message: format!("property \"{name}\" expected list, got {value}"),
919                });
920            }
921            None
922        }
923        PropertyConstraint::Enum { values } => {
924            let Some(s) = value_as_str(value) else {
925                return Some(Violation {
926                    severity: Severity::Error,
927                    message: format!(
928                        "property \"{name}\" expected one of [{}], got {value}",
929                        values.join(", ")
930                    ),
931                });
932            };
933            if values.contains(&s.to_owned()) {
934                return None;
935            }
936            // Find nearest suggestion via Levenshtein.
937            let suggestion = values
938                .iter()
939                .min_by_key(|v| strsim::levenshtein(s, v.as_str()))
940                .map(|v| format!(" (did you mean \"{v}\"?)"))
941                .unwrap_or_default();
942            Some(Violation {
943                severity: Severity::Error,
944                message: format!(
945                    "property \"{name}\" value \"{s}\" not in [{}]{suggestion}",
946                    values.join(", ")
947                ),
948            })
949        }
950    }
951}
952
953/// Extract a `&str` from a `Value::String`, or `None` for other variants.
954fn value_as_str(v: &Value) -> Option<&str> {
955    if let Value::String(s) = v {
956        Some(s.as_str())
957    } else {
958        None
959    }
960}
961
962// ---------------------------------------------------------------------------
963// Public validation helper (used by set/append --validate)
964// ---------------------------------------------------------------------------
965
966/// Validate a single property value against a constraint without a shared regex cache.
967///
968/// Returns `Some(error_message)` when the value violates the constraint, `None`
969/// when it is valid. Regex patterns are compiled fresh for each call — use the
970/// private [`validate_constraint`] with a shared cache in hot paths.
971pub fn validate_constraint_simple(
972    name: &str,
973    value: &Value,
974    constraint: &PropertyConstraint,
975) -> Option<String> {
976    let mut cache = HashMap::new();
977    validate_constraint(name, value, constraint, &mut cache).map(|v| v.message)
978}
979
980// ---------------------------------------------------------------------------
981// Text formatter
982// ---------------------------------------------------------------------------
983
984// ---------------------------------------------------------------------------
985// Unit tests
986// ---------------------------------------------------------------------------
987
#[cfg(test)]
mod tests {
    use super::*;
    use hyalo_core::schema::{PropertyConstraint, SchemaConfig, TypeSchema};
    use std::collections::HashMap;

    // --- shared fixtures ---

    /// Build a schema whose default section only lists required properties,
    /// plus a single named type with its own required list and constraints.
    fn make_schema(
        default_required: &[&str],
        type_name: &str,
        type_required: &[&str],
        type_properties: HashMap<&str, PropertyConstraint>,
    ) -> SchemaConfig {
        let type_schema = TypeSchema {
            required: type_required.iter().map(ToString::to_string).collect(),
            properties: type_properties
                .into_iter()
                .map(|(key, constraint)| (key.to_owned(), constraint))
                .collect(),
            ..Default::default()
        };
        SchemaConfig {
            default: TypeSchema {
                required: default_required.iter().map(ToString::to_string).collect(),
                ..Default::default()
            },
            types: HashMap::from([(type_name.to_owned(), type_schema)]),
        }
    }

    /// Run `validate_constraint` with a throwaway regex cache.
    fn check(name: &str, value: &Value, constraint: &PropertyConstraint) -> Option<Violation> {
        validate_constraint(name, value, constraint, &mut HashMap::new())
    }

    /// Assert that validation produced an error-severity violation.
    #[track_caller]
    fn assert_error(outcome: Option<Violation>) {
        assert!(matches!(
            outcome,
            Some(Violation {
                severity: Severity::Error,
                ..
            })
        ));
    }

    /// Write `contents` to `note.md` inside a fresh temporary directory.
    ///
    /// The returned directory guard must be kept alive for as long as the
    /// path is in use; dropping it removes the directory.
    fn note_with(contents: &str) -> (tempfile::TempDir, std::path::PathBuf) {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("note.md");
        std::fs::write(&path, contents).unwrap();
        (dir, path)
    }

    // --- is_iso8601_date / normalize_date ---

    #[test]
    fn valid_date() {
        assert!(is_iso8601_date("2026-04-13"));
    }

    #[test]
    fn normalize_date_padding_and_calendar() {
        let cases: [(&str, Option<&str>); 6] = [
            // Short month/day get zero-padded.
            ("2026-4-9", Some("2026-04-09")),
            // Feb 29 is valid in leap years only.
            ("2024-2-29", Some("2024-02-29")),
            ("2023-2-29", None),
            // Out-of-range days/months are rejected, not silently normalized.
            ("2026-02-31", None),
            ("2026-04-31", None),
            ("2026-13-01", None),
        ];
        for (input, expected) in cases {
            assert_eq!(normalize_date(input).as_deref(), expected);
        }
    }

    #[test]
    fn invalid_date_format() {
        for rejected in ["April 13", "13-04-2026", "2026/04/13"] {
            assert!(!is_iso8601_date(rejected));
        }
    }

    // --- validate_constraint ---

    #[test]
    fn date_constraint_valid() {
        let value = Value::String("2026-04-13".into());
        assert!(check("date", &value, &PropertyConstraint::Date).is_none());
    }

    #[test]
    fn date_constraint_invalid() {
        let value = Value::String("April 13".into());
        assert_error(check("date", &value, &PropertyConstraint::Date));
    }

    #[test]
    fn enum_constraint_valid() {
        let constraint = PropertyConstraint::Enum {
            values: vec!["planned".into(), "done".into()],
        };
        let value = Value::String("planned".into());
        assert!(check("status", &value, &constraint).is_none());
    }

    #[test]
    fn enum_constraint_invalid_with_suggestion() {
        let constraint = PropertyConstraint::Enum {
            values: vec!["planned".into(), "done".into()],
        };
        let value = Value::String("planed".into());
        let violation = check("status", &value, &constraint).expect("expected violation");
        assert_eq!(violation.severity, Severity::Error);
        assert!(violation.message.contains("did you mean \"planned\""));
    }

    #[test]
    fn number_constraint_valid() {
        let value = Value::Number(5.into());
        assert!(check("priority", &value, &PropertyConstraint::Number).is_none());
    }

    #[test]
    fn number_constraint_invalid() {
        let value = Value::String("five".into());
        assert_error(check("priority", &value, &PropertyConstraint::Number));
    }

    #[test]
    fn boolean_constraint_valid() {
        let value = Value::Bool(true);
        assert!(check("draft", &value, &PropertyConstraint::Boolean).is_none());
    }

    #[test]
    fn boolean_constraint_invalid() {
        let value = Value::String("yes".into());
        assert_error(check("draft", &value, &PropertyConstraint::Boolean));
    }

    #[test]
    fn list_constraint_valid() {
        let value = Value::Array(vec![]);
        assert!(check("tags", &value, &PropertyConstraint::List).is_none());
    }

    #[test]
    fn list_constraint_invalid() {
        let value = Value::String("rust".into());
        assert_error(check("tags", &value, &PropertyConstraint::List));
    }

    #[test]
    fn string_pattern_constraint_valid() {
        let constraint = PropertyConstraint::String {
            pattern: Some(r"^iter-\d+/".into()),
        };
        let value = Value::String("iter-42/my-feature".into());
        assert!(check("branch", &value, &constraint).is_none());
    }

    #[test]
    fn string_pattern_constraint_invalid() {
        let constraint = PropertyConstraint::String {
            pattern: Some(r"^iter-\d+/".into()),
        };
        let value = Value::String("feature/my-branch".into());
        assert_error(check("branch", &value, &constraint));
    }

    // --- lint_file via a temp file ---

    #[test]
    fn lint_file_missing_required() {
        let (_dir, path) = note_with("---\ntitle: Hello\n---\nBody\n");
        let schema = make_schema(&["title", "date"], "note", &[], HashMap::new());

        let report = lint_file(&path, "note.md", &schema).unwrap();

        // date is in default required, but only "title" is present.
        // No type -> warn about no type. date missing -> error.
        let reports_missing_date = report.violations.iter().any(|v| {
            v.severity == Severity::Error
                && v.message.contains("missing required property \"date\"")
        });
        assert!(reports_missing_date);
    }

    #[test]
    fn lint_file_no_type_warn() {
        let (_dir, path) = note_with("---\ntitle: Hello\n---\nBody\n");
        let schema = make_schema(&["title"], "note", &[], HashMap::new());

        let report = lint_file(&path, "note.md", &schema).unwrap();

        let warns_about_type = report
            .violations
            .iter()
            .any(|v| v.severity == Severity::Warn && v.message.contains("no 'type' property"));
        assert!(warns_about_type);
    }

    #[test]
    fn lint_file_no_violations_clean_file() {
        let (_dir, path) =
            note_with("---\ntitle: Hello\ntype: note\ntags:\n  - rust\n---\nBody\n");
        let schema = make_schema(&["title"], "note", &[], HashMap::new());

        let report = lint_file(&path, "note.md", &schema).unwrap();

        assert!(report.violations.is_empty());
    }

    #[test]
    fn lint_no_schema_no_violations() {
        let (_dir, path) = note_with("---\ntitle: Hello\n---\nBody\n");
        let schema = SchemaConfig::default();
        let files = vec![(path, "note.md".to_owned())];

        let (_, counts) = lint_files(&files, &schema).unwrap();

        assert_eq!(counts.errors, 0);
        assert_eq!(counts.warnings, 0);
    }

    // --- UX-3: comma-joined tag detection and fix ---

    #[test]
    fn lint_warns_on_comma_joined_tag() {
        let (_dir, path) =
            note_with("---\ntitle: Hello\ntags:\n  - cli,ux\n  - rust\n---\nBody\n");
        let schema = SchemaConfig::default();

        let report = lint_file(&path, "note.md", &schema).unwrap();

        let Some(warning) = report
            .violations
            .iter()
            .find(|v| v.severity == Severity::Warn && v.message.contains("cli,ux"))
        else {
            panic!(
                "expected a warning about comma-joined tag, got: {:#?}",
                report.violations
            );
        };
        assert!(
            warning.message.contains("comma-joined"),
            "message should mention comma-joined"
        );
    }

    #[test]
    fn lint_fix_splits_comma_joined_tags() {
        let (_dir, path) =
            note_with("---\ntitle: Hello\ntags:\n  - cli,ux\n  - rust\n---\nBody\n");
        let schema = SchemaConfig::default();
        let files = vec![(path.clone(), "note.md".to_owned())];

        let (_, counts) = lint_files_with_options(&files, &schema, FixMode::Apply, None).unwrap();

        // After fix, the comma-joined tag warning should be gone.
        assert_eq!(counts.warnings, 0, "comma-tag warning should be fixed");

        let rewritten = std::fs::read_to_string(&path).unwrap();
        // Both parts of the split tag should be separate items.
        assert!(rewritten.contains("- cli"), "expected 'cli' as separate tag");
        assert!(rewritten.contains("- ux"), "expected 'ux' as separate tag");
        // The original comma-joined form must be gone.
        assert!(
            !rewritten.contains("cli,ux"),
            "comma-joined tag should be removed"
        );
    }
}