Skip to main content

hyalo_cli/commands/
lint.rs

1/// `hyalo lint` — validate frontmatter properties against the `.hyalo.toml` schema.
2///
3/// Reads each file's frontmatter, applies the type-specific schema (or the
4/// default schema if `type` is absent), and reports violations at two severity
5/// levels:
6///
7///   - **error**  — schema violation (missing required field, wrong value type,
8///     invalid enum value, failed pattern match)
9///   - **warn**   — soft issue (no `type` property, no `tags` property, property
10///     not declared in schema)
11///
12/// Exit code: 0 = clean, 1 = errors found, 2 = internal error.
13use std::collections::HashMap;
14use std::path::Path;
15
16use anyhow::{Context, Result};
17use indexmap::IndexMap;
18use regex::Regex;
19use serde::Serialize;
20use serde_json::Value;
21
22use hyalo_core::filename_template::FilenameTemplate;
23use hyalo_core::frontmatter::{read_frontmatter, write_frontmatter};
24use hyalo_core::schema::{self, PropertyConstraint, SchemaConfig, TypeSchema};
25use hyalo_core::util::is_iso8601_date;
26
27use crate::output::{CommandOutcome, Format, format_success};
28
29// ---------------------------------------------------------------------------
30// Types
31// ---------------------------------------------------------------------------
32
33/// Severity of a single lint violation.
34#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
35#[serde(rename_all = "snake_case")]
36pub enum Severity {
37    Error,
38    Warn,
39}
40
41impl std::fmt::Display for Severity {
42    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
43        match self {
44            Self::Error => f.write_str("error"),
45            Self::Warn => f.write_str("warn"),
46        }
47    }
48}
49
50/// A single lint violation found in a file.
51#[derive(Debug, Clone, Serialize)]
52pub struct Violation {
53    pub severity: Severity,
54    pub message: String,
55}
56
57/// Lint results for a single file.
58#[derive(Debug, Serialize)]
59pub struct FileLintResult {
60    pub file: String,
61    pub violations: Vec<Violation>,
62}
63
64/// A single auto-fix that was (or would be) applied.
65#[derive(Debug, Clone, Serialize)]
66pub struct FixAction {
67    /// Kind of fix: "insert-default", "fix-enum-typo", "normalize-date", "infer-type".
68    pub kind: String,
69    /// Frontmatter property affected.
70    pub property: String,
71    /// Old value (if any) — omitted for inserted properties.
72    #[serde(skip_serializing_if = "Option::is_none")]
73    pub old: Option<String>,
74    /// New value applied (or previewed with --dry-run).
75    pub new: String,
76}
77
78/// Aggregated lint output.
79///
80/// The `files` field is renamed from the internal `results` to avoid a
81/// confusing `results.results` nesting once the CLI envelope wraps the payload.
82#[derive(Debug, Serialize)]
83pub struct LintOutput {
84    pub files: Vec<FileLintResult>,
85    /// Total number of violations found across all files.
86    pub total: usize,
87    /// Number of error-severity violations across all files (not limited by `--limit`).
88    pub errors: usize,
89    /// Number of warn-severity violations across all files (not limited by `--limit`).
90    pub warnings: usize,
91    /// Number of files with at least one violation (not limited by `--limit`).
92    pub files_with_issues: usize,
93    /// Number of files that were checked.
94    pub files_checked: usize,
95    /// Fixes that were applied (or previewed) per file. Omitted when no
96    /// `--fix` run produced any changes.
97    #[serde(skip_serializing_if = "Vec::is_empty")]
98    pub fixes: Vec<FileFixResult>,
99    /// `true` when `--dry-run` was passed and fixes were not written.
100    #[serde(skip_serializing_if = "std::ops::Not::not")]
101    pub dry_run: bool,
102    /// `true` when `--limit` truncated the file list.
103    #[serde(skip_serializing_if = "std::ops::Not::not")]
104    pub limited: bool,
105}
106
107/// Fixes applied to a single file.
108#[derive(Debug, Clone, Serialize)]
109pub struct FileFixResult {
110    pub file: String,
111    pub actions: Vec<FixAction>,
112}
113
114/// Summary counts returned to callers (e.g. `hyalo summary`).
115#[derive(Debug, Clone, Default)]
116pub struct LintCounts {
117    pub errors: usize,
118    pub warnings: usize,
119    /// Number of files with at least one violation.
120    pub files_with_issues: usize,
121}
122
123// ---------------------------------------------------------------------------
124// Public API
125// ---------------------------------------------------------------------------
126
127/// Run `hyalo lint` against a list of `(full_path, rel_path)` file pairs.
128///
129/// Returns the formatted output and the set of counts; the caller is
130/// responsible for translating counts into an exit code.
131pub fn lint_files(
132    files: &[(std::path::PathBuf, String)],
133    schema: &SchemaConfig,
134) -> Result<(CommandOutcome, LintCounts)> {
135    lint_files_with_options(files, schema, FixMode::Off, None, &mut None, None)
136}
137
138/// Prepend an additional `FileLintResult` (e.g. `.hyalo.toml` view violations)
139/// to the outcome produced by [`lint_files_with_options`]. Adjusts the totals
140/// and the `files_with_issues` counter in the serialized payload to stay
141/// consistent with the new entry.
142pub fn prepend_file_result(
143    outcome: CommandOutcome,
144    extra: &FileLintResult,
145) -> Result<CommandOutcome> {
146    let (payload, total) = match outcome {
147        CommandOutcome::Success { output, total } => (output, total),
148        other => return Ok(other),
149    };
150
151    let mut value: serde_json::Value =
152        serde_json::from_str(&payload).context("failed to re-parse lint output JSON")?;
153
154    if let Some(obj) = value.as_object_mut() {
155        let extra_errors = extra
156            .violations
157            .iter()
158            .filter(|v| matches!(v.severity, Severity::Error))
159            .count();
160        let extra_warnings = extra.violations.len() - extra_errors;
161
162        if let Some(files) = obj.get_mut("files").and_then(|f| f.as_array_mut()) {
163            let extra_value = serde_json::to_value(extra)
164                .context("failed to serialize .hyalo.toml lint result")?;
165            files.insert(0, extra_value);
166        }
167        if let Some(n) = obj.get_mut("total").and_then(|v| v.as_u64()) {
168            obj.insert(
169                "total".to_string(),
170                serde_json::Value::from(n + extra.violations.len() as u64),
171            );
172        }
173        if let Some(n) = obj.get_mut("errors").and_then(|v| v.as_u64()) {
174            obj.insert(
175                "errors".to_string(),
176                serde_json::Value::from(n + extra_errors as u64),
177            );
178        }
179        if let Some(n) = obj.get_mut("warnings").and_then(|v| v.as_u64()) {
180            obj.insert(
181                "warnings".to_string(),
182                serde_json::Value::from(n + extra_warnings as u64),
183            );
184        }
185        if let Some(n) = obj.get_mut("files_with_issues").and_then(|v| v.as_u64()) {
186            obj.insert(
187                "files_with_issues".to_string(),
188                serde_json::Value::from(n + 1),
189            );
190        }
191    }
192
193    let new_payload = format_success(Format::Json, &value);
194    // The outcome's `total` (used by `--count`) tracks files-with-issues —
195    // bump it by 1 when the prepended pseudo-file has at least one violation,
196    // so `--count` stays in sync with `files_with_issues` in the JSON payload.
197    let extra_counts_toward_total = !extra.violations.is_empty();
198    Ok(match total {
199        Some(t) => CommandOutcome::success_with_total(
200            new_payload,
201            if extra_counts_toward_total { t + 1 } else { t },
202        ),
203        None => CommandOutcome::success(new_payload),
204    })
205}
206
207/// Validate `.hyalo.toml` view definitions and return a pseudo-file lint
208/// result when at least one view looks suspicious.
209///
210/// Current checks:
211/// - Views whose only narrowing mechanism is `fields = ["backlinks"]` or
212///   similar — `fields` controls display columns, not filtering, so such a
213///   view matches every file. The likely intent is `orphan = true`.
214///
215/// Returns `None` when there is nothing to report.
216pub fn validate_views(dir: &Path) -> Option<FileLintResult> {
217    // Keys that actually *narrow* the result set.
218    const NARROWING_KEYS: &[&str] = &[
219        "pattern",
220        "regexp",
221        "properties",
222        "tag",
223        "task",
224        "sections",
225        "file",
226        "glob",
227        "broken_links",
228        "orphan",
229        "dead_end",
230        "title",
231        "language",
232    ];
233
234    let toml_path = dir.join(".hyalo.toml");
235    let contents = std::fs::read_to_string(&toml_path).ok()?;
236    let table: toml::Table = toml::from_str(&contents).ok()?;
237    let Some(toml::Value::Table(views_table)) = table.get("views") else {
238        return None;
239    };
240
241    let mut violations: Vec<Violation> = Vec::new();
242    for (name, value) in views_table {
243        let Some(view_tbl) = value.as_table() else {
244            continue;
245        };
246
247        let has_narrowing = view_tbl.iter().any(|(k, v)| {
248            if !NARROWING_KEYS.contains(&k.as_str()) {
249                return false;
250            }
251            // Treat `orphan = false` / `dead_end = false` as non-narrowing.
252            if matches!(k.as_str(), "orphan" | "dead_end" | "broken_links") {
253                return matches!(v, toml::Value::Boolean(true));
254            }
255            // List-typed narrowing keys with empty values don't narrow either.
256            if let toml::Value::Array(a) = v {
257                return !a.is_empty();
258            }
259            true
260        });
261
262        let has_fields = view_tbl.contains_key("fields");
263
264        if !has_narrowing && has_fields {
265            violations.push(Violation {
266                severity: Severity::Warn,
267                message: format!(
268                    "view '{name}' has no narrowing filter — `fields` controls display columns only, \
269                     not filtering. Did you mean `orphan = true` or `dead_end = true`?"
270                ),
271            });
272        } else if !has_narrowing {
273            violations.push(Violation {
274                severity: Severity::Warn,
275                message: format!(
276                    "view '{name}' has no narrowing filter — add at least one of: \
277                     tag, properties, task, orphan, dead_end, broken_links, glob, file, title"
278                ),
279            });
280        }
281    }
282
283    if violations.is_empty() {
284        None
285    } else {
286        Some(FileLintResult {
287            file: ".hyalo.toml".to_string(),
288            violations,
289        })
290    }
291}
292
293/// Whether — and how — `lint_files_with_options` should apply auto-fixes.
294#[derive(Debug, Clone, Copy, PartialEq, Eq)]
295pub enum FixMode {
296    /// Read-only: do not attempt to fix anything.
297    Off,
298    /// Apply fixes in memory and write them back to disk.
299    Apply,
300    /// Compute the fixes that would be applied but don't write any files.
301    DryRun,
302}
303
304/// Run lint with the given fix mode.
305///
306/// When `fix` is `Apply`, repairable violations are written back to each file
307/// before the final counts are computed, so the returned counts reflect only
308/// the violations that *remain* after fixing. With `DryRun`, counts reflect
309/// the post-fix state but files are untouched.
310pub fn lint_files_with_options(
311    files: &[(std::path::PathBuf, String)],
312    schema: &SchemaConfig,
313    fix: FixMode,
314    limit: Option<usize>,
315    snapshot_index: &mut Option<hyalo_core::index::SnapshotIndex>,
316    index_path: Option<&Path>,
317) -> Result<(CommandOutcome, LintCounts)> {
318    let mut results: Vec<FileLintResult> = Vec::new();
319    let mut counts = LintCounts::default();
320    let mut fix_results: Vec<FileFixResult> = Vec::new();
321    let mut index_dirty = false;
322
323    for (full_path, rel_path) in files {
324        let (file_result, file_fixes) = lint_file_with_fix(full_path, rel_path, schema, fix)?;
325        for v in &file_result.violations {
326            match v.severity {
327                Severity::Error => counts.errors += 1,
328                Severity::Warn => counts.warnings += 1,
329            }
330        }
331        if !file_result.violations.is_empty() {
332            counts.files_with_issues += 1;
333        }
334        if !file_fixes.actions.is_empty() {
335            // If fixes were actually applied, update the snapshot index entry.
336            if matches!(fix, FixMode::Apply) {
337                let props = read_frontmatter(full_path)
338                    .with_context(|| format!("reading fixed frontmatter from {rel_path}"))?;
339                super::mutation::update_index_entry(
340                    snapshot_index,
341                    rel_path,
342                    props,
343                    full_path,
344                    &mut index_dirty,
345                )?;
346            }
347            fix_results.push(file_fixes);
348        }
349        if !file_result.violations.is_empty() {
350            results.push(file_result);
351        }
352    }
353
354    super::mutation::save_index_if_dirty(snapshot_index, index_path, index_dirty)?;
355
356    let files_checked = files.len();
357    let total = counts.errors + counts.warnings;
358    let limited = limit.is_some_and(|n| results.len() > n);
359    if let Some(n) = limit {
360        results.truncate(n);
361    }
362    let output = LintOutput {
363        files: results,
364        total,
365        errors: counts.errors,
366        warnings: counts.warnings,
367        files_with_issues: counts.files_with_issues,
368        files_checked,
369        fixes: fix_results,
370        dry_run: matches!(fix, FixMode::DryRun),
371        limited,
372    };
373
374    let val = serde_json::to_value(&output).context("failed to serialize lint output")?;
375    // Use success_with_total so that `--count` returns the number of files with issues.
376    let outcome = CommandOutcome::success_with_total(
377        format_success(Format::Json, &val),
378        counts.files_with_issues as u64,
379    );
380
381    Ok((outcome, counts))
382}
383
384/// Compute lint counts for `hyalo summary` without formatting output.
385pub fn lint_counts_only(
386    files: &[(std::path::PathBuf, String)],
387    schema: &SchemaConfig,
388) -> Result<LintCounts> {
389    let mut counts = LintCounts::default();
390    for (full_path, rel_path) in files {
391        let file_result = lint_file(full_path, rel_path, schema)?;
392        for v in &file_result.violations {
393            match v.severity {
394                Severity::Error => counts.errors += 1,
395                Severity::Warn => counts.warnings += 1,
396            }
397        }
398        if !file_result.violations.is_empty() {
399            counts.files_with_issues += 1;
400        }
401    }
402    Ok(counts)
403}
404
405/// Compute lint counts from pre-indexed `IndexEntry` properties.
406///
407/// Used by `hyalo summary` to avoid re-reading files from disk.
408/// The `index_entries` iterator yields `(rel_path, properties, has_tags)` tuples.
409pub fn lint_counts_from_properties<'a>(
410    entries: impl Iterator<Item = (&'a str, &'a IndexMap<String, Value>, bool)>,
411    schema: &SchemaConfig,
412) -> LintCounts {
413    let mut counts = LintCounts::default();
414    for (rel_path, properties, has_tags) in entries {
415        let violations = validate_properties(rel_path, properties, has_tags, schema);
416        for v in &violations {
417            match v.severity {
418                Severity::Error => counts.errors += 1,
419                Severity::Warn => counts.warnings += 1,
420            }
421        }
422        if !violations.is_empty() {
423            counts.files_with_issues += 1;
424        }
425    }
426    counts
427}
428
429// ---------------------------------------------------------------------------
430// Per-file validation
431// ---------------------------------------------------------------------------
432
433fn lint_file(full_path: &Path, rel_path: &str, schema: &SchemaConfig) -> Result<FileLintResult> {
434    let (result, _) = lint_file_with_fix(full_path, rel_path, schema, FixMode::Off)?;
435    Ok(result)
436}
437
438/// Lint a single file, optionally applying auto-fixes.
439fn lint_file_with_fix(
440    full_path: &Path,
441    rel_path: &str,
442    schema: &SchemaConfig,
443    fix: FixMode,
444) -> Result<(FileLintResult, FileFixResult)> {
445    let properties = match read_frontmatter(full_path) {
446        Ok(props) => props,
447        Err(e) if hyalo_core::frontmatter::is_parse_error(&e) => {
448            // Malformed frontmatter — report as a single error violation.
449            return Ok((
450                FileLintResult {
451                    file: rel_path.to_owned(),
452                    violations: vec![Violation {
453                        severity: Severity::Error,
454                        message: format!("{}: {e}", crate::hints::PARSE_ERROR_PREFIX),
455                    }],
456                },
457                FileFixResult {
458                    file: rel_path.to_owned(),
459                    actions: Vec::new(),
460                },
461            ));
462        }
463        Err(e) => return Err(e).context(format!("reading {rel_path}")),
464    };
465
466    // Apply fixes in memory (or dry-run) before final validation.
467    let (final_props, actions) = if matches!(fix, FixMode::Apply | FixMode::DryRun) {
468        let mut mutable = properties.clone();
469        let actions = apply_fixes(rel_path, &mut mutable, schema);
470        if matches!(fix, FixMode::Apply) && !actions.is_empty() {
471            write_frontmatter(full_path, &mutable)
472                .with_context(|| format!("writing fixed frontmatter to {rel_path}"))?;
473        }
474        (mutable, actions)
475    } else {
476        (properties, Vec::new())
477    };
478
479    let has_tags = final_props.contains_key("tags");
480    let violations = validate_properties(rel_path, &final_props, has_tags, schema);
481    Ok((
482        FileLintResult {
483            file: rel_path.to_owned(),
484            violations,
485        },
486        FileFixResult {
487            file: rel_path.to_owned(),
488            actions,
489        },
490    ))
491}
492
493// ---------------------------------------------------------------------------
494// Auto-fix
495// ---------------------------------------------------------------------------
496
497/// Maximum Levenshtein distance accepted for an enum-typo fix.
498/// Chosen so that single-letter slips (e.g. "planed" → "planned") are corrected
499/// while unrelated values (e.g. "wip" vs. "in-progress") are left alone.
500const ENUM_TYPO_MAX_DISTANCE: usize = 2;
501
502/// Compute and apply in-memory auto-fixes to `props`. Returns the list of
503/// actions that were taken. Caller is responsible for persisting `props` to
504/// disk when appropriate.
505fn apply_fixes(
506    rel_path: &str,
507    props: &mut IndexMap<String, Value>,
508    schema: &SchemaConfig,
509) -> Vec<FixAction> {
510    let mut actions: Vec<FixAction> = Vec::new();
511
512    // Step 1: infer `type` from filename-template if missing.
513    if !props.contains_key("type")
514        && let Some(inferred) = infer_type_from_path(rel_path, schema)
515    {
516        // Insert `type` at the front of the map so downstream logic picks it up.
517        props.shift_insert(0, "type".to_owned(), Value::String(inferred.clone()));
518        actions.push(FixAction {
519            kind: "infer-type".to_owned(),
520            property: "type".to_owned(),
521            old: None,
522            new: inferred,
523        });
524    }
525
526    // Determine the effective schema after any type inference.
527    let doc_type: Option<String> = props.get("type").and_then(|v| match v {
528        Value::String(s) => Some(s.clone()),
529        _ => None,
530    });
531    let effective_schema: TypeSchema = match &doc_type {
532        Some(t) => schema.merged_schema_for_type(t),
533        None => schema.default_schema().clone(),
534    };
535
536    // Step 2: insert defaults for missing properties.
537    // Iterate in the schema's `required` order first, then any remaining defaults,
538    // so the resulting frontmatter is ordered deterministically.
539    let mut inserted: std::collections::HashSet<String> = std::collections::HashSet::new();
540    for req in &effective_schema.required {
541        if !props.contains_key(req.as_str())
542            && let Some(raw) = effective_schema.defaults.get(req.as_str())
543        {
544            let value = schema::expand_default(raw);
545            props.insert(req.clone(), Value::String(value.clone()));
546            inserted.insert(req.clone());
547            actions.push(FixAction {
548                kind: "insert-default".to_owned(),
549                property: req.clone(),
550                old: None,
551                new: value,
552            });
553        }
554    }
555    // Also honour defaults for properties not listed in `required`.
556    for (name, raw) in &effective_schema.defaults {
557        if inserted.contains(name) || props.contains_key(name.as_str()) {
558            continue;
559        }
560        let value = schema::expand_default(raw);
561        props.insert(name.clone(), Value::String(value.clone()));
562        actions.push(FixAction {
563            kind: "insert-default".to_owned(),
564            property: name.clone(),
565            old: None,
566            new: value,
567        });
568    }
569
570    // Step 3: per-property fixes (enum typos, date normalization).
571    let prop_names: Vec<String> = props.keys().cloned().collect();
572    for name in prop_names {
573        let Some(constraint) = effective_schema.properties.get(name.as_str()) else {
574            continue;
575        };
576        // Snapshot the current value to avoid double-borrowing `props`.
577        let Some(current) = props.get(name.as_str()).cloned() else {
578            continue;
579        };
580        match constraint {
581            PropertyConstraint::Enum { values } => {
582                let Value::String(s) = &current else { continue };
583                if values.iter().any(|v| v == s) {
584                    continue;
585                }
586                if let Some((suggestion, dist)) = values
587                    .iter()
588                    .map(|v| (v, strsim::levenshtein(s, v.as_str())))
589                    .min_by_key(|(_, d)| *d)
590                    && dist <= ENUM_TYPO_MAX_DISTANCE
591                {
592                    let old = s.clone();
593                    let new_value = suggestion.clone();
594                    props.insert(name.clone(), Value::String(new_value.clone()));
595                    actions.push(FixAction {
596                        kind: "fix-enum-typo".to_owned(),
597                        property: name.clone(),
598                        old: Some(old),
599                        new: new_value,
600                    });
601                }
602            }
603            PropertyConstraint::Date => {
604                let Value::String(s) = &current else { continue };
605                if is_iso8601_date(s) {
606                    continue;
607                }
608                if let Some(normalized) = normalize_date(s) {
609                    let old = s.clone();
610                    props.insert(name.clone(), Value::String(normalized.clone()));
611                    actions.push(FixAction {
612                        kind: "normalize-date".to_owned(),
613                        property: name.clone(),
614                        old: Some(old),
615                        new: normalized,
616                    });
617                }
618            }
619            _ => {}
620        }
621    }
622
623    // Step 4: split comma-joined tags (e.g. ["cli,ux"] -> ["cli", "ux"]).
624    if let Some(Value::Array(items)) = props.get("tags") {
625        let needs_fix = items
626            .iter()
627            .any(|v| matches!(v, Value::String(s) if s.contains(',')));
628        if needs_fix {
629            let old_tags: Vec<Value> = items.clone();
630            let new_tags: Vec<Value> = old_tags
631                .iter()
632                .flat_map(|v| match v {
633                    Value::String(s) if s.contains(',') => s
634                        .split(',')
635                        .map(str::trim)
636                        .filter(|p| !p.is_empty())
637                        .map(|p| Value::String(p.to_owned()))
638                        .collect::<Vec<_>>(),
639                    other => vec![other.clone()],
640                })
641                .collect();
642            let old_str = old_tags
643                .iter()
644                .filter_map(|v| v.as_str())
645                .collect::<Vec<_>>()
646                .join(", ");
647            let new_str = new_tags
648                .iter()
649                .filter_map(|v| v.as_str())
650                .collect::<Vec<_>>()
651                .join(", ");
652            props.insert("tags".to_owned(), Value::Array(new_tags));
653            actions.push(FixAction {
654                kind: "split-comma-tags".to_owned(),
655                property: "tags".to_owned(),
656                old: Some(old_str),
657                new: new_str,
658            });
659        }
660    }
661
662    actions
663}
664
665/// Try to infer a `type` value for a file at `rel_path` by matching it against
666/// every `[schema.types.*].filename-template`. Returns `None` if zero or more
667/// than one type matches (ambiguous).
668fn infer_type_from_path(rel_path: &str, schema: &SchemaConfig) -> Option<String> {
669    let mut matches: Vec<String> = Vec::new();
670    for (type_name, ts) in &schema.types {
671        let Some(template_str) = &ts.filename_template else {
672            continue;
673        };
674        let Ok(template) = FilenameTemplate::parse(template_str) else {
675            continue;
676        };
677        if template.matches(rel_path) {
678            matches.push(type_name.clone());
679        }
680    }
681    if matches.len() == 1 {
682        matches.pop()
683    } else {
684        None
685    }
686}
687
688/// Normalize a loose date string to `YYYY-MM-DD`.
689///
690/// Accepts inputs of the form `Y-M-D` where `Y`, `M`, `D` are decimal digit
691/// runs and month/day are in the valid calendar ranges. Returns `None` for
692/// inputs that are ambiguous (e.g. natural-language dates, non-ISO separators,
693/// or out-of-range values); those are reported as violations instead.
694fn normalize_date(s: &str) -> Option<String> {
695    let parts: Vec<&str> = s.split('-').collect();
696    if parts.len() != 3 {
697        return None;
698    }
699    let y = parts[0];
700    let m = parts[1];
701    let d = parts[2];
702    if y.len() != 4 || !y.bytes().all(|b| b.is_ascii_digit()) {
703        return None;
704    }
705    if m.is_empty() || m.len() > 2 || !m.bytes().all(|b| b.is_ascii_digit()) {
706        return None;
707    }
708    if d.is_empty() || d.len() > 2 || !d.bytes().all(|b| b.is_ascii_digit()) {
709        return None;
710    }
711    let yi: i32 = y.parse().ok()?;
712    let mi: u32 = m.parse().ok()?;
713    let di: u32 = d.parse().ok()?;
714    if !(1..=12).contains(&mi) {
715        return None;
716    }
717    let max_day = match mi {
718        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
719        4 | 6 | 9 | 11 => 30,
720        2 => {
721            let leap = (yi % 4 == 0 && yi % 100 != 0) || (yi % 400 == 0);
722            if leap { 29 } else { 28 }
723        }
724        _ => return None,
725    };
726    if !(1..=max_day).contains(&di) {
727        return None;
728    }
729    Some(format!("{y}-{mi:02}-{di:02}"))
730}
731
732/// Core property validation logic.
733///
734/// Separated so it can be used both by the disk-reading path (`lint_file`) and
735/// the index-based path (`lint_counts_from_properties`).
736fn validate_properties(
737    _rel_path: &str,
738    properties: &IndexMap<String, Value>,
739    has_tags: bool,
740    schema: &SchemaConfig,
741) -> Vec<Violation> {
742    let mut violations: Vec<Violation> = Vec::new();
743
744    // Determine the document type.
745    let type_value = properties.get("type");
746    let doc_type: Option<String> = type_value.and_then(|v| match v {
747        Value::String(s) => Some(s.clone()),
748        _ => None,
749    });
750
751    // If `type` is present but not a string, report an error. A non-string `type`
752    // still satisfies a bare `required = ["type"]` check, so without this error
753    // invalid type values would slip through silently.
754    if let Some(v) = type_value
755        && doc_type.is_none()
756    {
757        violations.push(Violation {
758            severity: Severity::Error,
759            message: format!("property \"type\" expected string, got {v}"),
760        });
761    }
762
763    // Warn when no `type` property is present.
764    if type_value.is_none() && !schema.is_empty() {
765        violations.push(Violation {
766            severity: Severity::Warn,
767            message: "no 'type' property — validating against default schema only".to_owned(),
768        });
769    }
770
771    // Determine the effective schema for this file.
772    let effective_schema: TypeSchema = match &doc_type {
773        Some(t) => schema.merged_schema_for_type(t),
774        None => schema.default_schema().clone(),
775    };
776
777    // Check required properties.
778    for req in &effective_schema.required {
779        if !properties.contains_key(req.as_str()) {
780            let type_hint = doc_type
781                .as_deref()
782                .map(|t| format!(" (type: {t})"))
783                .unwrap_or_default();
784            violations.push(Violation {
785                severity: Severity::Error,
786                message: format!("missing required property \"{req}\"{type_hint}"),
787            });
788        }
789    }
790
791    // Warn when no `tags` property is present and the schema has at least one type defined.
792    if !has_tags && !schema.types.is_empty() {
793        violations.push(Violation {
794            severity: Severity::Warn,
795            message: "no tags defined".to_owned(),
796        });
797    }
798
799    // Build a per-call regex cache so the same pattern isn't recompiled across
800    // properties (this matters in `hyalo summary`, which runs lint over the full
801    // index).
802    let mut regex_cache: HashMap<String, Result<Regex, String>> = HashMap::new();
803
804    // Type-specific property constraint validation.
805    for (name, value) in properties {
806        // `tags` is validated against its declared constraint if present, but we
807        // never emit an "undeclared property" warning for it (it has its own
808        // "no tags defined" warning above).
809        if name == "tags" {
810            if let Some(constraint) = effective_schema.properties.get(name.as_str())
811                && let Some(v) = validate_constraint(name, value, constraint, &mut regex_cache)
812            {
813                violations.push(v);
814            }
815            // Check for comma-joined tags (e.g. "cli,ux" instead of ["cli", "ux"]).
816            if let Value::Array(items) = value {
817                for item in items {
818                    if let Value::String(tag) = item
819                        && tag.contains(',')
820                    {
821                        violations.push(Violation {
822                            severity: Severity::Warn,
823                            message: format!(
824                                "tag \"{tag}\" appears to be comma-joined -- should be separate list items"
825                            ),
826                        });
827                    }
828                }
829            }
830            continue;
831        }
832        // Never warn about "type" (type discriminator) or properties listed in `required`
833        // — they're implicitly accepted even if not in the `properties` map.
834        let implicitly_accepted = name == "type" || effective_schema.required.contains(name);
835
836        if let Some(constraint) = effective_schema.properties.get(name.as_str()) {
837            if let Some(v) = validate_constraint(name, value, constraint, &mut regex_cache) {
838                violations.push(v);
839            }
840        } else if !effective_schema.properties.is_empty() && !implicitly_accepted {
841            // Property not declared in schema — warn only when the schema declares
842            // some properties. Schemas that only specify `required` remain
843            // intentionally permissive about extra fields.
844            violations.push(Violation {
845                severity: Severity::Warn,
846                message: format!("property \"{name}\" is not declared in schema"),
847            });
848        }
849    }
850
851    violations
852}
853
854// ---------------------------------------------------------------------------
855// Constraint validators
856// ---------------------------------------------------------------------------
857
858fn validate_constraint(
859    name: &str,
860    value: &Value,
861    constraint: &PropertyConstraint,
862    regex_cache: &mut HashMap<String, Result<Regex, String>>,
863) -> Option<Violation> {
864    match constraint {
865        PropertyConstraint::String { pattern } => {
866            let Some(s) = value_as_str(value) else {
867                return Some(Violation {
868                    severity: Severity::Error,
869                    message: format!("property \"{name}\" expected string, got {value}"),
870                });
871            };
872            if let Some(pat) = pattern {
873                // Compile (or look up) the regex once per pattern per call.
874                let entry = regex_cache
875                    .entry(pat.clone())
876                    .or_insert_with(|| Regex::new(pat).map_err(|e| e.to_string()));
877                match entry {
878                    Ok(re) => {
879                        if !re.is_match(s) {
880                            return Some(Violation {
881                                severity: Severity::Error,
882                                message: format!(
883                                    "property \"{name}\" value {s:?} does not match pattern {pat:?}"
884                                ),
885                            });
886                        }
887                    }
888                    Err(e) => {
889                        return Some(Violation {
890                            severity: Severity::Error,
891                            message: format!("property \"{name}\": invalid pattern {pat:?}: {e}"),
892                        });
893                    }
894                }
895            }
896            None
897        }
898        PropertyConstraint::Date => {
899            let Some(s) = value_as_str(value) else {
900                return Some(Violation {
901                    severity: Severity::Error,
902                    message: format!("property \"{name}\" expected date (YYYY-MM-DD), got {value}"),
903                });
904            };
905            if !is_iso8601_date(s) {
906                return Some(Violation {
907                    severity: Severity::Error,
908                    message: format!("property \"{name}\" expected date (YYYY-MM-DD), got \"{s}\""),
909                });
910            }
911            None
912        }
913        PropertyConstraint::Number => {
914            if !matches!(value, Value::Number(_)) {
915                return Some(Violation {
916                    severity: Severity::Error,
917                    message: format!("property \"{name}\" expected number, got {value}"),
918                });
919            }
920            None
921        }
922        PropertyConstraint::Boolean => {
923            if !matches!(value, Value::Bool(_)) {
924                return Some(Violation {
925                    severity: Severity::Error,
926                    message: format!("property \"{name}\" expected boolean, got {value}"),
927                });
928            }
929            None
930        }
931        PropertyConstraint::List => {
932            if !matches!(value, Value::Array(_)) {
933                return Some(Violation {
934                    severity: Severity::Error,
935                    message: format!("property \"{name}\" expected list, got {value}"),
936                });
937            }
938            None
939        }
940        PropertyConstraint::Enum { values } => {
941            let Some(s) = value_as_str(value) else {
942                return Some(Violation {
943                    severity: Severity::Error,
944                    message: format!(
945                        "property \"{name}\" expected one of [{}], got {value}",
946                        values.join(", ")
947                    ),
948                });
949            };
950            if values.contains(&s.to_owned()) {
951                return None;
952            }
953            // Find nearest suggestion via Levenshtein.
954            let suggestion = values
955                .iter()
956                .min_by_key(|v| strsim::levenshtein(s, v.as_str()))
957                .map(|v| format!(" (did you mean \"{v}\"?)"))
958                .unwrap_or_default();
959            Some(Violation {
960                severity: Severity::Error,
961                message: format!(
962                    "property \"{name}\" value \"{s}\" not in [{}]{suggestion}",
963                    values.join(", ")
964                ),
965            })
966        }
967    }
968}
969
970/// Extract a `&str` from a `Value::String`, or `None` for other variants.
971fn value_as_str(v: &Value) -> Option<&str> {
972    if let Value::String(s) = v {
973        Some(s.as_str())
974    } else {
975        None
976    }
977}
978
979// ---------------------------------------------------------------------------
980// Public validation helper (used by set/append --validate)
981// ---------------------------------------------------------------------------
982
983/// Validate a single property value against a constraint without a shared regex cache.
984///
985/// Returns `Some(error_message)` when the value violates the constraint, `None`
986/// when it is valid. Regex patterns are compiled fresh for each call — use the
987/// private [`validate_constraint`] with a shared cache in hot paths.
988pub fn validate_constraint_simple(
989    name: &str,
990    value: &Value,
991    constraint: &PropertyConstraint,
992) -> Option<String> {
993    let mut cache = HashMap::new();
994    validate_constraint(name, value, constraint, &mut cache).map(|v| v.message)
995}
996
997// ---------------------------------------------------------------------------
998// Text formatter
999// ---------------------------------------------------------------------------
1000
1001// ---------------------------------------------------------------------------
1002// Unit tests
1003// ---------------------------------------------------------------------------
1004
1005#[cfg(test)]
1006mod tests {
1007    use super::*;
1008    use hyalo_core::schema::{PropertyConstraint, SchemaConfig, TypeSchema};
1009    use std::collections::HashMap;
1010
1011    fn make_schema(
1012        default_required: &[&str],
1013        type_name: &str,
1014        type_required: &[&str],
1015        type_properties: HashMap<&str, PropertyConstraint>,
1016    ) -> SchemaConfig {
1017        let default = TypeSchema {
1018            required: default_required.iter().map(ToString::to_string).collect(),
1019            ..Default::default()
1020        };
1021        let mut props: HashMap<String, PropertyConstraint> = HashMap::new();
1022        for (k, v) in type_properties {
1023            props.insert(k.to_owned(), v);
1024        }
1025        let type_schema = TypeSchema {
1026            required: type_required.iter().map(ToString::to_string).collect(),
1027            properties: props,
1028            ..Default::default()
1029        };
1030        let mut types = HashMap::new();
1031        types.insert(type_name.to_owned(), type_schema);
1032        SchemaConfig { default, types }
1033    }
1034
1035    // --- is_iso8601_date ---
1036
1037    #[test]
1038    fn valid_date() {
1039        assert!(is_iso8601_date("2026-04-13"));
1040    }
1041
1042    #[test]
1043    fn normalize_date_padding_and_calendar() {
1044        // Short month/day get zero-padded.
1045        assert_eq!(normalize_date("2026-4-9"), Some("2026-04-09".to_owned()));
1046        // Feb 29 is valid in leap years only.
1047        assert_eq!(normalize_date("2024-2-29"), Some("2024-02-29".to_owned()));
1048        assert_eq!(normalize_date("2023-2-29"), None);
1049        // Out-of-range days/months are rejected, not silently normalized.
1050        assert_eq!(normalize_date("2026-02-31"), None);
1051        assert_eq!(normalize_date("2026-04-31"), None);
1052        assert_eq!(normalize_date("2026-13-01"), None);
1053    }
1054
1055    #[test]
1056    fn invalid_date_format() {
1057        assert!(!is_iso8601_date("April 13"));
1058        assert!(!is_iso8601_date("13-04-2026"));
1059        assert!(!is_iso8601_date("2026/04/13"));
1060    }
1061
1062    // Test helper: wraps `validate_constraint` with a throwaway regex cache.
1063    fn vc(name: &str, value: &Value, c: &PropertyConstraint) -> Option<Violation> {
1064        let mut cache = HashMap::new();
1065        validate_constraint(name, value, c, &mut cache)
1066    }
1067
1068    // --- validate_constraint ---
1069
1070    #[test]
1071    fn date_constraint_valid() {
1072        let v = vc(
1073            "date",
1074            &Value::String("2026-04-13".into()),
1075            &PropertyConstraint::Date,
1076        );
1077        assert!(v.is_none());
1078    }
1079
1080    #[test]
1081    fn date_constraint_invalid() {
1082        let v = vc(
1083            "date",
1084            &Value::String("April 13".into()),
1085            &PropertyConstraint::Date,
1086        );
1087        assert!(matches!(
1088            v,
1089            Some(Violation {
1090                severity: Severity::Error,
1091                ..
1092            })
1093        ));
1094    }
1095
1096    #[test]
1097    fn enum_constraint_valid() {
1098        let v = vc(
1099            "status",
1100            &Value::String("planned".into()),
1101            &PropertyConstraint::Enum {
1102                values: vec!["planned".into(), "done".into()],
1103            },
1104        );
1105        assert!(v.is_none());
1106    }
1107
1108    #[test]
1109    fn enum_constraint_invalid_with_suggestion() {
1110        let v = vc(
1111            "status",
1112            &Value::String("planed".into()),
1113            &PropertyConstraint::Enum {
1114                values: vec!["planned".into(), "done".into()],
1115            },
1116        );
1117        let viol = v.expect("expected violation");
1118        assert_eq!(viol.severity, Severity::Error);
1119        assert!(viol.message.contains("did you mean \"planned\""));
1120    }
1121
1122    #[test]
1123    fn number_constraint_valid() {
1124        let v = vc(
1125            "priority",
1126            &Value::Number(5.into()),
1127            &PropertyConstraint::Number,
1128        );
1129        assert!(v.is_none());
1130    }
1131
1132    #[test]
1133    fn number_constraint_invalid() {
1134        let v = vc(
1135            "priority",
1136            &Value::String("five".into()),
1137            &PropertyConstraint::Number,
1138        );
1139        assert!(matches!(
1140            v,
1141            Some(Violation {
1142                severity: Severity::Error,
1143                ..
1144            })
1145        ));
1146    }
1147
1148    #[test]
1149    fn boolean_constraint_valid() {
1150        let v = vc("draft", &Value::Bool(true), &PropertyConstraint::Boolean);
1151        assert!(v.is_none());
1152    }
1153
1154    #[test]
1155    fn boolean_constraint_invalid() {
1156        let v = vc(
1157            "draft",
1158            &Value::String("yes".into()),
1159            &PropertyConstraint::Boolean,
1160        );
1161        assert!(matches!(
1162            v,
1163            Some(Violation {
1164                severity: Severity::Error,
1165                ..
1166            })
1167        ));
1168    }
1169
1170    #[test]
1171    fn list_constraint_valid() {
1172        let v = vc("tags", &Value::Array(vec![]), &PropertyConstraint::List);
1173        assert!(v.is_none());
1174    }
1175
1176    #[test]
1177    fn list_constraint_invalid() {
1178        let v = vc(
1179            "tags",
1180            &Value::String("rust".into()),
1181            &PropertyConstraint::List,
1182        );
1183        assert!(matches!(
1184            v,
1185            Some(Violation {
1186                severity: Severity::Error,
1187                ..
1188            })
1189        ));
1190    }
1191
1192    #[test]
1193    fn string_pattern_constraint_valid() {
1194        let v = vc(
1195            "branch",
1196            &Value::String("iter-42/my-feature".into()),
1197            &PropertyConstraint::String {
1198                pattern: Some(r"^iter-\d+/".into()),
1199            },
1200        );
1201        assert!(v.is_none());
1202    }
1203
1204    #[test]
1205    fn string_pattern_constraint_invalid() {
1206        let v = vc(
1207            "branch",
1208            &Value::String("feature/my-branch".into()),
1209            &PropertyConstraint::String {
1210                pattern: Some(r"^iter-\d+/".into()),
1211            },
1212        );
1213        assert!(matches!(
1214            v,
1215            Some(Violation {
1216                severity: Severity::Error,
1217                ..
1218            })
1219        ));
1220    }
1221
1222    // --- lint_file via a temp file ---
1223
1224    #[test]
1225    fn lint_file_missing_required() {
1226        let dir = tempfile::tempdir().unwrap();
1227        let path = dir.path().join("note.md");
1228        std::fs::write(&path, "---\ntitle: Hello\n---\nBody\n").unwrap();
1229
1230        let schema = make_schema(&["title", "date"], "note", &[], HashMap::new());
1231        let result = lint_file(&path, "note.md", &schema).unwrap();
1232        // date is in default required, but only "title" is present.
1233        // No type -> warn about no type. date missing -> error.
1234        assert!(
1235            result
1236                .violations
1237                .iter()
1238                .any(|v| v.severity == Severity::Error
1239                    && v.message.contains("missing required property \"date\""))
1240        );
1241    }
1242
1243    #[test]
1244    fn lint_file_no_type_warn() {
1245        let dir = tempfile::tempdir().unwrap();
1246        let path = dir.path().join("note.md");
1247        std::fs::write(&path, "---\ntitle: Hello\n---\nBody\n").unwrap();
1248
1249        let schema = make_schema(&["title"], "note", &[], HashMap::new());
1250        let result = lint_file(&path, "note.md", &schema).unwrap();
1251        assert!(
1252            result
1253                .violations
1254                .iter()
1255                .any(|v| v.severity == Severity::Warn && v.message.contains("no 'type' property"))
1256        );
1257    }
1258
1259    #[test]
1260    fn lint_file_no_violations_clean_file() {
1261        let dir = tempfile::tempdir().unwrap();
1262        let path = dir.path().join("note.md");
1263        std::fs::write(
1264            &path,
1265            "---\ntitle: Hello\ntype: note\ntags:\n  - rust\n---\nBody\n",
1266        )
1267        .unwrap();
1268
1269        let schema = make_schema(&["title"], "note", &[], HashMap::new());
1270        let result = lint_file(&path, "note.md", &schema).unwrap();
1271        assert!(result.violations.is_empty());
1272    }
1273
1274    #[test]
1275    fn lint_no_schema_no_violations() {
1276        let dir = tempfile::tempdir().unwrap();
1277        let path = dir.path().join("note.md");
1278        std::fs::write(&path, "---\ntitle: Hello\n---\nBody\n").unwrap();
1279
1280        let schema = SchemaConfig::default();
1281        let files = vec![(path, "note.md".to_owned())];
1282        let (_, counts) = lint_files(&files, &schema).unwrap();
1283        assert_eq!(counts.errors, 0);
1284        assert_eq!(counts.warnings, 0);
1285    }
1286
1287    // --- UX-3: comma-joined tag detection and fix ---
1288
1289    #[test]
1290    fn lint_warns_on_comma_joined_tag() {
1291        let dir = tempfile::tempdir().unwrap();
1292        let path = dir.path().join("note.md");
1293        std::fs::write(
1294            &path,
1295            "---\ntitle: Hello\ntags:\n  - cli,ux\n  - rust\n---\nBody\n",
1296        )
1297        .unwrap();
1298
1299        let schema = SchemaConfig::default();
1300        let result = lint_file(&path, "note.md", &schema).unwrap();
1301        let comma_warn = result
1302            .violations
1303            .iter()
1304            .find(|v| v.severity == Severity::Warn && v.message.contains("cli,ux"));
1305        assert!(
1306            comma_warn.is_some(),
1307            "expected a warning about comma-joined tag, got: {:#?}",
1308            result.violations
1309        );
1310        assert!(
1311            comma_warn.unwrap().message.contains("comma-joined"),
1312            "message should mention comma-joined"
1313        );
1314    }
1315
1316    #[test]
1317    fn lint_fix_splits_comma_joined_tags() {
1318        let dir = tempfile::tempdir().unwrap();
1319        let path = dir.path().join("note.md");
1320        std::fs::write(
1321            &path,
1322            "---\ntitle: Hello\ntags:\n  - cli,ux\n  - rust\n---\nBody\n",
1323        )
1324        .unwrap();
1325
1326        let schema = SchemaConfig::default();
1327        let files = vec![(path.clone(), "note.md".to_owned())];
1328        let (_, counts) =
1329            lint_files_with_options(&files, &schema, FixMode::Apply, None, &mut None, None)
1330                .unwrap();
1331
1332        // After fix, the comma-joined tag warning should be gone.
1333        assert_eq!(counts.warnings, 0, "comma-tag warning should be fixed");
1334
1335        let content = std::fs::read_to_string(&path).unwrap();
1336        // Both parts of the split tag should be separate items.
1337        assert!(content.contains("- cli"), "expected 'cli' as separate tag");
1338        assert!(content.contains("- ux"), "expected 'ux' as separate tag");
1339        // The original comma-joined form must be gone.
1340        assert!(
1341            !content.contains("cli,ux"),
1342            "comma-joined tag should be removed"
1343        );
1344    }
1345}