alint-core 0.4.8

Core types and execution engine for the alint language-agnostic repository linter.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
use std::collections::HashMap;
use std::path::PathBuf;

use serde::Deserialize;

use crate::facts::FactSpec;
use crate::level::Level;

/// Parsed form of a `.alint.yml` file.
///
/// Unknown top-level keys are rejected at deserialization time
/// (`deny_unknown_fields`).
//
// NOTE(review): `#[derive(Default)]` does not go through the serde
// default functions. `Config::default()` yields `version == 0`,
// `respect_gitignore == false` and `fix_size_limit == None`, whereas
// a deserialized file that omits those keys gets `true` and
// `Some(1 << 20)` for the latter two. Confirm callers of
// `Config::default()` expect that difference.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct Config {
    /// Version number declared by the file. Required: there is no
    /// serde default, so a file without `version:` fails to
    /// deserialize. Presumably compared against
    /// [`Config::CURRENT_VERSION`] by the loader — not visible here.
    pub version: u32,
    /// Other config files this one inherits from. Entries resolved
    /// left-to-right; later entries override earlier ones; the
    /// current file's own definitions override everything it extends.
    ///
    /// Each entry is either a bare string (local path, `https://`
    /// URL with SRI, or `alint://bundled/...`) or a mapping with
    /// `url:` and optional `only:` / `except:` filters.
    #[serde(default)]
    pub extends: Vec<ExtendsEntry>,
    /// Entries of the `ignore:` key; nested-config discovery is
    /// documented below as respecting them. Defaults to empty.
    /// Presumably ignore-style patterns — the syntax is not
    /// interpreted in this file.
    #[serde(default)]
    pub ignore: Vec<String>,
    /// Whether `.gitignore` is respected. Defaults to `true` when the
    /// key is omitted from the YAML.
    #[serde(default = "default_respect_gitignore")]
    pub respect_gitignore: bool,
    /// Free-form string variables referenced from rule messages and
    /// `when` expressions as `{{vars.<name>}}` and `vars.<name>`.
    #[serde(default)]
    pub vars: HashMap<String, String>,
    /// Repository properties evaluated once per run and referenced from
    /// `when` clauses as `facts.<id>`.
    #[serde(default)]
    pub facts: Vec<FactSpec>,
    /// Rule definitions declared directly in this file. Defaults to
    /// empty.
    #[serde(default)]
    pub rules: Vec<RuleSpec>,
    /// Maximum file size, in bytes, that content-editing fixes
    /// will read and rewrite. Files over this limit are reported
    /// as `Skipped` in the fix report and a one-line warning is
    /// printed to stderr. Defaults to 1 MiB; set explicitly to
    /// `null` to disable the cap entirely.
    ///
    /// Path-only fixes (`file_create`, `file_remove`,
    /// `file_rename`) ignore the cap — they don't read content.
    #[serde(default = "default_fix_size_limit")]
    pub fix_size_limit: Option<u64>,
    /// Opt in to discovery of `.alint.yml` / `.alint.yaml` files
    /// in subdirectories. When `true`, the loader walks the
    /// repository tree (from the root config's directory,
    /// respecting `.gitignore` and `ignore:`) and finds any
    /// nested config files; each nested rule's path-like fields
    /// (`paths`, `select`, `primary`) are prefixed with the
    /// directory that nested config lives in, so the rule
    /// auto-scopes to that subtree. Default `false`.
    ///
    /// Only the user's top-level config may set this — nested
    /// configs themselves cannot spawn further nested discovery.
    #[serde(default)]
    pub nested_configs: bool,
}

// Serde default for `Config.fix_size_limit`.
//
// The field is an `Option<u64>` because a YAML `null` means "no
// limit", and a serde default fn has to return the field's exact
// type. The `Some` wrapper is therefore required, and clippy's
// `unnecessary_wraps` lint is silenced on purpose.
#[allow(clippy::unnecessary_wraps)]
fn default_fix_size_limit() -> Option<u64> {
    // 1 MiB, expressed in bytes.
    const ONE_MIB: u64 = 1024 * 1024;
    Some(ONE_MIB)
}

// Serde default for `Config.respect_gitignore`: a config file that
// omits the key behaves as if it had written `respect_gitignore: true`.
fn default_respect_gitignore() -> bool {
    true
}

impl Config {
    /// The config `version` this crate defines as current.
    ///
    /// NOTE(review): presumably the loader compares a file's
    /// `version:` against this value; that check is not in this file.
    pub const CURRENT_VERSION: u32 = 1;
}

/// A single `extends:` entry. Accepts either a bare string (the
/// classic form — a local path, `https://` URL with SRI, or
/// `alint://bundled/<name>@<rev>`) or a mapping that adds
/// `only:` / `except:` filters on the inherited rule set.
///
/// ```yaml
/// extends:
///   - alint://bundled/oss-baseline@v1             # classic form
///   - url: alint://bundled/rust@v1                # filtered form
///     except: [rust-no-target-dir]                # drop by id
///   - url: ./team-defaults.yml
///     only: [team-copyright-header]               # keep by id
/// ```
///
/// Filters resolve against the *fully-resolved* rule set of the
/// entry (i.e. anything it transitively extends). `only:` and
/// `except:` are mutually exclusive on a single entry; listing an
/// unknown rule id is a config error so typos surface at load
/// time.
///
/// Deserialized as an untagged enum: a YAML string becomes
/// [`ExtendsEntry::Url`], a mapping becomes
/// [`ExtendsEntry::Filtered`].
//
// NOTE(review): the type itself allows `only` and `except` to both be
// `Some`; the mutual-exclusion and unknown-id checks described above
// are not in this file — presumably the loader performs them.
// NOTE(review): the mapping form does not set `deny_unknown_fields`,
// so a misspelled filter key (e.g. `onyl:`) looks like it would be
// ignored rather than rejected — confirm.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum ExtendsEntry {
    /// Bare-string form: just the URL / path, no filters.
    Url(String),
    /// Mapping form: a URL / path plus optional rule-id filters.
    Filtered {
        /// URL / path of the extended config (required).
        url: String,
        /// Rule ids to keep; `None` when `only:` is absent.
        #[serde(default)]
        only: Option<Vec<String>>,
        /// Rule ids to drop; `None` when `except:` is absent.
        #[serde(default)]
        except: Option<Vec<String>>,
    },
}

impl ExtendsEntry {
    /// The URL / path of the extended config. Uniform across both
    /// enum variants.
    pub fn url(&self) -> &str {
        match self {
            Self::Url(s) | Self::Filtered { url: s, .. } => s,
        }
    }

    /// Rule ids to keep (drop everything else). `None` when no
    /// `only:` filter is specified.
    pub fn only(&self) -> Option<&[String]> {
        match self {
            Self::Filtered { only: Some(v), .. } => Some(v),
            _ => None,
        }
    }

    /// Rule ids to drop. `None` when no `except:` filter is
    /// specified.
    pub fn except(&self) -> Option<&[String]> {
        match self {
            Self::Filtered {
                except: Some(v), ..
            } => Some(v),
            _ => None,
        }
    }
}

/// YAML shape for a rule's `paths:` field — a single glob, an array (with
/// optional `!pattern` negations), or an explicit `{include, exclude}` pair.
/// For the include/exclude form, each field accepts either a single string
/// or a list of strings.
///
/// Deserialized as an untagged enum, so the YAML node type picks the
/// variant: string → `Single`, sequence → `Many`, mapping →
/// `IncludeExclude`. The `!` prefix is not interpreted here; entries are
/// stored as written.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum PathsSpec {
    /// `paths: "<glob>"`
    Single(String),
    /// `paths: ["<glob>", "!<glob>", ...]`
    Many(Vec<String>),
    /// `paths: { include: ..., exclude: ... }`. Either key may be
    /// omitted (it then defaults to an empty list) and each accepts a
    /// string or a list of strings via `string_or_vec`.
    IncludeExclude {
        #[serde(default, deserialize_with = "string_or_vec")]
        include: Vec<String>,
        #[serde(default, deserialize_with = "string_or_vec")]
        exclude: Vec<String>,
    },
}

/// Serde `deserialize_with` helper: accepts either a single string
/// or a list of strings and always yields a `Vec<String>`. A lone
/// string becomes a one-element vector.
fn string_or_vec<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    // Untagged helper: the shape of the input (scalar vs. sequence)
    // selects the variant.
    #[derive(Deserialize)]
    #[serde(untagged)]
    enum OneOrMany {
        One(String),
        Many(Vec<String>),
    }

    OneOrMany::deserialize(deserializer).map(|parsed| match parsed {
        OneOrMany::One(single) => vec![single],
        OneOrMany::Many(list) => list,
    })
}

/// YAML-level description of a rule before it is instantiated into a `Box<dyn Rule>`
/// by a [`RuleBuilder`](crate::registry::RuleBuilder).
///
/// `id`, `kind` and `level` are required; every other named field is
/// optional. Keys that match none of the named fields are not rejected —
/// they are collected into [`RuleSpec::extra`].
#[derive(Debug, Clone, Deserialize)]
pub struct RuleSpec {
    /// Rule identifier. `extends:` filters (`only:` / `except:`) refer
    /// to rules by this id.
    pub id: String,
    /// Name of the rule kind. Presumably used to select the matching
    /// rule builder — the lookup is not in this file.
    pub kind: String,
    /// Level assigned to the rule (see [`Level`]).
    pub level: Level,
    /// Optional `paths:` scope; see [`PathsSpec`] for the accepted shapes.
    #[serde(default)]
    pub paths: Option<PathsSpec>,
    /// Optional custom message. May reference `{{vars.<name>}}`
    /// (see `Config::vars`).
    #[serde(default)]
    pub message: Option<String>,
    /// Optional policy link for the rule; not interpreted in this file.
    #[serde(default)]
    pub policy_url: Option<String>,
    /// Optional `when` expression; may reference `vars.<name>` and
    /// `facts.<id>` (see `Config::vars` / `Config::facts`).
    #[serde(default)]
    pub when: Option<String>,
    /// Optional mechanical-fix strategy. Rules whose builders understand
    /// the chosen op attach a [`Fixer`](crate::Fixer) to the built rule;
    /// rules whose kind is incompatible with the op return a config error
    /// at build time.
    #[serde(default)]
    pub fix: Option<FixSpec>,
    /// Restrict the rule to files / directories tracked in git's index.
    /// When `true`, the rule's `paths`-matched entries are intersected
    /// with the set of git-tracked files; entries that exist in the
    /// walked tree but aren't in `git ls-files` output are skipped.
    /// Only meaningful for rule kinds that opt in (currently the
    /// existence family — `file_exists`, `file_absent`, `dir_exists`,
    /// `dir_absent`); rule kinds that don't support it surface a clean
    /// config error when this is `true` so silent mis-configuration
    /// doesn't slip through.
    ///
    /// Default `false`. Has no effect outside a git repo.
    #[serde(default)]
    pub git_tracked_only: bool,
    /// The remaining YAML keys of the rule — everything not consumed by
    /// one of the named fields above (that is what `#[serde(flatten)]`
    /// on a map collects). Retained so each rule builder can deserialize
    /// its kind-specific fields without every option being represented
    /// here.
    #[serde(flatten)]
    pub extra: serde_yaml_ng::Mapping,
}

/// The `fix:` block on a rule. Exactly one op key must be present —
/// alint errors at load time when the op and rule kind are incompatible.
///
/// Deserialized as an untagged enum: each variant is a one-key mapping
/// whose key is the op name (`file_create:`, `file_remove:`, …) and whose
/// value is that op's parameter struct. Variants are tried in declaration
/// order and the first one that deserializes wins.
//
// NOTE(review): nothing in this enum itself rejects a mapping that
// carries two op keys — the variants do not deny unknown fields, so the
// first matching variant would be chosen and the other key dropped.
// Confirm whether the loader validates "exactly one op key" elsewhere.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum FixSpec {
    /// `fix: { file_create: { content: ..., path: ..., create_parents: ... } }`
    FileCreate {
        file_create: FileCreateFixSpec,
    },
    /// `fix: { file_remove: {} }`
    FileRemove {
        file_remove: FileRemoveFixSpec,
    },
    /// `fix: { file_prepend: { content: ... } }`
    FilePrepend {
        file_prepend: FilePrependFixSpec,
    },
    /// `fix: { file_append: { content: ... } }`
    FileAppend {
        file_append: FileAppendFixSpec,
    },
    /// `fix: { file_rename: {} }`
    FileRename {
        file_rename: FileRenameFixSpec,
    },
    /// `fix: { file_trim_trailing_whitespace: {} }`
    FileTrimTrailingWhitespace {
        file_trim_trailing_whitespace: FileTrimTrailingWhitespaceFixSpec,
    },
    /// `fix: { file_append_final_newline: {} }`
    FileAppendFinalNewline {
        file_append_final_newline: FileAppendFinalNewlineFixSpec,
    },
    /// `fix: { file_normalize_line_endings: {} }`
    FileNormalizeLineEndings {
        file_normalize_line_endings: FileNormalizeLineEndingsFixSpec,
    },
    /// `fix: { file_strip_bidi: {} }`
    FileStripBidi {
        file_strip_bidi: FileStripBidiFixSpec,
    },
    /// `fix: { file_strip_zero_width: {} }`
    FileStripZeroWidth {
        file_strip_zero_width: FileStripZeroWidthFixSpec,
    },
    /// `fix: { file_strip_bom: {} }`
    FileStripBom {
        file_strip_bom: FileStripBomFixSpec,
    },
    /// `fix: { file_collapse_blank_lines: {} }`
    FileCollapseBlankLines {
        file_collapse_blank_lines: FileCollapseBlankLinesFixSpec,
    },
}

impl FixSpec {
    /// Returns the YAML key that selects this op, for use in
    /// config-error messages. Each pattern names the variant's single
    /// field so the returned string can be checked against it at a
    /// glance.
    pub fn op_name(&self) -> &'static str {
        match self {
            Self::FileCreate { file_create: _ } => "file_create",
            Self::FileRemove { file_remove: _ } => "file_remove",
            Self::FilePrepend { file_prepend: _ } => "file_prepend",
            Self::FileAppend { file_append: _ } => "file_append",
            Self::FileRename { file_rename: _ } => "file_rename",
            Self::FileTrimTrailingWhitespace {
                file_trim_trailing_whitespace: _,
            } => "file_trim_trailing_whitespace",
            Self::FileAppendFinalNewline {
                file_append_final_newline: _,
            } => "file_append_final_newline",
            Self::FileNormalizeLineEndings {
                file_normalize_line_endings: _,
            } => "file_normalize_line_endings",
            Self::FileStripBidi { file_strip_bidi: _ } => "file_strip_bidi",
            Self::FileStripZeroWidth {
                file_strip_zero_width: _,
            } => "file_strip_zero_width",
            Self::FileStripBom { file_strip_bom: _ } => "file_strip_bom",
            Self::FileCollapseBlankLines {
                file_collapse_blank_lines: _,
            } => "file_collapse_blank_lines",
        }
    }
}

/// Parameters of the `file_create` fix op. Only `content` is required;
/// any key other than the three below is a deserialization error
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct FileCreateFixSpec {
    /// Content to write. Required — there is no implicit empty default;
    /// for an empty file, pass `content: ""` explicitly.
    pub content: String,
    /// Path to create, relative to the repo root. When omitted, the
    /// rule builder substitutes the first literal entry from the rule's
    /// `paths:` list.
    #[serde(default)]
    pub path: Option<PathBuf>,
    /// Whether to create intermediate directories. Defaults to true.
    #[serde(default = "default_create_parents")]
    pub create_parents: bool,
}

// Serde default for `FileCreateFixSpec.create_parents`: `true` when
// the key is omitted from the YAML.
fn default_create_parents() -> bool {
    true
}

/// Empty marker: `file_remove` takes no parameters, and any key
/// supplied under it is a deserialization error
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct FileRemoveFixSpec {}

/// Parameters of the `file_prepend` fix op. `content` is the only
/// accepted key and is required; anything else is a deserialization
/// error (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct FilePrependFixSpec {
    /// Bytes to insert at the beginning of each violating file. A
    /// trailing newline in `content` is the caller's responsibility.
    pub content: String,
}

/// Parameters of the `file_append` fix op. `content` is the only
/// accepted key and is required; anything else is a deserialization
/// error (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct FileAppendFixSpec {
    /// Bytes to append to each violating file. A leading newline in
    /// `content` is the caller's responsibility.
    pub content: String,
}

/// Empty marker: `file_rename` takes no parameters. The target name
/// is derived from the parent rule (e.g. `filename_case` converts the
/// stem to its configured case; the extension is preserved).
///
/// Any key supplied under `file_rename:` is a deserialization error
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct FileRenameFixSpec {}

/// Empty marker for the `file_trim_trailing_whitespace` op. Behavior:
/// read file (subject to `fix_size_limit`), strip trailing space/tab
/// on every line, write back.
///
/// Takes no parameters; any key supplied under the op is a
/// deserialization error (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct FileTrimTrailingWhitespaceFixSpec {}

/// Empty marker for the `file_append_final_newline` op. Behavior: if
/// the file has content and does not end with `\n`, append one.
///
/// Takes no parameters; any key supplied under the op is a
/// deserialization error (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct FileAppendFinalNewlineFixSpec {}

/// Empty marker for the `file_normalize_line_endings` op. Behavior:
/// rewrite the file with every line ending replaced by the parent
/// rule's configured target (`lf` or `crlf`).
///
/// Takes no parameters of its own; any key supplied under the op is a
/// deserialization error (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct FileNormalizeLineEndingsFixSpec {}

/// Empty marker for the `file_strip_bidi` op. Behavior: remove every
/// Unicode bidi control character (U+202A–202E, U+2066–2069) from the
/// file's content.
///
/// Takes no parameters; any key supplied under the op is a
/// deserialization error (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct FileStripBidiFixSpec {}

/// Empty marker for the `file_strip_zero_width` op. Behavior: remove
/// every zero-width character (U+200B / U+200C / U+200D / U+FEFF)
/// from the file's content, *except* a leading BOM (U+FEFF at
/// position 0) — that's the responsibility of the `no_bom` rule.
///
/// Takes no parameters; any key supplied under the op is a
/// deserialization error (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct FileStripZeroWidthFixSpec {}

/// Empty marker for the `file_strip_bom` op. Behavior: remove a
/// leading UTF-8/UTF-16/UTF-32 BOM byte sequence if present;
/// otherwise a no-op.
///
/// Takes no parameters; any key supplied under the op is a
/// deserialization error (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct FileStripBomFixSpec {}

/// Empty marker for the `file_collapse_blank_lines` op. Behavior:
/// collapse runs of blank lines longer than the parent rule's `max`
/// down to exactly `max` blank lines.
///
/// Takes no parameters of its own (`max` comes from the parent rule);
/// any key supplied under the op is a deserialization error
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct FileCollapseBlankLinesFixSpec {}

impl RuleSpec {
    /// Deserialize this rule's options into a typed struct `T`.
    ///
    /// The input handed to `T` is a clone of [`RuleSpec::extra`] wrapped
    /// as a YAML mapping value, so `T` sees exactly the keys held in
    /// `extra` and nothing else.
    ///
    /// NOTE(review): earlier documentation said the common fields
    /// (`id`, `level`, `paths`, …) are reconstructed into the mapping
    /// first. The code does not do that; confirm no options struct
    /// relies on seeing them.
    ///
    /// # Errors
    ///
    /// Returns the `serde_yaml_ng` error, converted into the crate's
    /// error type, when the mapping does not match `T`.
    pub fn deserialize_options<T>(&self) -> crate::error::Result<T>
    where
        T: serde::de::DeserializeOwned,
    {
        let mapping = serde_yaml_ng::Value::Mapping(self.extra.clone());
        let options: T = serde_yaml_ng::from_value(mapping)?;
        Ok(options)
    }
}

/// Rule specification for nested rules (e.g. the `require:` block of
/// `for_each_dir`). Unlike [`RuleSpec`], `id` and `level` are synthesized
/// from the parent rule — users just supply the `kind` plus kind-specific
/// options, optionally with a `message` / `policy_url` / `when`.
///
/// There is no `fix` or `git_tracked_only` field here. Because `extra` is
/// a flattened map and unknown keys are not denied, any key other than the
/// named fields below is collected into `extra`.
#[derive(Debug, Clone, Deserialize)]
pub struct NestedRuleSpec {
    /// Name of the rule kind (required).
    pub kind: String,
    /// Optional `paths:` scope; see [`PathsSpec`].
    #[serde(default)]
    pub paths: Option<PathsSpec>,
    /// Optional custom message.
    #[serde(default)]
    pub message: Option<String>,
    /// Optional policy link; not interpreted in this file.
    #[serde(default)]
    pub policy_url: Option<String>,
    /// Optional `when` expression.
    #[serde(default)]
    pub when: Option<String>,
    /// Remaining YAML keys — the kind-specific options not consumed by
    /// the named fields above.
    #[serde(flatten)]
    pub extra: serde_yaml_ng::Mapping,
}

impl NestedRuleSpec {
    /// Build the concrete [`RuleSpec`] for one iteration of the parent
    /// rule.
    ///
    /// * `id` is synthesized as `"{parent_id}.require[{idx}]"`.
    /// * `level` is the value passed in (inherited from the parent).
    /// * `paths`, `message` and the kind-specific `extra` mapping are
    ///   rendered through `crate::template` with the iterated entry's
    ///   `tokens`.
    /// * `kind`, `policy_url` and `when` are copied verbatim — they are
    ///   not passed through template rendering here.
    /// * `fix` is always `None` and `git_tracked_only` is always `false`.
    pub fn instantiate(
        &self,
        parent_id: &str,
        idx: usize,
        level: Level,
        tokens: &crate::template::PathTokens,
    ) -> RuleSpec {
        let id = format!("{parent_id}.require[{idx}]");
        let kind = self.kind.clone();

        // Template-rendered fields.
        let paths = self
            .paths
            .as_ref()
            .map(|spec| crate::template::render_paths_spec(spec, tokens));
        let message = self
            .message
            .as_deref()
            .map(|text| crate::template::render_path(text, tokens));

        // Copied as written.
        let policy_url = self.policy_url.clone();
        let when = self.when.clone();

        let extra = crate::template::render_mapping(self.extra.clone(), tokens);

        RuleSpec {
            id,
            kind,
            level,
            paths,
            message,
            policy_url,
            when,
            fix: None,
            // `git_tracked_only` is a top-level-rule option for now:
            // nested rules neither declare it nor inherit it from
            // their parent. If `for_each_dir`-style nested rules ever
            // need it, this is the place to plumb it through.
            git_tracked_only: false,
            extra,
        }
    }
}