1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
// SPDX-License-Identifier: AGPL-3.0-or-later
// Copyright (C) 2026 Two Wells <contact@twowells.dev>
//! Workspace scanning and file indexing.
//!
//! Discovers all markdown files under the workspace root, parses them into
//! an in-memory index backed by the unified parse tree, and supports
//! incremental updates when individual files change.
use std::collections::{BTreeMap, HashMap};
use std::ops::Range;
use std::path::{Path, PathBuf};
use thiserror::Error;
use crate::block::{self, Syntax, Tree};
use crate::config::{Config, ConfigError};
use crate::fm;
use crate::json;
use crate::line_index::LineIndex;
use crate::structural;
use crate::toml;
use crate::validation::Diagnostic;
use crate::yaml;
/// Errors that can occur during workspace operations.
///
/// Within this file, `Workspace::scan` produces [`WorkspaceError::NoRoot`] and
/// `Workspace::update` produces [`WorkspaceError::Read`].
#[derive(Debug, Error)]
pub enum WorkspaceError {
/// Failed to read a markdown file.
///
/// `Workspace::update` returns this when the path is a file on disk but its
/// contents cannot be read; any existing index entry for the path is dropped
/// before the error is returned.
#[error("failed to read {path}: {source}")]
Read {
/// Path that could not be read. `Workspace::update` stores the
/// workspace-relative path here, not the absolute one.
path: PathBuf,
/// Underlying I/O error.
source: std::io::Error,
},
/// Failed to determine the workspace root.
///
/// Root discovery gives up when the starting path is neither an existing
/// file nor an existing directory, or is a file without a parent directory.
#[error("could not determine workspace root from {start}")]
NoRoot {
/// The starting path used for the search, in the form root discovery saw
/// it (after `Workspace::scan` attempted to canonicalize it).
start: PathBuf,
},
}
/// Parsed data for a single markdown file.
///
/// Built by `parse_content`. Everything except `structural` and
/// `suppressions` is derived from the file's own text and path at parse time;
/// those two start empty and are filled in by the owning `Workspace` once the
/// file has been inserted into the index.
#[derive(Debug)]
pub struct FileData {
/// The unified parse tree.
pub tree: Tree,
/// Parsed frontmatter, if present.
pub frontmatter: Option<Frontmatter>,
/// Diagnostics from frontmatter parsing (unknown backlink predicates): one
/// entry per `backlinks` key that `Config::is_known_predicate` rejects.
pub backlink_diagnostics: Vec<BacklinkDiagnostic>,
/// Frontmatter parse diagnostics (partial recovery — file is still indexed).
pub parse_diagnostics: Vec<ParseDiagnostic>,
/// Cached structural diagnostics (`structural::collect` output) for this
/// file.
///
/// Structural diagnostics are file-local: they depend only on this file's
/// tree plus workspace *membership* (the bare-path "refers to an existing
/// file" check reads the file set). The cache is refreshed when this file
/// is reparsed and, on a membership change, for every file — so the
/// diagnostic collectors read it directly instead of re-walking every
/// cached tree on each sync (issue 013 — stage 2).
///
/// `parse_content` leaves this empty; the workspace populates it after the
/// file is inserted.
pub structural: Vec<Diagnostic>,
/// Cached suppression ledger entry for this file (issue 036, decision 012):
/// what each suppression source (literal frontmatter exceptions, count-keys)
/// actually suppressed, by severity. Refreshed alongside `structural` from
/// the same `structural::collect_with_suppressions` pass; the CLI lint loop
/// aggregates it into the workspace ledger. The LSP never reads it.
///
/// Starts as `FileSuppressions::default()` until the workspace fills it.
pub suppressions: structural::FileSuppressions,
/// Cached extracted headings (`Tree::headings()` output, with precomputed
/// github/gitlab/vscode slugs) for this file.
///
/// Unlike `structural`, which also reads workspace membership, this is a
/// pure function of this file's own tree — so it is built directly in the
/// parse path and refreshed exactly when the file reparses. Fragment
/// validation reads it instead of re-deriving a linked document's headings
/// once per `file.md#heading` reference (issue 013 — ticket perf 06).
pub headings: Vec<block::Heading>,
/// Cached extracted links (`Tree::links()` output) for this file, classified
/// against its own workspace-relative path.
///
/// Like `headings`, a pure function of this file's tree and path, rebuilt
/// only on reparse. The forward-link, backlink, connectivity, and
/// reciprocal-link validators read it instead of re-walking and
/// re-classifying every link on each sync (ticket perf 06).
pub links: Vec<block::Link>,
/// Cached explicit in-page anchor targets (`Tree::anchors()` output) —
/// `id`/`name` values harvested from this file's raw-HTML `<a>` tags.
///
/// Like `headings`/`links`, a pure function of this file's own tree, rebuilt
/// only on reparse. Same-document fragment validation resolves `[…](#x)`
/// against this set in addition to heading slugs, so an explicit
/// `<a id="x"></a>` / `<a name="x">` anchor is a valid `#x` target (issue
/// 025).
pub anchors: Vec<block::Anchor>,
/// Cached byte-offset ↔ LSP-position map for this file's source.
///
/// Built once per parse from the same text that was handed to the parser
/// (`LineIndex::new(content)` in `parse_content`), so it refreshes exactly
/// when the file reparses — like `headings`/`links`, a pure function of this
/// file's own text. Diagnostic materialization routes its byte→UTF-16
/// position conversion through it instead of re-walking the source per
/// diagnostic, and the inverse direction feeds the future incremental
/// text-sync path (ticket perf 01).
pub line_index: LineIndex,
}
/// Parsed frontmatter from a markdown document.
///
/// `parse_content` recognizes three frontmatter dialects — YAML (`---`), TOML
/// (`+++`), and JSON (`{`) — and produces this same structure for each of
/// them.
#[derive(Debug)]
pub struct Frontmatter {
/// Byte range of the entire frontmatter block, taken from the parsed
/// block's span (including the opening and closing delimiters).
pub byte_range: Range<usize>,
/// 1-based line of the opening delimiter. `parse_content` always sets this
/// to 1.
pub start_line: usize,
/// 1-based line of the closing delimiter, computed from the last byte
/// covered by `byte_range` (clamped to the content length).
pub end_line: usize,
/// Parsed backlinks: backlink label → list of relative file paths. The
/// label is any known predicate — an inverse value or a forward label
/// (decision 008).
pub backlinks: HashMap<String, Vec<String>>,
/// Parsed `exceptions` block (issue 031, decision 011): per-reference,
/// reconciled suppressions over the path-shaped lints. Consumed by the
/// structural pass, which suppresses a matching live diagnostic and flags an
/// exception that matches none as unused.
pub exceptions: fm::Exceptions,
}
/// A diagnostic about a backlink predicate issue.
///
/// `parse_content` emits one of these for every key of the frontmatter
/// `backlinks` map that the workspace configuration does not recognize as a
/// predicate.
#[derive(Debug, PartialEq, Eq)]
pub struct BacklinkDiagnostic {
/// 1-based line number of the predicate key in the source file, as
/// reported by `fm::find_predicate_line`.
pub line: usize,
/// The unknown backlink predicate (known in neither direction).
pub predicate: String,
}
/// A parse diagnostic from frontmatter.
///
/// Copied out of the frontmatter block's own diagnostics during
/// `parse_content`, with the byte span reduced to a line number. These never
/// prevent the file from being indexed.
#[derive(Debug)]
pub struct ParseDiagnostic {
/// 1-based line number, derived from the start offset of the diagnostic's
/// span.
pub line: usize,
/// Severity level.
pub severity: fm::FmSeverity,
/// Human-readable message.
pub message: String,
}
/// In-memory index of all markdown files in a workspace.
///
/// Created by `Workspace::scan` and kept current through `Workspace::update`
/// (from disk) and `Workspace::update_content` (from an in-memory buffer).
#[derive(Debug)]
pub struct Workspace {
/// Absolute path to the workspace root directory.
root: PathBuf,
/// Configuration loaded from the workspace. Holds `Config::default()` when
/// loading failed (see `config_error`).
config: Config,
/// Error from loading `.lattice.toml`, if any. When set, defaults were used.
config_error: Option<ConfigError>,
/// Whether a `.lattice.toml` file was found in the workspace root. Checked
/// once, at scan time.
has_config: bool,
/// Parsed file data, keyed by workspace-relative path. A `BTreeMap`, so
/// iteration visits files in path order.
files: BTreeMap<PathBuf, FileData>,
}
impl Workspace {
/// Scan a workspace starting from `start`, discovering and parsing all
/// markdown files.
///
/// The workspace root is determined by walking up from `start` looking for
/// `.lattice.toml`, then `.git`. Falls back to `start` itself (or its
/// parent directory if `start` is a file).
///
/// # Errors
///
/// Returns [`WorkspaceError::NoRoot`] if the starting path cannot be
/// resolved to a directory. Individual file read errors are collected
/// but do not abort the scan.
pub fn scan(start: &Path) -> Result<Self, WorkspaceError> {
// Absolutize `start` before root discovery. A bare single-component
// relative path (`archive`) has `Path::parent() == Some("")` — an empty
// path — so the walk-up loop in `find_workspace_root` would step to `""`,
// match `.lattice.toml`/`.git` relative to the process CWD, and return an
// empty root that `discover_markdown_files` walks to zero files (a silent
// false-clean — issue 024). Canonicalizing here makes every spelling
// (`archive`, `archive/`, `tickets/misc`, `./archive/`, the absolute
// form) resolve to the same absolute root, and matches the canonicalized
// form `lint::scope_relative_to_root` strips the scope against, so
// discovery and scoping stay consistent. The scan path must exist on disk
// for the lint to be meaningful, so canonicalize is safe; on failure we
// fall back to `start` unchanged so behavior never regresses below the
// pre-fix state.
let start = std::fs::canonicalize(start).unwrap_or_else(|_| start.to_path_buf());
let start = start.as_path();
let root = find_workspace_root(start).ok_or_else(|| WorkspaceError::NoRoot {
start: start.to_path_buf(),
})?;
let has_config = root.join(".lattice.toml").is_file();
let (config, config_error) = match Config::load(&root) {
Ok(c) => (c, None),
Err(e) => {
tracing::warn!(root = %root.display(), "config error, using defaults: {e}");
(Config::default(), Some(e))
}
};
let md_paths = discover_markdown_files(&root);
let mut files = BTreeMap::new();
for abs_path in md_paths {
let rel_path = abs_path
.strip_prefix(&root)
.unwrap_or(&abs_path)
.to_path_buf();
match parse_file(&abs_path, &rel_path, &config) {
Ok(data) => {
files.insert(rel_path, data);
}
Err(e) => {
tracing::warn!(path = %rel_path.display(), "failed to read file: {e}");
}
}
}
let mut workspace = Self {
root,
config,
config_error,
has_config,
files,
};
// Membership is final after the scan loop, so structural caches can be
// computed for every file now (bare-path existence sees the full set).
workspace.recompute_all_structural();
Ok(workspace)
}
/// Bring the index entry for one file back in sync with the disk.
///
/// `rel_path` must be relative to the workspace root. A path that is no
/// longer a file on disk is dropped from the index; otherwise the file is
/// re-read and its entry replaced (or created).
///
/// # Errors
///
/// Returns [`WorkspaceError::Read`] if the file exists but cannot be read.
/// In that case any previous entry for the path is removed as well.
pub fn update(&mut self, rel_path: &Path) -> Result<(), WorkspaceError> {
    let on_disk = self.root.join(rel_path);

    if on_disk.is_file() {
        // Recorded before the insert: a brand-new path changes membership
        // and therefore invalidates every file's structural cache.
        let newly_indexed = !self.files.contains_key(rel_path);
        return match parse_file(&on_disk, rel_path, &self.config) {
            Ok(parsed) => {
                self.files.insert(rel_path.to_path_buf(), parsed);
                self.refresh_structural_after_update(rel_path, newly_indexed);
                Ok(())
            }
            Err(source) => {
                if self.files.remove(rel_path).is_some() {
                    self.recompute_all_structural();
                }
                Err(WorkspaceError::Read {
                    path: rel_path.to_path_buf(),
                    source,
                })
            }
        };
    }

    // Not a file any more: forget it, and recompute only if it was indexed.
    if self.files.remove(rel_path).is_some() {
        self.recompute_all_structural();
    }
    Ok(())
}
/// Replace (or create) the index entry for a file from in-memory text.
///
/// `rel_path` must be relative to the workspace root. Nothing is read from
/// disk — `content` is parsed as given — which is what the LSP server needs
/// for unsaved editor buffers.
pub fn update_content(&mut self, rel_path: &Path, content: &str) {
    let parsed = parse_content(content, rel_path, &self.config);
    // `insert` returns the previous entry; `None` means the path was not
    // indexed before, i.e. workspace membership just changed.
    let membership_changed = self.files.insert(rel_path.to_path_buf(), parsed).is_none();
    self.refresh_structural_after_update(rel_path, membership_changed);
}
/// Bring the structural cache up to date after `rel_path` was (re)parsed.
///
/// Editing an already-indexed file invalidates only that file's cache. When
/// membership changed (a file was added or removed), a bare-path existence
/// answer may flip in *any* file, so every cache is rebuilt.
fn refresh_structural_after_update(&mut self, rel_path: &Path, membership_changed: bool) {
    if !membership_changed {
        self.recompute_structural(rel_path);
        return;
    }
    self.recompute_all_structural();
}
/// Recompute and cache the structural diagnostics for a single indexed
/// file from its cached tree and the current workspace membership. No-op if
/// the path is not indexed.
fn recompute_structural(&mut self, rel_path: &Path) {
let Some(file_data) = self.files.get(rel_path) else {
return;
};
let file_exists = |target: &Path| self.files.contains_key(target);
// External-namespace (`{Name}/…`) references resolve existence-only
// against the configured alias directories, which live *outside* the
// workspace index — so this oracle `stat`s the real filesystem rather
// than consulting workspace membership (issue 030, decision 010). It
// only ever `stat`s; the aliased repository is never read or indexed.
let external_exists = |path: &Path| path.exists();
// The `exceptions` frontmatter block (issue 031) is per-file and lives
// in this file's own frontmatter; an empty default applies when there
// is no frontmatter. The structural pass suppresses a matching live
// diagnostic and reconciles the rest as unused.
let empty_exceptions = fm::Exceptions::default();
let exceptions = file_data
.frontmatter
.as_ref()
.map_or(&empty_exceptions, |fm| &fm.exceptions);
// Resolve this file's effective 028-family policy by applying any
// matching `[[override]]` level entries (issue 037, decision 012). Only
// an override that sets `stale_references` / `bare_paths` as a *level*
// changes the per-file collect (a `disabled` freeze, or a raise such as
// `warn` → `deny`); an `{ expect = N }` aggregate leaves the per-file
// level alone and is reconciled later by the lint loop's expect pass.
// The clone is taken only when an override actually moves this file's
// policy — the common no-override file reuses the base config directly.
let effective_policy = self.config.effective_policy(rel_path);
let effective_config;
let config: &Config = if effective_policy == self.config.policy {
&self.config
} else {
effective_config = Config {
policy: effective_policy,
..self.config.clone()
};
&effective_config
};
let (diagnostics, suppressions) = structural::collect_with_suppressions(
&file_data.tree,
rel_path,
config,
&file_exists,
&external_exists,
exceptions,
);
if let Some(file_data) = self.files.get_mut(rel_path) {
file_data.structural = diagnostics;
file_data.suppressions = suppressions;
}
}
/// Rebuild the structural cache of every indexed file.
///
/// Needed whenever membership changes: the bare-path "refers to an existing
/// file" check consults the whole file set, so adding or removing a single
/// file can alter the structural diagnostics of any other file.
fn recompute_all_structural(&mut self) {
    // Snapshot the keys first; the per-file recompute needs `&mut self`.
    let snapshot: Vec<PathBuf> = self.files.keys().cloned().collect();
    for indexed in snapshot {
        self.recompute_structural(&indexed);
    }
}
/// The path to the workspace root directory.
///
/// This is the directory selected by root discovery during
/// [`Workspace::scan`]; it is absolute whenever `scan` was able to
/// canonicalize its starting path. Keys of [`Workspace::files`] are relative
/// to it.
#[must_use]
pub fn root(&self) -> &Path {
    &self.root
}
/// The workspace configuration.
///
/// Holds `Config::default()` when `.lattice.toml` failed to load; check
/// [`Workspace::config_error`] to tell the two cases apart.
#[must_use]
pub fn config(&self) -> &Config {
    &self.config
}
/// Error from loading `.lattice.toml`, if any.
///
/// When this is `Some`, the workspace is using default configuration.
/// The LSP should publish this as a diagnostic on the config file;
/// the CLI should treat it as a hard error.
#[must_use]
pub fn config_error(&self) -> Option<&ConfigError> {
    self.config_error.as_ref()
}
/// Whether a `.lattice.toml` file was found in the workspace root.
///
/// The answer is recorded once, when the workspace is scanned.
///
/// This gates only the **graph** diagnostic tier — forward-link
/// existence, backlink reconciliation, and unknown predicates — which is
/// active only when this returns `true`. The **structural** tier (heading
/// hierarchy, trailing whitespace, HTML and code-block well-formedness,
/// bare paths, etc.) always runs via `structural::collect`, so
/// `has_config() == false` does not mean Lattice is silent.
#[must_use]
pub fn has_config(&self) -> bool {
    self.has_config
}
/// Parsed file data for all successfully parsed files, keyed by
/// workspace-relative path.
///
/// The map is a `BTreeMap`, so iterating it visits files in path order.
#[must_use]
pub fn files(&self) -> &BTreeMap<PathBuf, FileData> {
    &self.files
}
/// Get parsed data for a specific file by its workspace-relative path.
///
/// Returns `None` when the path is not in the index.
#[must_use]
pub fn file(&self, rel_path: &Path) -> Option<&FileData> {
    self.files.get(rel_path)
}
}
// --- Internal helpers ---
/// Read one markdown file from disk and parse it into [`FileData`].
///
/// `abs_path` is where the file is read from; `rel_path` is the
/// workspace-relative path handed on to the parser. The only possible
/// failure is the read itself — parsing never fails.
fn parse_file(
    abs_path: &Path,
    rel_path: &Path,
    config: &Config,
) -> Result<FileData, std::io::Error> {
    std::fs::read_to_string(abs_path).map(|text| parse_content(&text, rel_path, config))
}
/// Parse markdown content into [`FileData`].
///
/// Always succeeds — YAML parse errors become diagnostics instead of
/// hard failures, enabling partial frontmatter recovery.
#[must_use]
pub fn parse_content(content: &str, rel_path: &Path, config: &Config) -> FileData {
// Try YAML (`---`), then TOML (`+++`), then JSON (`{`) frontmatter.
let (fm_block, fm_syntax) = yaml::parse_frontmatter_block(content).map_or_else(
|| {
toml::parse_frontmatter_block(content).map_or_else(
|| {
json::parse_frontmatter_block(content)
.map_or((None, Syntax::Yaml), |block| (Some(block), Syntax::Json))
},
|block| (Some(block), Syntax::Toml),
)
},
|block| (Some(block), Syntax::Yaml),
);
// Build the tree (block structure + inline elements).
let frontmatter_span = fm_block.as_ref().map(|b| b.span);
let frontmatter_entries = fm_block.as_ref().map(|b| b.entries.as_slice());
let tree =
block::parse_tree_with_entries(content, frontmatter_span, fm_syntax, frontmatter_entries);
// Extract frontmatter data.
let mut frontmatter = None;
let mut backlink_diagnostics = Vec::new();
let mut parse_diagnostics = Vec::new();
if let Some(block) = &fm_block {
// Collect parse diagnostics (partial recovery).
for diag in &block.diagnostics {
let line = byte_offset_to_line(content, diag.span.start);
parse_diagnostics.push(ParseDiagnostic {
line,
severity: diag.severity,
message: diag.message.clone(),
});
}
let byte_range: Range<usize> = block.span.into();
let start_line = 1;
let end_byte = byte_range.end.min(content.len());
// Step back one byte off the span end (which includes the closing
// delimiter's line ending) so we land on the delimiter line itself
// rather than the line after it. Recognizes all line-ending styles.
let end_line = byte_offset_to_line(content, end_byte.saturating_sub(1));
let backlinks = fm::extract_backlinks(block, content);
let exceptions = fm::extract_exceptions(block, content);
// Validate backlink keys. A key may be any known predicate — an
// inverse value or a forward label (decision 008) — since a forward
// link may now derive a forward-labelled backlink on its target.
for predicate in backlinks.keys() {
if !config.is_known_predicate(predicate) {
let line = fm::find_predicate_line(block, predicate, content);
backlink_diagnostics.push(BacklinkDiagnostic {
line,
predicate: predicate.clone(),
});
}
}
frontmatter = Some(Frontmatter {
byte_range,
start_line,
end_line,
backlinks,
exceptions,
});
}
// Collect parse diagnostics from the tree itself.
for diag in tree.diagnostics() {
let _ = &rel_path; // reserved for future per-file filtering
let _ = diag;
}
// Extract this file's headings (with precomputed slugs) and links once, here
// in the parse path, so the graph validators read a cached vector instead of
// re-deriving it — `headings()` per fragment-link, `links()` per file per
// sync (ticket perf 06). Both are pure functions of this file's own tree and
// path, so the cache refreshes exactly when the file reparses; no
// post-insertion workspace step is needed, unlike `structural`.
let headings = tree.headings();
let links = tree.links(rel_path);
// Explicit in-page anchors (`<a id>` / `<a name>`) — cached like
// headings/links so same-document fragment validation resolves `[…](#x)`
// against explicit anchors as well as heading slugs (issue 025).
let anchors = tree.anchors();
// Build the byte↔position index from the same source the tree carries, so it
// refreshes exactly when the file reparses (ticket perf 01).
let line_index = LineIndex::new(content);
FileData {
tree,
frontmatter,
backlink_diagnostics,
parse_diagnostics,
// Left empty here — `structural::collect` needs workspace membership
// (for bare-path existence) that a standalone parse cannot know. The
// workspace fills it (and the suppression ledger) via
// `recompute_structural` after insertion.
structural: Vec::new(),
suppressions: structural::FileSuppressions::default(),
headings,
links,
anchors,
line_index,
}
}
/// Convert a byte offset to a 1-based line number.
///
/// Recognizes `\n`, `\r\n`, and bare `\r` line endings (delegates to the
/// crate-wide counter in [`crate::fm`]).
fn byte_offset_to_line(content: &str, offset: usize) -> usize {
fm::byte_offset_to_line(content, offset)
}
/// Locate the workspace root by searching upward from `start` for a
/// `.lattice.toml` file or a `.git` entry.
///
/// The search begins at `start` itself when it is a directory, or at its
/// parent when it is a file. The nearest directory carrying a marker wins;
/// if no ancestor has one, the directory the search began at is returned.
/// Yields `None` when `start` is neither an existing file nor an existing
/// directory, or is a file with no parent.
fn find_workspace_root(start: &Path) -> Option<PathBuf> {
    let origin: &Path = if start.is_file() {
        start.parent()?
    } else if start.is_dir() {
        start
    } else {
        return None;
    };

    // `ancestors` yields `origin` first, then each parent in turn.
    let nearest_marked = origin
        .ancestors()
        .find(|dir| dir.join(".lattice.toml").is_file() || dir.join(".git").exists());

    // No marker anywhere above: fall back to where the search started.
    Some(nearest_marked.unwrap_or(origin).to_path_buf())
}
/// Collect every file under `root` whose extension is `md` (compared
/// ASCII-case-insensitively), walking with the `ignore` crate's standard
/// filters enabled so `.gitignore` rules are honored.
///
/// Entries the walker reports as errors are skipped silently.
fn discover_markdown_files(root: &Path) -> Vec<PathBuf> {
    ignore::WalkBuilder::new(root)
        .standard_filters(true)
        .build()
        .filter_map(Result::ok)
        .filter(|entry| {
            let candidate = entry.path();
            candidate.is_file()
                && candidate
                    .extension()
                    .is_some_and(|ext| ext.eq_ignore_ascii_case("md"))
        })
        .map(|entry| entry.path().to_path_buf())
        .collect()
}
#[cfg(test)]
#[allow(clippy::expect_used, reason = "tests use expect for clarity")]
mod tests {
use std::fs;
use std::sync::Mutex;
use super::*;
/// Serializes tests that mutate the process-global current working
/// directory. `std::env::set_current_dir` affects the whole process, so two
/// CWD-mutating tests running concurrently (plain `cargo test` shares the
/// process; `cargo nextest` does not) would race. The lock is intentionally
/// poison-tolerant: a panic in one CWD test must not wedge the others.
static CWD_LOCK: Mutex<()> = Mutex::new(());
/// Create a temp directory with `.git` marker and optional files.
fn workspace_with_files(files: &[(&str, &str)]) -> tempfile::TempDir {
let dir = tempfile::tempdir().expect("create temp dir");
fs::create_dir(dir.path().join(".git")).expect("create .git");
for (path, content) in files {
let full = dir.path().join(path);
if let Some(parent) = full.parent() {
fs::create_dir_all(parent).expect("create parent dirs");
}
fs::write(&full, content).expect("write file");
}
dir
}
#[test]
fn discovers_markdown_files() {
let dir = workspace_with_files(&[
("README.md", "# Root"),
("docs/guide.md", "# Guide"),
("src/main.rs", "fn main() {}"),
]);
let ws = Workspace::scan(dir.path()).expect("scan should succeed");
assert_eq!(ws.files().len(), 2, "should find two .md files");
assert!(
ws.file(Path::new("README.md")).is_some(),
"should find README.md"
);
assert!(
ws.file(Path::new("docs/guide.md")).is_some(),
"should find docs/guide.md"
);
}
#[test]
fn respects_gitignore() {
let dir = workspace_with_files(&[
("README.md", "# Root"),
(".gitignore", "build/\n"),
("build/output.md", "# Should be ignored"),
]);
let ws = Workspace::scan(dir.path()).expect("scan should succeed");
assert_eq!(
ws.files().len(),
1,
"should find only README.md, not build/"
);
assert!(
ws.file(Path::new("README.md")).is_some(),
"should find README.md"
);
}
#[test]
fn workspace_root_from_git() {
let dir = workspace_with_files(&[("README.md", "# Root")]);
let ws = Workspace::scan(dir.path()).expect("scan should succeed");
assert_eq!(
ws.root(),
dir.path(),
"root should be the directory with .git"
);
}
#[test]
fn workspace_root_from_lattice_toml() {
let dir = workspace_with_files(&[(".lattice.toml", ""), ("README.md", "# Root")]);
// Remove .git so .lattice.toml is the marker.
fs::remove_dir(dir.path().join(".git")).expect("remove .git");
let ws = Workspace::scan(dir.path()).expect("scan should succeed");
assert_eq!(
ws.root(),
dir.path(),
"root should be the directory with .lattice.toml"
);
}
#[test]
fn workspace_root_from_subdirectory() {
let dir = workspace_with_files(&[("README.md", "# Root"), ("docs/guide.md", "# Guide")]);
let ws = Workspace::scan(&dir.path().join("docs")).expect("scan should succeed");
assert_eq!(
ws.root(),
dir.path(),
"root should be found by walking up to .git"
);
}
#[test]
fn bare_relative_subdir_from_cwd_discovers_root_and_files() {
// Issue 024 (reopened): a bare single-component relative directory
// (`docs`, no leading `./`) used to make `find_workspace_root` walk up to
// the empty path `""` — which `join`s relative to the process CWD and
// matched `.git`/`.lattice.toml`, returning an empty root that
// discovers zero files (a silent false-clean). With `start` absolutized
// the bare-relative form must discover the real root and its files.
//
// This must run with the process CWD at the fixture root and lint a path
// genuinely relative to CWD — the existing `workspace_root_from_subdirectory`
// joins onto an absolute temp path, so it never exercised this branch.
let dir = workspace_with_files(&[("README.md", "# Root"), ("docs/guide.md", "# Guide")]);
let _guard = CWD_LOCK
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
let original = std::env::current_dir().expect("read original cwd");
std::env::set_current_dir(dir.path()).expect("chdir to fixture root");
// `Path::new("docs").parent()` is `Some("")`, the empty-path trap.
let scanned = Workspace::scan(Path::new("docs"));
std::env::set_current_dir(&original).expect("restore original cwd");
let ws = scanned.expect("bare-relative scan should succeed");
assert!(
ws.file(Path::new("docs/guide.md")).is_some(),
"bare-relative `docs` must discover the file under it, not zero files"
);
assert_eq!(
ws.files().len(),
2,
"bare-relative scan must walk the real tree, not an empty path"
);
}
#[test]
fn parses_links_and_headings() {
let dir =
workspace_with_files(&[("doc.md", "# Title\n\n[link](other.md \"references\")\n")]);
let ws = Workspace::scan(dir.path()).expect("scan should succeed");
let data = ws.file(Path::new("doc.md")).expect("should find doc.md");
let headings = data.tree.headings();
let links = data.tree.links(Path::new("doc.md"));
assert_eq!(headings.len(), 1, "should have one heading");
assert_eq!(links.len(), 1, "should have one link");
}
#[test]
fn parses_frontmatter_backlinks() {
let dir = workspace_with_files(&[(
"target.md",
"---\nbacklinks:\n superseded_by:\n - source.md\n---\n# Target\n",
)]);
let ws = Workspace::scan(dir.path()).expect("scan should succeed");
let data = ws
.file(Path::new("target.md"))
.expect("should find target.md");
let fm = data.frontmatter.as_ref().expect("should have frontmatter");
assert_eq!(
fm.backlinks.get("superseded_by"),
Some(&vec!["source.md".to_string()]),
"should parse backlinks"
);
assert!(
data.backlink_diagnostics.is_empty(),
"known predicate should produce no diagnostics"
);
}
#[test]
fn frontmatter_exception_suppresses_structural_diagnostic() {
    // End-to-end check for issue 031: the file declares an
    // `exceptions.stale_references` entry for `gone.md` in its frontmatter and
    // also mentions `gone.md` in its body. The entry must be parsed, and the
    // cached structural diagnostics must contain no stale-reference message.
    let doc = Path::new("doc.md");
    let dir = workspace_with_files(&[(
        "doc.md",
        "---\nexceptions:\n stale_references:\n \"gone.md\": \"deliberately dead\"\n---\nSee `gone.md` here.\n",
    )]);
    let ws = Workspace::scan(dir.path()).expect("scan should succeed");

    let data = ws.file(doc).expect("should find doc.md");
    let frontmatter = data.frontmatter.as_ref().expect("should have frontmatter");
    let exception_count = frontmatter.exceptions.stale_references.len();
    assert_eq!(
        exception_count, 1,
        "the exceptions block parses into frontmatter"
    );

    let reports_stale_reference = data
        .structural
        .iter()
        .any(|diagnostic| diagnostic.message.contains("stale reference"));
    assert!(
        !reports_stale_reference,
        "the exception suppresses the stale-reference diagnostic: {:?}",
        data.structural
    );
}
#[test]
fn frontmatter_error_partial_recovery() {
    // Broken YAML frontmatter must not drop the file from the index: it stays
    // indexed and carries parse diagnostics instead.
    let fixture = [("bad.md", "---\n: broken: yaml: [[\n---\n# Bad\n")];
    let dir = workspace_with_files(&fixture);
    let ws = Workspace::scan(dir.path()).expect("scan should succeed");

    let indexed = ws
        .file(Path::new("bad.md"))
        .expect("file should be indexed");
    let has_parse_diagnostics = !indexed.parse_diagnostics.is_empty();
    assert!(has_parse_diagnostics, "should have parse diagnostics");
}
#[test]
fn incremental_update_adds_file() {
    // A file written to disk after the initial scan joins the index once
    // `update` is called for its path.
    let added = Path::new("new.md");
    let dir = workspace_with_files(&[("README.md", "# Root")]);
    let mut ws = Workspace::scan(dir.path()).expect("scan should succeed");
    assert_eq!(ws.files().len(), 1, "should start with one file");

    fs::write(dir.path().join(added), "# New").expect("write new file");
    ws.update(added).expect("update should succeed");

    assert_eq!(ws.files().len(), 2, "should have two files after update");
    assert!(ws.file(added).is_some(), "should find new.md");
}
#[test]
fn incremental_update_modifies_file() {
    // Overwriting a file on disk and calling `update` must replace the parsed
    // tree, observed here through its headings.
    let doc = Path::new("doc.md");
    let dir = workspace_with_files(&[("doc.md", "# Original")]);
    let mut ws = Workspace::scan(dir.path()).expect("scan should succeed");

    let before = ws.file(doc).expect("should find doc.md").tree.headings();
    assert_eq!(before.len(), 1, "should have one heading");
    assert_eq!(before[0].text, "Original", "heading should be Original");

    fs::write(dir.path().join(doc), "# Updated\n\n## Section\n").expect("overwrite doc.md");
    ws.update(doc).expect("update should succeed");

    let after = ws.file(doc).expect("should find doc.md").tree.headings();
    assert_eq!(after.len(), 2, "should have two headings after update");
    assert_eq!(
        after[0].text, "Updated",
        "first heading should be Updated"
    );
}
#[test]
fn incremental_update_removes_deleted_file() {
    // Calling `update` for a path that no longer exists on disk drops that
    // file from the index.
    let removed = Path::new("b.md");
    let fixture = [("a.md", "# A"), ("b.md", "# B")];
    let dir = workspace_with_files(&fixture);
    let mut ws = Workspace::scan(dir.path()).expect("scan should succeed");
    assert_eq!(ws.files().len(), 2, "should start with two files");

    fs::remove_file(dir.path().join(removed)).expect("delete b.md");
    ws.update(removed).expect("update should succeed");

    assert_eq!(ws.files().len(), 1, "should have one file after deletion");
    assert!(ws.file(removed).is_none(), "b.md should be gone");
}
#[test]
fn incremental_update_clears_previous_error() {
    let doc = Path::new("doc.md");
    let dir = workspace_with_files(&[("doc.md", "---\n: broken: yaml\n---\n# Bad\n")]);
    let mut ws = Workspace::scan(dir.path()).expect("scan should succeed");

    // Partial recovery keeps the broken file indexed, with parse diagnostics
    // attached.
    let had_diagnostics = !ws
        .file(doc)
        .expect("file should exist")
        .parse_diagnostics
        .is_empty();
    assert!(had_diagnostics, "should have parse diagnostics initially");

    // Repair the file on disk; the next update must drop the old diagnostics.
    fs::write(dir.path().join(doc), "# Fixed\n").expect("fix doc.md");
    ws.update(doc).expect("update should succeed");

    let repaired = ws.file(doc).expect("file should be parsed");
    assert!(
        repaired.parse_diagnostics.is_empty(),
        "parse diagnostics should be cleared"
    );
}
#[test]
fn empty_workspace() {
    // Scanning a directory with no fixture files succeeds and indexes nothing.
    let dir = workspace_with_files(&[]);
    let ws = Workspace::scan(dir.path()).expect("scan should succeed");
    let indexed = ws.files();
    assert!(indexed.is_empty(), "should have no files");
}
#[test]
fn case_insensitive_md_extension() {
    // Both the lowercase `.md` and the uppercase `.MD` file must be indexed.
    let fixture = [("lower.md", "# Lower"), ("upper.MD", "# Upper")];
    let dir = workspace_with_files(&fixture);
    let ws = Workspace::scan(dir.path()).expect("scan should succeed");
    let indexed_count = ws.files().len();
    assert_eq!(indexed_count, 2, "should find both .md and .MD files");
}
#[test]
fn update_content_replaces_file_data() {
    // `update_content` with new text for an already-indexed path must replace
    // the parsed data: the one link from the initial parse disappears.
    let doc = Path::new("doc.md");
    let dir = workspace_with_files(&[("doc.md", "[link](other.md)\n")]);
    let mut ws = Workspace::scan(dir.path()).expect("scan should succeed");

    let initial_links = ws.file(doc).expect("file should exist").tree.links(doc);
    assert_eq!(initial_links.len(), 1, "initial parse should find one link");

    ws.update_content(doc, "# No links here\n");

    let updated_links = ws
        .file(doc)
        .expect("file should still exist")
        .tree
        .links(doc);
    assert!(
        updated_links.is_empty(),
        "updated content should have no links"
    );
}
#[test]
fn update_content_adds_new_file() {
    // `update_content` for a path the workspace has not seen adds that path
    // to the index.
    let added = Path::new("b.md");
    let dir = workspace_with_files(&[("a.md", "# A\n")]);
    let mut ws = Workspace::scan(dir.path()).expect("scan should succeed");
    assert_eq!(ws.files().len(), 1, "should start with one file");

    ws.update_content(added, "# B\n");

    assert_eq!(ws.files().len(), 2, "should have two files after adding");
    let is_indexed = ws.file(added).is_some();
    assert!(is_indexed, "new file should be indexed");
}
#[test]
fn broken_config_surfaces_error() {
    // An unparseable `.lattice.toml` must not fail the scan: the error is
    // exposed through `config_error` and markdown files are still indexed.
    let fixture = [
        (".lattice.toml", "not valid toml {{{}}}"),
        ("README.md", "# Root"),
    ];
    let dir = workspace_with_files(&fixture);
    let ws = Workspace::scan(dir.path()).expect("scan should still succeed");

    let error_surfaced = ws.config_error().is_some();
    assert!(error_surfaced, "broken config should be surfaced");

    let readme_indexed = ws.file(Path::new("README.md")).is_some();
    assert!(
        readme_indexed,
        "files should still be parsed with defaults"
    );
}
#[test]
fn has_config_true_when_lattice_toml_present() {
    // An empty `.lattice.toml` is enough for `has_config` to report true.
    let fixture = [(".lattice.toml", ""), ("README.md", "# Root")];
    let dir = workspace_with_files(&fixture);
    let ws = Workspace::scan(dir.path()).expect("scan should succeed");
    let detected = ws.has_config();
    assert!(detected, "should detect .lattice.toml");
}
#[test]
fn has_config_false_when_no_lattice_toml() {
    // With no `.lattice.toml` in the fixture, `has_config` reports false.
    let fixture = [("README.md", "# Root")];
    let dir = workspace_with_files(&fixture);
    let ws = Workspace::scan(dir.path()).expect("scan should succeed");
    let detected = ws.has_config();
    assert!(!detected, "should detect absence of .lattice.toml");
}
#[test]
fn has_config_true_when_config_is_broken() {
    // `has_config` reports true even when the `.lattice.toml` content does
    // not parse.
    let fixture = [
        (".lattice.toml", "not valid toml {{{}}}"),
        ("README.md", "# Root"),
    ];
    let dir = workspace_with_files(&fixture);
    let ws = Workspace::scan(dir.path()).expect("scan should succeed");
    let detected = ws.has_config();
    assert!(detected, "broken config still means user opted in");
}
// -- Stage-2 structural cache (issue 013) --
/// Differential check for the stage-2 structural cache.
///
/// For every indexed file, re-runs `structural::collect` with the file's tree,
/// the workspace config and the file's frontmatter exceptions (or the default
/// exceptions when it has no frontmatter), then asserts the result equals the
/// cached `structural` vector. A mismatch means the cache is serving stale
/// diagnostics.
///
/// # Panics
///
/// Panics, naming the offending path, on the first file whose cache differs.
fn assert_cache_matches_recompute(ws: &Workspace) {
    // None of these depend on the file being checked, so build them once.
    let file_exists = |target: &Path| ws.file(target).is_some();
    let external_exists = |p: &Path| p.exists();
    let no_exceptions = fm::Exceptions::default();

    for (path, cached) in ws.files() {
        let exceptions = match cached.frontmatter.as_ref() {
            Some(frontmatter) => &frontmatter.exceptions,
            None => &no_exceptions,
        };
        let recomputed = structural::collect(
            &cached.tree,
            path,
            ws.config(),
            &file_exists,
            &external_exists,
            exceptions,
        );
        assert_eq!(
            cached.structural,
            recomputed,
            "cached structural for {} drifted from a fresh collect",
            path.display()
        );
    }
}
/// Looks up the make-it-a-link bare-path diagnostic cached for `path`.
///
/// Returns the severity of the first structural diagnostic whose message
/// contains "convert to a markdown link", or `None` when `path` is not indexed
/// or has no such diagnostic.
fn bare_path_severity(ws: &Workspace, path: &Path) -> Option<crate::validation::Severity> {
    let file_data = ws.file(path)?;
    file_data.structural.iter().find_map(|diagnostic| {
        diagnostic
            .message
            .contains("convert to a markdown link")
            .then_some(diagnostic.severity)
    })
}
/// Looks up the stale-reference diagnostic cached for `path`.
///
/// Returns the severity of the first structural diagnostic whose message
/// contains "stale reference", or `None` when `path` is not indexed or has no
/// such diagnostic.
fn stale_reference_severity(
    ws: &Workspace,
    path: &Path,
) -> Option<crate::validation::Severity> {
    let file_data = ws.file(path)?;
    file_data.structural.iter().find_map(|diagnostic| {
        diagnostic
            .message
            .contains("stale reference")
            .then_some(diagnostic.severity)
    })
}
#[test]
fn structural_cache_matches_recompute_across_mutations() {
    // Runs the structural cache through an initial scan, a content edit, a
    // file addition and a file removal, re-checking the differential
    // invariant after each step.
    let source = Path::new("a.md");
    let page = Path::new("docs/page.md");
    let fixture = [
        ("a.md", "See docs/page.md for details.\ntrailing \n"),
        ("docs/page.md", "# Page\n"),
    ];
    let dir = workspace_with_files(&fixture);
    let mut ws = Workspace::scan(dir.path()).expect("scan should succeed");
    assert_cache_matches_recompute(&ws);

    // Guard against a vacuous pass: the fixture must produce diagnostics.
    let source_has_diagnostics = !ws
        .file(source)
        .expect("a.md indexed")
        .structural
        .is_empty();
    assert!(
        source_has_diagnostics,
        "fixture should exercise real structural diagnostics"
    );

    // Step 1: edit content only; the set of files stays the same.
    ws.update_content(source, "# Clean\n\nstill referencing docs/page.md.\n");
    assert_cache_matches_recompute(&ws);

    // Step 2: a new file grows the set of files.
    ws.update_content(Path::new("docs/extra.md"), "# Extra\n");
    assert_cache_matches_recompute(&ws);

    // Step 3: deleting a file from disk shrinks the set of files.
    fs::remove_file(dir.path().join(page)).expect("delete page.md");
    ws.update(page).expect("update should succeed");
    assert_cache_matches_recompute(&ws);
}
#[test]
fn bare_path_severity_flips_when_target_added() {
    // a.md mentions docs/page.md as a bare path. While the target is missing
    // the mention is a dangling reference, reported as the stale-reference
    // warning (issue 028) with no make-it-a-link nudge. Once the target is
    // added, a.md's cached diagnostic must switch to the make-it-a-link
    // warning although a.md itself was never edited: this exercises the
    // membership recompute path.
    let source = Path::new("a.md");
    let warning = Some(crate::validation::Severity::Warning);
    let dir = workspace_with_files(&[("a.md", "See docs/page.md for details.\n")]);
    let mut ws = Workspace::scan(dir.path()).expect("scan should succeed");

    assert_eq!(
        stale_reference_severity(&ws, source),
        warning,
        "absent target should be the stale-reference warning"
    );
    assert_eq!(
        bare_path_severity(&ws, source),
        None,
        "absent target draws no make-it-a-link nudge"
    );

    ws.update_content(Path::new("docs/page.md"), "# Page\n");

    assert_eq!(
        bare_path_severity(&ws, source),
        warning,
        "adding the target should flip the source's bare-path to the make-it-a-link warning"
    );
    assert_eq!(
        stale_reference_severity(&ws, source),
        None,
        "a resolving target draws no stale-reference warning"
    );
}
// -- Parsed-extraction cache (ticket perf 06) --
/// Differential check for the parsed-extraction cache.
///
/// For every indexed file, asserts that the cached `headings`, `links` and
/// `anchors` each equal a fresh extraction from the file's tree
/// (`Tree::headings()`, `Tree::links()`, `Tree::anchors()`). A mismatch means
/// the cache is serving a stale extraction to the fragment and forward-link
/// passes.
///
/// # Panics
///
/// Panics, naming the offending path, on the first field that differs.
fn assert_extraction_cache_matches_recompute(ws: &Workspace) {
    for (path, cached) in ws.files() {
        let tree = &cached.tree;

        let fresh_headings = tree.headings();
        assert_eq!(
            cached.headings,
            fresh_headings,
            "cached headings for {} drifted from a fresh extraction",
            path.display()
        );

        let fresh_links = tree.links(path);
        assert_eq!(
            cached.links,
            fresh_links,
            "cached links for {} drifted from a fresh extraction",
            path.display()
        );

        let fresh_anchors = tree.anchors();
        assert_eq!(
            cached.anchors,
            fresh_anchors,
            "cached anchors for {} drifted from a fresh extraction",
            path.display()
        );
    }
}
#[test]
fn extraction_cache_matches_recompute_across_mutations() {
    // Runs the extraction cache through an initial scan, a content edit, a
    // file addition and a file removal, re-checking the differential
    // invariant after each step.
    let a = Path::new("a.md");
    let b = Path::new("b.md");
    let fixture = [
        ("a.md", "# A\n\n[to b](b.md \"references\")\n\n## Section\n"),
        ("b.md", "# B\n"),
    ];
    let dir = workspace_with_files(&fixture);
    let mut ws = Workspace::scan(dir.path()).expect("scan should succeed");
    assert_extraction_cache_matches_recompute(&ws);

    // Guard against a vacuous pass: a.md must have cached links and headings.
    let has_cached_links = !ws.file(a).expect("a.md indexed").links.is_empty();
    assert!(
        has_cached_links,
        "fixture should exercise real cached links"
    );
    let has_cached_headings = !ws.file(a).expect("a.md indexed").headings.is_empty();
    assert!(
        has_cached_headings,
        "fixture should exercise real cached headings"
    );

    // Step 1: edit content only; the set of files stays the same.
    ws.update_content(a, "# A renamed\n\n[to b](b.md#b \"references\")\n");
    assert_extraction_cache_matches_recompute(&ws);

    // Step 2: a new file grows the set of files.
    ws.update_content(Path::new("c.md"), "# C\n\n[to a](a.md \"references\")\n");
    assert_extraction_cache_matches_recompute(&ws);

    // Step 3: deleting a file from disk shrinks the set of files.
    fs::remove_file(dir.path().join(b)).expect("delete b.md");
    ws.update(b).expect("update should succeed");
    assert_extraction_cache_matches_recompute(&ws);
}
// -- Line index cache (ticket perf 01) --
/// Differential check for the line-index cache.
///
/// For every indexed file, builds a new `LineIndex` from the source text its
/// tree carries and asserts it equals the cached `line_index`. The index is
/// derived from the file's text alone, so any difference is a stale cache.
///
/// # Panics
///
/// Panics, naming the offending path, on the first file whose index differs.
fn assert_line_index_cache_matches_recompute(ws: &Workspace) {
    for (path, cached) in ws.files() {
        let rebuilt = LineIndex::new(cached.tree.source());
        assert_eq!(
            cached.line_index,
            rebuilt,
            "cached line index for {} drifted from a fresh build",
            path.display()
        );
    }
}
#[test]
fn line_index_rebuilt_only_on_reparse() {
    let a = Path::new("a.md");
    let b = Path::new("b.md");
    // The fixtures differ in line endings (CRLF vs LF) and both contain
    // multi-byte characters, so swapping the two indexes would be visible.
    let fixture = [
        ("a.md", "# A\r\n\r\nfirst café line\r\n"),
        ("b.md", "# B\n\nsecond λ line\n"),
    ];
    let dir = workspace_with_files(&fixture);
    let mut ws = Workspace::scan(dir.path()).expect("scan should succeed");
    assert_line_index_cache_matches_recompute(&ws);

    // Snapshot both indexes before touching anything.
    let a_before = ws.file(a).expect("a.md indexed").line_index.clone();
    let b_before = ws.file(b).expect("b.md indexed").line_index.clone();

    // Only a.md is edited, and the edit changes its line structure.
    ws.update_content(a, "# A renamed\n\nshorter\n");

    let a_after = &ws.file(a).expect("a.md still indexed").line_index;
    assert_ne!(
        *a_after, a_before,
        "the edited file's index must be rebuilt from its new source"
    );
    let b_after = &ws.file(b).expect("b.md still indexed").line_index;
    assert_eq!(
        *b_after, b_before,
        "an unrelated file's index must be untouched by another file's edit"
    );
    assert_line_index_cache_matches_recompute(&ws);
}
#[test]
fn extraction_cache_rebuilt_only_on_reparse() {
    // target.md holds the headings and three other files each link to one of
    // its fragments. Without a cache, fragment validation would extract
    // target.md's headings once per such link; with it, validation must
    // extract nothing at all.
    let a = Path::new("a.md");
    let fixture = [
        (".lattice.toml", ""),
        ("target.md", "# Alpha\n\n## Beta\n"),
        ("a.md", "[x](target.md#alpha \"references\")\n"),
        ("b.md", "[y](target.md#beta \"references\")\n"),
        ("c.md", "[z](target.md#alpha \"references\")\n"),
    ];
    let dir = workspace_with_files(&fixture);
    let mut ws = Workspace::scan(dir.path()).expect("scan should succeed");
    // Debug rendering of a.md's cached links, used to compare before/after.
    let a_links_debug =
        |ws: &Workspace| format!("{:?}", ws.file(a).expect("a.md indexed").links);

    // Phase 1: a full forward-link/fragment validation must be served from
    // the cache, so both extraction counters stay at zero.
    block::reset_extract_counts();
    let _ = crate::validation::validate_forward_links(&ws);
    let headings_during_validation = block::headings_extract_count();
    assert_eq!(
        headings_during_validation, 0,
        "fragment validation must read cached headings, not re-extract once per fragment-link"
    );
    let links_during_validation = block::links_extract_count();
    assert_eq!(
        links_during_validation, 0,
        "forward-link validation must read cached links, not re-extract once per file"
    );

    // Phase 2: editing one file triggers exactly one headings extraction and
    // one links extraction, and leaves a.md's cached links as they were.
    let a_links_before = a_links_debug(&ws);
    block::reset_extract_counts();
    ws.update_content(Path::new("target.md"), "# Alpha\n\n## Gamma\n");
    let headings_after_edit = block::headings_extract_count();
    assert_eq!(
        headings_after_edit, 1,
        "one reparse re-extracts headings exactly once, not once per other file"
    );
    let links_after_edit = block::links_extract_count();
    assert_eq!(
        links_after_edit, 1,
        "one reparse re-extracts links exactly once, not once per other file"
    );
    let a_links_after = a_links_debug(&ws);
    assert_eq!(
        a_links_before, a_links_after,
        "editing target.md must not rebuild another file's link cache"
    );
}
}