Skip to main content

vane_core/config/
loader.rs

1//! Filesystem scan: `<config_dir>/rules/*.json` → `Vec<RawRuleFile>`.
2//!
3//! Sub-directories, hidden files, and non-`.json` extensions are
4//! silently skipped — operators frequently leave editor swap files
5//! (`*.swp`), READMEs, or symlinked sub-directories alongside rule
6//! files, and surfacing those as errors would block startup on benign
7//! state.
8
9use std::fs;
10use std::path::Path;
11
12use crate::compile::merge::RawRuleFile;
13use crate::error::Error;
14use crate::preset::RuleEntry;
15use crate::rule::SourceInfo;
16
17/// Scan a directory for `*.json` rule files. Returns one
18/// [`RawRuleFile`] per discovered file with `path` populated from the
19/// on-disk filename. Order of the returned vector is unspecified — the
20/// merge stage sorts by `(order asc, path lex)` so the loader does not
21/// pre-sort.
22///
23/// # Errors
24/// Returns [`Error::compile`] when:
25/// - `rules_dir` does not exist (an empty directory is fine, but a
26///   missing one is operator error and should fail loud).
27/// - `rules_dir` exists but is not a directory.
28/// - any `.json` file fails to parse as `RawRuleFile`.
29///
30/// Returns [`Error::io`] for filesystem-level read failures (permission
31/// denied, broken symlink during traversal, etc.).
32pub fn scan_rules_dir(rules_dir: &Path) -> Result<Vec<RawRuleFile>, Error> {
33	if !rules_dir.exists() {
34		return Err(Error::compile(format!("rules directory not found: {}", rules_dir.display())));
35	}
36	if !rules_dir.is_dir() {
37		return Err(Error::compile(format!("rules path is not a directory: {}", rules_dir.display())));
38	}
39
40	let mut files = Vec::new();
41	let entries = fs::read_dir(rules_dir)
42		.map_err(|e| Error::io(format!("read_dir {}: {e}", rules_dir.display())))?;
43
44	for entry in entries {
45		let entry = entry.map_err(|e| Error::io(format!("dir entry: {e}")))?;
46		let path = entry.path();
47		if !path.is_file() {
48			continue;
49		}
50		if path.extension().and_then(|s| s.to_str()) != Some("json") {
51			continue;
52		}
53		let content =
54			fs::read_to_string(&path).map_err(|e| Error::io(format!("read {}: {e}", path.display())))?;
55		let mut file: RawRuleFile = serde_json::from_str(&content)
56			.map_err(|e| Error::compile(format!("parse {}: {e}", path.display())))?;
57		annotate_rule_source_lines(&content, &path, &mut file.rules);
58		file.path = path;
59		files.push(file);
60	}
61
62	Ok(files)
63}
64
65/// Walk the raw JSON text to find the start line of every entry in the
66/// top-level `rules` array, then stamp `(file, line)` onto each entry's
67/// `SourceInfo`. Without this, every rule starts life with an empty
68/// `SourceInfo`, and `source_prefix` in `core::compile::lower` collapses
69/// to a blank string — diagnostics show errors with no file or line.
70fn annotate_rule_source_lines(content: &str, path: &Path, entries: &mut [RuleEntry]) {
71	let starts = locate_rule_array_element_lines(content);
72	for (idx, entry) in entries.iter_mut().enumerate() {
73		let line = starts.get(idx).copied().unwrap_or(0);
74		let info = SourceInfo { file: path.to_path_buf(), line };
75		match entry {
76			RuleEntry::Raw(rule) => rule.source = info,
77			RuleEntry::Preset(inv) => inv.source = info,
78		}
79	}
80}
81
82/// Locate the starting 1-based line number of each top-level element in
83/// the file-level `"rules": [...]` array. Returns one entry per array
84/// element in source order. Heuristic-but-deterministic byte walk with
85/// depth/string/escape tracking — sufficient for the structured rule
86/// JSON the loader handles.
87fn locate_rule_array_element_lines(content: &str) -> Vec<u32> {
88	let bytes = content.as_bytes();
89	let mut out = Vec::new();
90	let Some(rules_key_pos) = find_top_level_key(content, "rules") else {
91		return out;
92	};
93
94	// Skip `"rules"` + `:` + whitespace to the opening `[`.
95	let mut i = rules_key_pos;
96	while i < bytes.len() && bytes[i] != b'[' {
97		i += 1;
98	}
99	if i >= bytes.len() {
100		return out;
101	}
102	i += 1;
103
104	let mut depth: i32 = 0;
105	let mut in_string = false;
106	let mut escape = false;
107	let mut element_started = false;
108
109	while i < bytes.len() {
110		let c = bytes[i];
111		if in_string {
112			if escape {
113				escape = false;
114			} else if c == b'\\' {
115				escape = true;
116			} else if c == b'"' {
117				in_string = false;
118			}
119			i += 1;
120			continue;
121		}
122		match c {
123			b'"' => {
124				if depth == 0 && !element_started {
125					out.push(line_at(content, i));
126					element_started = true;
127				}
128				in_string = true;
129			}
130			b'{' | b'[' => {
131				if depth == 0 && !element_started {
132					out.push(line_at(content, i));
133					element_started = true;
134				}
135				depth += 1;
136			}
137			b'}' | b']' => {
138				depth -= 1;
139				if depth < 0 {
140					return out;
141				}
142			}
143			b',' if depth == 0 => element_started = false,
144			b' ' | b'\t' | b'\r' | b'\n' => {}
145			_ => {
146				if depth == 0 && !element_started {
147					out.push(line_at(content, i));
148					element_started = true;
149				}
150			}
151		}
152		i += 1;
153	}
154	out
155}
156
/// Return the byte offset just past the closing quote of a top-level
/// key inside the outermost JSON object. Yields `None` if the text does
/// not start (after leading whitespace) with `{`, or if the key is not
/// present at the file's top level.
///
/// A string only counts as the key when the next non-whitespace byte
/// after it is `:`. A top-level string *value* that happens to equal
/// the key (e.g. `"kind": "rules"`) is therefore not mistaken for it.
/// The comparison is on raw bytes, so a key spelled with JSON escape
/// sequences is not recognised.
fn find_top_level_key(content: &str, key: &str) -> Option<usize> {
	let bytes = content.as_bytes();

	// The first non-whitespace byte must open the outermost object.
	let open = bytes.iter().position(|b| !b.is_ascii_whitespace())?;
	if bytes[open] != b'{' {
		return None;
	}

	let target = format!("\"{key}\"");
	let tbytes = target.as_bytes();
	// `depth == 0` means "directly inside the outer object, not inside
	// any nested object or array".
	let mut depth: i32 = 0;
	let mut in_string = false;
	let mut escape = false;

	for (i, &c) in bytes.iter().enumerate().skip(open + 1) {
		if in_string {
			if escape {
				escape = false;
			} else if c == b'\\' {
				escape = true;
			} else if c == b'"' {
				in_string = false;
			}
			continue;
		}
		match c {
			b'"' => {
				if depth == 0 && bytes[i..].starts_with(tbytes) {
					let after = i + tbytes.len();
					// Only a member name is followed by `:`; a string
					// value is followed by `,` or `}`.
					let next = bytes[after..].iter().find(|b| !b.is_ascii_whitespace());
					if next == Some(&b':') {
						return Some(after);
					}
				}
				in_string = true;
			}
			b'{' | b'[' => depth += 1,
			b'}' | b']' => depth -= 1,
			_ => {}
		}
	}
	None
}
205
/// Return the 1-based line number of the byte at `byte_offset`: one
/// plus the number of `\n` bytes that precede it. An offset past the
/// end of `content` is treated as the end. The result saturates at
/// `u32::MAX`.
fn line_at(content: &str, byte_offset: usize) -> u32 {
	let end = byte_offset.min(content.len());
	let newlines = content.as_bytes()[..end].iter().filter(|&&b| b == b'\n').count();
	u32::try_from(newlines).unwrap_or(u32::MAX).saturating_add(1)
}
215
#[cfg(test)]
mod tests {
	use std::collections::HashSet;
	use std::fs;

	use super::*;

	/// Smallest rule file body used by these tests: an `order` and an
	/// empty `rules` array.
	const MINIMAL_RULE_FILE: &str = r#"{ "order": 5, "rules": [] }"#;

	/// Write `body` to `<dir>/<name>`, panicking on failure.
	fn put_file(dir: &Path, name: &str, body: &str) {
		fs::write(dir.join(name), body).expect("write json");
	}

	#[test]
	fn scan_rules_dir_reads_multiple_json_files() {
		let dir = tempfile::tempdir().expect("tempdir");
		for name in ["00-a.json", "10-b.json"] {
			put_file(dir.path(), name, MINIMAL_RULE_FILE);
		}

		let loaded = scan_rules_dir(dir.path()).expect("scan ok");
		assert_eq!(loaded.len(), 2);

		// Path field is populated from on-disk path.
		let seen: HashSet<&str> =
			loaded.iter().filter_map(|file| file.path.file_name()?.to_str()).collect();
		assert!(seen.contains("00-a.json"));
		assert!(seen.contains("10-b.json"));
	}

	#[test]
	fn scan_rules_dir_skips_non_json_extensions() {
		let dir = tempfile::tempdir().expect("tempdir");
		put_file(dir.path(), "rule.json", MINIMAL_RULE_FILE);
		fs::write(dir.path().join("README.md"), "docs").unwrap();
		fs::write(dir.path().join(".rule.json.swp"), "vim swap").unwrap();

		let loaded = scan_rules_dir(dir.path()).expect("scan ok");
		assert_eq!(loaded.len(), 1, "only the .json file is returned");
	}

	#[test]
	fn scan_rules_dir_skips_subdirectories() {
		let dir = tempfile::tempdir().expect("tempdir");
		let nested = dir.path().join("nested");
		fs::create_dir(&nested).unwrap();
		put_file(&nested, "ignored.json", MINIMAL_RULE_FILE);
		put_file(dir.path(), "kept.json", MINIMAL_RULE_FILE);

		let loaded = scan_rules_dir(dir.path()).expect("scan ok");
		assert_eq!(loaded.len(), 1);
	}

	#[test]
	fn scan_rules_dir_empty_directory_returns_empty_vec() {
		let dir = tempfile::tempdir().expect("tempdir");
		let loaded = scan_rules_dir(dir.path()).expect("scan ok");
		assert!(loaded.is_empty());
	}

	#[test]
	fn scan_rules_dir_missing_directory_errors() {
		let dir = tempfile::tempdir().expect("tempdir");
		let absent = dir.path().join("does-not-exist");

		let msg = scan_rules_dir(&absent).expect_err("missing dir errors").to_string();
		assert!(msg.contains("not found"), "{msg}");
		assert!(msg.contains("does-not-exist"), "error names the path: {msg}");
	}

	#[test]
	fn scan_rules_dir_path_pointing_at_file_errors() {
		let dir = tempfile::tempdir().expect("tempdir");
		let plain_file = dir.path().join("not-a-dir");
		fs::write(&plain_file, "hi").unwrap();

		let err = scan_rules_dir(&plain_file).expect_err("file path rejected");
		assert!(err.to_string().contains("not a directory"), "{err}");
	}

	#[test]
	fn scan_rules_dir_invalid_json_errors_with_path() {
		let dir = tempfile::tempdir().expect("tempdir");
		put_file(dir.path(), "broken.json", "{ this is not json");

		let msg = scan_rules_dir(dir.path()).expect_err("bad json rejected").to_string();
		assert!(msg.contains("parse"), "error mentions parse: {msg}");
		assert!(msg.contains("broken.json"), "error names the offending file: {msg}");
	}

	#[test]
	fn scan_rules_dir_threads_rule_source_lines_into_each_entry() {
		let dir = tempfile::tempdir().expect("tempdir");
		// Each rule object starts at a distinct line; the loader must
		// stamp that line onto the entry's SourceInfo so downstream
		// `source_prefix` carries `file:line` into diagnostics.
		let body = "{\n  \"rules\": [\n    { \"name\": \"a\", \"listen\": [\":1\"], \"terminate\": { \"type\": \"http_proxy\" } },\n    { \"name\": \"b\", \"listen\": [\":2\"], \"terminate\": { \"type\": \"http_proxy\" } }\n  ]\n}\n";
		put_file(dir.path(), "rules.json", body);

		let loaded = scan_rules_dir(dir.path()).expect("scan ok");
		assert_eq!(loaded.len(), 1);
		let rules = &loaded[0].rules;
		assert_eq!(rules.len(), 2);

		for (entry, expected_line) in rules.iter().zip([3u32, 4u32]) {
			let RuleEntry::Raw(rule) = entry else {
				panic!("expected Raw entry");
			};
			assert_eq!(rule.source.line, expected_line);
			assert_eq!(rule.source.file.file_name().and_then(|s| s.to_str()), Some("rules.json"));
		}
	}

	#[test]
	fn locate_rule_array_element_lines_handles_nested_args_objects() {
		// The first element is a single line with nested objects and
		// arrays; the second spans several lines. Only the opening line
		// of each is reported.
		let body = r#"{
  "order": 0,
  "rules": [
    { "name": "first", "listen": [":1"], "terminate": { "type": "http_proxy", "args": { "nested": ["x", "y"] } } },
    {
      "name": "second",
      "listen": [":2"],
      "terminate": { "type": "http_proxy" }
    }
  ]
}
"#;
		assert_eq!(locate_rule_array_element_lines(body), vec![4, 5]);
	}

	#[test]
	fn scan_rules_dir_populates_path_field_with_full_path() {
		let dir = tempfile::tempdir().expect("tempdir");
		put_file(dir.path(), "abs.json", MINIMAL_RULE_FILE);

		let loaded = scan_rules_dir(dir.path()).expect("scan ok");
		assert_eq!(loaded.len(), 1);

		let found = &loaded[0].path;
		assert!(found.is_absolute() || found.starts_with(dir.path()));
		assert_eq!(found.file_name().and_then(|s| s.to_str()), Some("abs.json"));
	}
}