Skip to main content

surql_parser/
filesystem.rs

/// Maximum `.surql` file size (10 MB). Files larger than this are rejected to
/// prevent accidental memory exhaustion.
pub const MAX_SURQL_FILE_SIZE: u64 = 10 * 1024 * 1024;

/// Read a `.surql` file with a size guard.
///
/// The limit is enforced twice: once against the length reported by the
/// file's metadata (cheap early rejection) and once while reading, by capping
/// the stream at [`MAX_SURQL_FILE_SIZE`] + 1 bytes. The second check matters
/// because the reported length is not trustworthy for everything a path can
/// name: character devices and FIFOs report a length of 0, and a regular file
/// can grow between the metadata call and the read.
///
/// # Errors
///
/// Returns a message prefixed with the path when the file cannot be opened or
/// read, when it holds more than [`MAX_SURQL_FILE_SIZE`] bytes, or when its
/// contents are not valid UTF-8.
pub fn read_surql_file(path: &std::path::Path) -> Result<String, String> {
	use std::io::Read;

	// Every failure is reported as "<path>: <cause>".
	let with_path = |cause: &dyn std::fmt::Display| format!("{}: {cause}", path.display());

	// Open once and query the handle, so the size check and the read refer to
	// the same file even if the path is replaced in between.
	let file = std::fs::File::open(path).map_err(|e| with_path(&e))?;
	let declared_len = file.metadata().map_err(|e| with_path(&e))?.len();
	if declared_len > MAX_SURQL_FILE_SIZE {
		return Err(format!(
			"{}: file too large ({} bytes, max {})",
			path.display(),
			declared_len,
			MAX_SURQL_FILE_SIZE
		));
	}

	// `declared_len` is at most 10 MiB here, so the cast cannot truncate.
	let mut bytes = Vec::with_capacity(declared_len as usize);
	// Read one byte past the limit: reaching it proves the content is too big
	// without ever buffering more than MAX_SURQL_FILE_SIZE + 1 bytes.
	file.take(MAX_SURQL_FILE_SIZE + 1)
		.read_to_end(&mut bytes)
		.map_err(|e| with_path(&e))?;
	if bytes.len() as u64 > MAX_SURQL_FILE_SIZE {
		return Err(format!(
			"{}: file too large (more than {} bytes, max {})",
			path.display(),
			MAX_SURQL_FILE_SIZE,
			MAX_SURQL_FILE_SIZE
		));
	}

	String::from_utf8(bytes).map_err(|e| with_path(&e))
}
21
22/// Recursively collect all `.surql` files from a directory tree.
23///
24/// Skips known large or irrelevant directories: `target`, `node_modules`, `.git`,
25/// `build`, `fixtures`, `dist`, `.cache`, `surql-lsp-out`, and any directory
26/// whose name starts with `.`.
27///
28/// Collected paths are absolute. Callers that need relative paths can use
29/// [`Path::strip_prefix`] on the results.
30pub fn collect_surql_files(dir: &std::path::Path, out: &mut Vec<std::path::PathBuf>) {
31	let mut visited = std::collections::HashSet::new();
32	if let Ok(canonical) = dir.canonicalize() {
33		visited.insert(canonical);
34	}
35	collect_surql_files_recursive(dir, out, 0, &mut visited);
36}
37
38pub(crate) fn collect_surql_files_recursive(
39	dir: &std::path::Path,
40	out: &mut Vec<std::path::PathBuf>,
41	depth: u32,
42	visited: &mut std::collections::HashSet<std::path::PathBuf>,
43) {
44	if depth > 32 {
45		warn!(
46			"Max directory depth (32) exceeded at {}, skipping",
47			dir.display()
48		);
49		return;
50	}
51	let entries = match std::fs::read_dir(dir) {
52		Ok(e) => e,
53		Err(e) => {
54			warn!("Cannot read directory {}: {e}", dir.display());
55			return;
56		}
57	};
58	for entry in entries {
59		let entry = match entry {
60			Ok(e) => e,
61			Err(e) => {
62				warn!("Skipping unreadable entry in {}: {e}", dir.display());
63				continue;
64			}
65		};
66		let path = entry.path();
67		if path
68			.symlink_metadata()
69			.map(|m| m.is_symlink())
70			.unwrap_or(false)
71		{
72			warn!("Skipping symlink: {}", path.display());
73			continue;
74		}
75		if path.is_dir() {
76			let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
77			if matches!(
78				name,
79				"target"
80					| "node_modules"
81					| ".git" | "build"
82					| "fixtures" | "dist"
83					| ".cache" | "surql-lsp-out"
84			) || name.starts_with('.')
85			{
86				continue;
87			}
88			if let Ok(canonical) = path.canonicalize()
89				&& !visited.insert(canonical)
90			{
91				warn!(
92					"Skipping already-visited directory (symlink cycle?): {}",
93					path.display()
94				);
95				continue;
96			}
97			collect_surql_files_recursive(&path, out, depth + 1, visited);
98		} else if path.extension().is_some_and(|ext| ext == "surql") {
99			out.push(path);
100		}
101	}
102}