debian_workspace/workspace.rs
1//! `Workspace`: an abstraction over the on-disk or in-editor state of a
2//! Debian source package.
3//!
4//! Fixers historically reached into the working tree directly via
5//! `std::fs`. That ties them to a particular host (the lintian-brush CLI,
6//! which writes the tree to disk before invoking fixers). The
7//! `Workspace` trait abstracts that access so the same fixer code can
8//! also run inside an editor host (debian-lsp), where the source of truth for
9//! a file is the open buffer rather than the path on disk.
10//!
11//! Two implementations are intended:
12//!
13//! * [`FsWorkspace`] — pure-`std` shim that operates on a base
14//! directory on disk. Used by the lintian-brush CLI; preserves the
15//! existing semantics where the harness writes the tree to disk, the
16//! fixer mutates files there, and the harness diffs the result.
17//! * `LspWorkspace` (lives in debian-lsp) — wraps a salsa-backed
18//! in-memory workspace. Mutations are accumulated as a single
19//! `WorkspaceEdit` rather than being written back to disk.
20//!
21//! The trait is deliberately `breezyshim`-free so that hosts that don't want
22//! a Python runtime (notably debian-lsp) can depend on it without pulling in
23//! PyO3.
24
25use std::path::{Path, PathBuf};
26
27use debian_changelog::ChangeLog;
28use debian_control::lossless::Control;
29use debian_copyright::lossless::Copyright;
30use debian_watch::parse::ParsedWatchFile;
31use dep3::lossless::PatchHeader;
32use makefile_lossless::Makefile;
33use patchkit::edit::Patch;
34use patchkit::quilt::Series;
35use toml_edit::DocumentMut;
36
37use crate::{Error, Version};
38
39/// An editor handle for a single file in a [`Workspace`].
40///
41/// The parsed value is reachable via `Deref`/`DerefMut`; mutate it as you
42/// would the bare type. Changes are persisted by calling
43/// [`commit`](Self::commit). Dropping an editor without committing discards
44/// the changes (and emits a warning) — explicit commit is required so that
45/// serialisation failures can be reported.
46///
47/// `T` is the parsed representation (e.g.
48/// [`debian_control::lossless::Control`]).
49pub trait Editor<T>: std::ops::Deref<Target = T> + std::ops::DerefMut<Target = T> {
50 /// Persist any modifications to the underlying workspace.
51 ///
52 /// For a tree-backed workspace this writes the file back to disk; for an
53 /// editor-backed workspace it records a `TextEdit` against the buffer.
54 /// Calling `commit` more than once is a no-op.
55 fn commit(self: Box<Self>) -> Result<(), Error>;
56}
57
58/// Access to a Debian source package, as seen by a fixer.
59///
60/// Each typed accessor returns an editor for a well-known file. Callers can
61/// also reach less-common files via [`read_file`](Self::read_file) /
62/// [`write_file`](Self::write_file).
63pub trait Workspace {
64 /// The source package name, as read from `debian/changelog`.
65 ///
66 /// Returns `None` when the changelog is missing or unreadable. Hosts
67 /// that legitimately don't have a changelog (e.g. an LSP that lost
68 /// access to it) should return `None` rather than fabricating a name.
69 fn package(&self) -> Option<&str>;
70
71 /// The current version of the package, as read from `debian/changelog`.
72 ///
73 /// Returns `None` when the changelog is missing or unreadable.
74 fn current_version(&self) -> Option<&Version>;
75
76 /// Read `debian/control` and return a parsed value.
77 ///
78 /// Returns `Err(Error::NotFound)` if the file is missing —
79 /// detectors typically want that exact response.
80 ///
81 /// Parsing is relaxed: syntax errors are tolerated and the resulting
82 /// AST may have missing or partially-recovered nodes. Detectors that
83 /// need to reject malformed input should validate the structure they
84 /// care about (e.g. that the source paragraph or a particular field
85 /// exists) rather than expecting `Err`.
86 ///
87 /// Implementations may cache the parse; the returned value is owned
88 /// (`Control` is cheap to clone — its rowan green nodes are shared
89 /// internally).
90 fn parsed_control(&self) -> Result<Control, Error>;
91
92 /// Read `debian/changelog` and return a parsed value.
93 ///
94 /// Returns `Err(Error::NotFound)` if the file is missing. Parsing is
95 /// relaxed; see [`parsed_control`](Self::parsed_control) for details
96 /// on what that means.
97 fn parsed_changelog(&self) -> Result<ChangeLog, Error>;
98
99 /// Read `debian/copyright` and return a parsed value.
100 ///
101 /// Returns `Err(Error::NotFound)` if the file is missing, and
102 /// `Err(Error::Parse)` only when the file isn't a machine-readable
103 /// DEP-5 document at all (i.e. doesn't start with `Format:`).
104 /// Parsing is otherwise relaxed; see
105 /// [`parsed_control`](Self::parsed_control) for details on what that
106 /// means.
107 fn parsed_copyright(&self) -> Result<Copyright, Error>;
108
109 /// Read `debian/upstream/metadata` and return its parsed YAML.
110 ///
111 /// Returns `Err(Error::NotFound)` if the file is missing or
112 /// unparseable.
113 fn parsed_upstream_metadata(&self) -> Result<yaml_edit::YamlFile, Error>;
114
115 /// Read `debian/watch` and return a parsed value.
116 ///
117 /// Returns `Err(Error::NotFound)` if the file is missing.
118 fn parsed_watch(&self) -> Result<ParsedWatchFile, Error>;
119
120 /// Read `debian/rules` and return the parsed Makefile.
121 ///
122 /// Returns `Err(Error::NotFound)` if the file is missing. Uses
123 /// `Makefile::read_relaxed`, mirroring the behaviour every fixer
124 /// currently expects from `debian/rules` parsing.
125 fn parsed_rules(&self) -> Result<Makefile, Error>;
126
127 /// Read and parse `debian/patches/series`, the quilt patch series.
128 ///
129 /// Returns `Ok(None)` if the file does not exist (the package ships
130 /// no quilt patches). Returns `Err` only if the file exists but
131 /// cannot be read as a series.
132 fn parsed_patches_series(&self) -> Result<Option<Series>, Error> {
133 let rel = Path::new("debian/patches/series");
134 match self.read_file(rel)? {
135 None => Ok(None),
136 Some(bytes) => {
137 let series = Series::read(&bytes[..]).map_err(Error::Io)?;
138 Ok(Some(series))
139 }
140 }
141 }
142
143 /// Read a quilt patch file and return its parsed DEP-3 header
144 /// together with the parsed diff.
145 ///
146 /// `rel` is the patch's path relative to the package root (e.g.
147 /// `debian/patches/fix-foo.patch`), as obtained by joining
148 /// `debian/patches` with a name from [`parsed_patches_series`].
149 ///
150 /// Returns `Ok(None)` when the file does not exist. On success the
151 /// tuple's first element is the patch's DEP-3 header, or `None` when
152 /// the patch carries no header (a bare diff) or its header does not
153 /// parse — the header is optional metadata. The second element is
154 /// the lossless parse of the diff body; that parser is
155 /// error-recovering, so a [`Patch`] is produced even for a malformed
156 /// diff.
157 ///
158 /// Returns `Err(Error::Parse)` if the file exists but is not valid
159 /// UTF-8.
160 ///
161 /// [`parsed_patches_series`]: Self::parsed_patches_series
162 fn parsed_patch(&self, rel: &Path) -> Result<Option<(Option<PatchHeader>, Patch)>, Error> {
163 let Some(bytes) = self.read_file(rel)? else {
164 return Ok(None);
165 };
166 let content = std::str::from_utf8(&bytes)
167 .map_err(|e| Error::Parse(format!("{} is not valid UTF-8: {}", rel.display(), e)))?;
168 let header_end = dep3::lossless::header_end(content);
169 let header_text = &content[..header_end];
170 let header = if header_text.trim().is_empty() {
171 None
172 } else {
173 header_text.parse::<PatchHeader>().ok()
174 };
175 let patch = patchkit::edit::parse(&content[header_end..]).tree();
176 Ok(Some((header, patch)))
177 }
178
179 /// Read the trimmed contents of `debian/source/format`.
180 ///
181 /// Returns `Ok(None)` if the file is missing. The default format
182 /// (`1.0`) is *not* substituted — callers see exactly what is on
183 /// disk so they can distinguish "no file" from "explicit 1.0".
184 fn source_format(&self) -> Result<Option<String>, Error>;
185
186 /// Open `debian/control` for editing.
187 ///
188 /// Takes `&self` so that fixers can hold an editor and still call
189 /// other workspace methods (`read_file`, …). Implementations
190 /// that need to record edits on the workspace itself should use interior
191 /// mutability.
192 ///
193 /// Detectors don't need this — they emit `Action`s for the appliers to
194 /// run. Use [`parsed_control`](Self::parsed_control) instead.
195 fn control(&self) -> Result<Box<dyn Editor<Control> + '_>, Error>;
196
197 /// Open `debian/changelog` for editing. See [`control`](Self::control).
198 fn changelog(&self) -> Result<Box<dyn Editor<ChangeLog> + '_>, Error>;
199
200 /// Read `debian/debcargo.toml` and return a parsed TOML document.
201 ///
202 /// Returns `Ok(None)` if the file does not exist (package is not a
203 /// debcargo-managed crate). Returns `Err` if the file exists but cannot
204 /// be parsed.
205 fn parsed_debcargo(&self) -> Result<Option<DocumentMut>, Error> {
206 let rel = Path::new("debian/debcargo.toml");
207 match self.read_file(rel)? {
208 None => Ok(None),
209 Some(bytes) => {
210 let text = String::from_utf8(bytes.into_owned()).map_err(|e| {
211 Error::Parse(format!("debcargo.toml is not valid UTF-8: {}", e))
212 })?;
213 let doc: DocumentMut = text
214 .parse()
215 .map_err(|e| Error::Parse(format!("Failed to parse debcargo.toml: {}", e)))?;
216 Ok(Some(doc))
217 }
218 }
219 }
220
221 /// Open `debian/debcargo.toml` for editing.
222 ///
223 /// Returns `Ok(None)` if the file does not exist.
224 /// Returns `Err` if the file exists but cannot be parsed.
225 fn debcargo(&self) -> Result<Option<Box<dyn Editor<DocumentMut> + '_>>, Error>;
226
227 /// Read raw bytes of an arbitrary file relative to the package root.
228 ///
229 /// Returns `Ok(None)` if the file does not exist.
230 ///
231 /// The returned `Cow` is borrowed when the host has the bytes
232 /// already in memory (an LSP host with the file open in an editor
233 /// buffer) and owned when they had to be fetched (a disk read).
234 /// Detectors that need owned bytes can call `.into_owned()`.
235 fn read_file(&self, rel: &Path) -> Result<Option<std::borrow::Cow<'_, [u8]>>, Error>;
236
237 /// Write raw bytes to an arbitrary file relative to the package root.
238 ///
239 /// Creates the file if it does not exist.
240 fn write_file(&self, rel: &Path, content: &[u8]) -> Result<(), Error>;
241
242 /// List the entries of a directory relative to the package root.
243 ///
244 /// Returns the file (and subdirectory) names within `rel`, without any
245 /// path prefix. Returns `Ok(None)` if the directory does not exist.
246 ///
247 /// The order of returned entries is unspecified — a non-`Tree` host
248 /// (an LSP) may not have a meaningful directory ordering.
249 fn list_dir(&self, rel: &Path) -> Result<Option<Vec<String>>, Error>;
250
251 /// Recursively walk `rel`, returning the relative paths of every
252 /// regular file beneath it (paths are relative to the package root,
253 /// not to `rel`).
254 ///
255 /// Symbolic links and other non-regular entries are skipped. Returns
256 /// `Ok(None)` if `rel` does not exist.
257 ///
258 /// The order of returned paths is unspecified. Hosts that can't
259 /// meaningfully walk a tree (e.g. an LSP that only knows about open
260 /// buffers) may return only the files they currently track.
261 fn walk_dir(&self, rel: &Path) -> Result<Option<Vec<PathBuf>>, Error> {
262 // Default impl: depth-first walk via list_dir + read_file.
263 // Hosts that have a faster path can override.
264 let Some(top_entries) = self.list_dir(rel)? else {
265 return Ok(None);
266 };
267 let mut out = Vec::new();
268 let mut stack: Vec<(PathBuf, Vec<String>)> = vec![(rel.to_path_buf(), top_entries)];
269 while let Some((dir, entries)) = stack.pop() {
270 for name in entries {
271 let child = dir.join(&name);
272 match self.list_dir(&child)? {
273 Some(sub) => stack.push((child, sub)),
274 None => out.push(child),
275 }
276 }
277 }
278 Ok(Some(out))
279 }
280
281 /// Read the Unix file mode of `rel`, or `None` if the file is missing.
282 ///
283 /// Hosts that don't track a meaningful mode (e.g. an LSP serving an
284 /// in-memory buffer) may return `Ok(None)` even when the file exists.
285 /// Detectors that key off mode (e.g. checking that `debian/rules` is
286 /// executable) treat that the same as "not present" and skip.
287 fn file_mode(&self, rel: &Path) -> Result<Option<u32>, Error>;
288
289 /// On-disk root for hosts that have one.
290 ///
291 /// Returns `Some` for the lintian-brush CLI ([`FsWorkspace`])
292 /// where the package has been materialised to disk. Returns `None`
293 /// for in-memory hosts (an LSP serving open buffers); detectors that
294 /// genuinely need to walk the source tree (e.g. an upstream-metadata
295 /// guesser, a license scanner) should treat `None` as "skip — we
296 /// can't help here".
297 ///
298 /// Prefer the typed accessors ([`read_file`](Self::read_file),
299 /// [`list_dir`](Self::list_dir), …) wherever possible. Reach for
300 /// this only when an external library insists on a `&Path` for the
301 /// whole tree.
302 fn base_path(&self) -> Option<&Path> {
303 None
304 }
305}
306
307/// Read the debhelper compat level from a workspace.
308///
309/// Looks at `debian/compat` first, then falls back to the `X-DH-Compat`
310/// field or a `debhelper-compat` build dependency in `debian/control`.
311/// Returns `Ok(None)` when neither source is present or parseable.
312pub fn compat_level(ws: &dyn Workspace) -> Result<Option<u8>, Error> {
313 if let Some(bytes) = ws.read_file(Path::new("debian/compat"))? {
314 if let Ok(text) = std::str::from_utf8(&bytes) {
315 let trimmed = text
316 .split_once('#')
317 .map_or(text, |(before, _)| before)
318 .trim();
319 if let Ok(level) = trimmed.parse::<u8>() {
320 return Ok(Some(level));
321 }
322 }
323 }
324
325 let control = match ws.parsed_control() {
326 Ok(c) => c,
327 Err(Error::NotFound) => return Ok(None),
328 Err(e) => return Err(e),
329 };
330 let Some(source) = control.source() else {
331 return Ok(None);
332 };
333
334 if let Some(dh_compat) = source.as_deb822().get("X-DH-Compat") {
335 let trimmed = dh_compat
336 .split_once('#')
337 .map_or(dh_compat.as_str(), |(before, _)| before)
338 .trim();
339 if let Ok(level) = trimmed.parse::<u8>() {
340 return Ok(Some(level));
341 }
342 }
343
344 let Some(build_depends) = source.build_depends() else {
345 return Ok(None);
346 };
347 let Some(rel) = build_depends
348 .entries()
349 .flat_map(|entry| entry.relations().collect::<Vec<_>>())
350 .find(|r| r.try_name().as_deref() == Some("debhelper-compat"))
351 else {
352 return Ok(None);
353 };
354 Ok(rel
355 .version()
356 .and_then(|(_op, v)| v.to_string().parse::<u8>().ok()))
357}