jj_lib/
fsmonitor.rs

1// Copyright 2023 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Filesystem monitor tool interface.
16//!
17//! Interfaces with a filesystem monitor tool to efficiently query for
18//! filesystem updates, without having to crawl the entire working copy. This is
19//! particularly useful for large working copies, or for working copies for
//! which it's expensive to materialize files, such as those backed by a network
//! or virtualized filesystem.
22
23#![warn(missing_docs)]
24
25use std::path::PathBuf;
26
27use crate::config::ConfigGetError;
28use crate::settings::UserSettings;
29
/// Settings specific to the Watchman filesystem monitor
/// (<https://facebook.github.io/watchman/>).
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct WatchmanConfig {
    /// If `true`, a Watchman trigger is registered so that changes are
    /// monitored in the background.
    pub register_trigger: bool,
}
36
37/// The recognized kinds of filesystem monitors.
38#[derive(Eq, PartialEq, Clone, Debug)]
39pub enum FsmonitorSettings {
40    /// The Watchman filesystem monitor (<https://facebook.github.io/watchman/>).
41    Watchman(WatchmanConfig),
42
43    /// Only used in tests.
44    Test {
45        /// The set of changed files to pretend that the filesystem monitor is
46        /// reporting.
47        changed_files: Vec<PathBuf>,
48    },
49
50    /// No filesystem monitor. This is the default if nothing is configured, but
51    /// also makes it possible to turn off the monitor on a case-by-case basis
52    /// when the user gives an option like `--config=core.fsmonitor=none`;
53    /// useful when e.g. when doing analysis of snapshot performance.
54    None,
55}
56
57impl FsmonitorSettings {
58    /// Creates an `FsmonitorSettings` from a `config`.
59    pub fn from_settings(settings: &UserSettings) -> Result<FsmonitorSettings, ConfigGetError> {
60        let name = "core.fsmonitor";
61        match settings.get_string(name)?.as_ref() {
62            "watchman" => Ok(Self::Watchman(WatchmanConfig {
63                // TODO: rename to "register-snapshot-trigger" for consistency?
64                register_trigger: settings.get_bool("core.watchman.register_snapshot_trigger")?,
65            })),
66            "test" => Err(ConfigGetError::Type {
67                name: name.to_owned(),
68                error: "Cannot use test fsmonitor in real repository".into(),
69                source_path: None,
70            }),
71            "none" => Ok(Self::None),
72            other => Err(ConfigGetError::Type {
73                name: name.to_owned(),
74                error: format!("Unknown fsmonitor kind: {other}").into(),
75                source_path: None,
76            }),
77        }
78    }
79}
80
81/// Filesystem monitor integration using Watchman
82/// (<https://facebook.github.io/watchman/>). Requires `watchman` to already be
83/// installed on the system.
84#[cfg(feature = "watchman")]
85pub mod watchman {
86    use std::path::Path;
87    use std::path::PathBuf;
88
89    use itertools::Itertools;
90    use thiserror::Error;
91    use tracing::info;
92    use tracing::instrument;
93    use watchman_client::expr;
94    use watchman_client::prelude::Clock as InnerClock;
95    use watchman_client::prelude::ClockSpec;
96    use watchman_client::prelude::NameOnly;
97    use watchman_client::prelude::QueryRequestCommon;
98    use watchman_client::prelude::QueryResult;
99    use watchman_client::prelude::TriggerRequest;
100
101    /// Represents an instance in time from the perspective of the filesystem
102    /// monitor.
103    ///
104    /// This can be used to perform incremental queries. When making a query,
105    /// the result will include an associated "clock" representing the time
106    /// that the query was made.  By passing the same clock into a future
107    /// query, we inform the filesystem monitor that we only wish to get
108    /// changed files since the previous point in time.
109    #[derive(Clone, Debug)]
110    pub struct Clock(InnerClock);
111
112    impl From<crate::protos::working_copy::WatchmanClock> for Clock {
113        fn from(clock: crate::protos::working_copy::WatchmanClock) -> Self {
114            use crate::protos::working_copy::watchman_clock::WatchmanClock;
115            let watchman_clock = clock.watchman_clock.unwrap();
116            let clock = match watchman_clock {
117                WatchmanClock::StringClock(string_clock) => {
118                    InnerClock::Spec(ClockSpec::StringClock(string_clock))
119                }
120                WatchmanClock::UnixTimestamp(unix_timestamp) => {
121                    InnerClock::Spec(ClockSpec::UnixTimestamp(unix_timestamp))
122                }
123            };
124            Self(clock)
125        }
126    }
127
128    impl From<Clock> for crate::protos::working_copy::WatchmanClock {
129        fn from(clock: Clock) -> Self {
130            use crate::protos::working_copy::watchman_clock;
131            use crate::protos::working_copy::WatchmanClock;
132            let Clock(clock) = clock;
133            let watchman_clock = match clock {
134                InnerClock::Spec(ClockSpec::StringClock(string_clock)) => {
135                    watchman_clock::WatchmanClock::StringClock(string_clock)
136                }
137                InnerClock::Spec(ClockSpec::UnixTimestamp(unix_timestamp)) => {
138                    watchman_clock::WatchmanClock::UnixTimestamp(unix_timestamp)
139                }
140                InnerClock::ScmAware(_) => {
141                    unimplemented!("SCM-aware Watchman clocks not supported")
142                }
143            };
144            WatchmanClock {
145                watchman_clock: Some(watchman_clock),
146            }
147        }
148    }
149
150    #[allow(missing_docs)]
151    #[derive(Debug, Error)]
152    pub enum Error {
153        #[error("Could not connect to Watchman")]
154        WatchmanConnectError(#[source] watchman_client::Error),
155
156        #[error("Could not canonicalize working copy root path")]
157        CanonicalizeRootError(#[source] std::io::Error),
158
159        #[error("Watchman failed to resolve the working copy root path")]
160        ResolveRootError(#[source] watchman_client::Error),
161
162        #[error("Failed to query Watchman")]
163        WatchmanQueryError(#[source] watchman_client::Error),
164
165        #[error("Failed to register Watchman trigger")]
166        WatchmanTriggerError(#[source] watchman_client::Error),
167    }
168
169    /// Handle to the underlying Watchman instance.
170    pub struct Fsmonitor {
171        client: watchman_client::Client,
172        resolved_root: watchman_client::ResolvedRoot,
173    }
174
175    impl Fsmonitor {
176        /// Initialize the Watchman filesystem monitor. If it's not already
177        /// running, this will start it and have it crawl the working
178        /// copy to build up its in-memory representation of the
179        /// filesystem, which may take some time.
180        #[instrument]
181        pub async fn init(
182            working_copy_path: &Path,
183            config: &super::WatchmanConfig,
184        ) -> Result<Self, Error> {
185            info!("Initializing Watchman filesystem monitor...");
186            let connector = watchman_client::Connector::new();
187            let client = connector
188                .connect()
189                .await
190                .map_err(Error::WatchmanConnectError)?;
191            let working_copy_root = watchman_client::CanonicalPath::canonicalize(working_copy_path)
192                .map_err(Error::CanonicalizeRootError)?;
193            let resolved_root = client
194                .resolve_root(working_copy_root)
195                .await
196                .map_err(Error::ResolveRootError)?;
197
198            let monitor = Fsmonitor {
199                client,
200                resolved_root,
201            };
202
203            // Registering the trigger causes an unconditional evaluation of the query, so
204            // test if it is already registered first.
205            if !config.register_trigger {
206                monitor.unregister_trigger().await?;
207            } else if !monitor.is_trigger_registered().await? {
208                monitor.register_trigger().await?;
209            }
210            Ok(monitor)
211        }
212
213        /// Query for changed files since the previous point in time.
214        ///
215        /// The returned list of paths is relative to the `working_copy_path`.
216        /// If it is `None`, then the caller must crawl the entire working copy
217        /// themselves.
218        #[instrument(skip(self))]
219        pub async fn query_changed_files(
220            &self,
221            previous_clock: Option<Clock>,
222        ) -> Result<(Clock, Option<Vec<PathBuf>>), Error> {
223            // TODO: might be better to specify query options by caller, but we
224            // shouldn't expose the underlying watchman API too much.
225            info!("Querying Watchman for changed files...");
226            let QueryResult {
227                version: _,
228                is_fresh_instance,
229                files,
230                clock,
231                state_enter: _,
232                state_leave: _,
233                state_metadata: _,
234                saved_state_info: _,
235                debug: _,
236            }: QueryResult<NameOnly> = self
237                .client
238                .query(
239                    &self.resolved_root,
240                    QueryRequestCommon {
241                        since: previous_clock.map(|Clock(clock)| clock),
242                        expression: Some(self.build_exclude_expr()),
243                        ..Default::default()
244                    },
245                )
246                .await
247                .map_err(Error::WatchmanQueryError)?;
248
249            let clock = Clock(clock);
250            if is_fresh_instance {
251                // The Watchman documentation states that if it was a fresh
252                // instance, we need to delete any tree entries that didn't appear
253                // in the returned list of changed files. For now, the caller will
254                // handle this by manually crawling the working copy again.
255                Ok((clock, None))
256            } else {
257                let paths = files
258                    .unwrap_or_default()
259                    .into_iter()
260                    .map(|NameOnly { name }| name.into_inner())
261                    .collect_vec();
262                Ok((clock, Some(paths)))
263            }
264        }
265
266        /// Return whether or not a trigger has been registered already.
267        #[instrument(skip(self))]
268        pub async fn is_trigger_registered(&self) -> Result<bool, Error> {
269            info!("Checking for an existing Watchman trigger...");
270            Ok(self
271                .client
272                .list_triggers(&self.resolved_root)
273                .await
274                .map_err(Error::WatchmanTriggerError)?
275                .triggers
276                .iter()
277                .any(|t| t.name == "jj-background-monitor"))
278        }
279
280        /// Register trigger for changed files.
281        #[instrument(skip(self))]
282        async fn register_trigger(&self) -> Result<(), Error> {
283            info!("Registering Watchman trigger...");
284            self.client
285                .register_trigger(
286                    &self.resolved_root,
287                    TriggerRequest {
288                        name: "jj-background-monitor".to_string(),
289                        command: vec![
290                            "jj".to_string(),
291                            "debug".to_string(),
292                            "snapshot".to_string(),
293                        ],
294                        expression: Some(self.build_exclude_expr()),
295                        ..Default::default()
296                    },
297                )
298                .await
299                .map_err(Error::WatchmanTriggerError)?;
300            Ok(())
301        }
302
303        /// Register trigger for changed files.
304        #[instrument(skip(self))]
305        async fn unregister_trigger(&self) -> Result<(), Error> {
306            info!("Unregistering Watchman trigger...");
307            self.client
308                .remove_trigger(&self.resolved_root, "jj-background-monitor")
309                .await
310                .map_err(Error::WatchmanTriggerError)?;
311            Ok(())
312        }
313
314        /// Build an exclude expr for `working_copy_path`.
315        fn build_exclude_expr(&self) -> expr::Expr {
316            // TODO: consider parsing `.gitignore`.
317            let exclude_dirs = [Path::new(".git"), Path::new(".jj")];
318            let excludes = itertools::chain(
319                // the directories themselves
320                [expr::Expr::Name(expr::NameTerm {
321                    paths: exclude_dirs.iter().map(|&name| name.to_owned()).collect(),
322                    wholename: true,
323                })],
324                // and all files under the directories
325                exclude_dirs.iter().map(|&name| {
326                    expr::Expr::DirName(expr::DirNameTerm {
327                        path: name.to_owned(),
328                        depth: None,
329                    })
330                }),
331            )
332            .collect();
333            expr::Expr::Not(Box::new(expr::Expr::Any(excludes)))
334        }
335    }
336}