Skip to main content

journal_core/file/
lock.rs

1use std::fs::{self, File, OpenOptions};
2use std::io::{self, Write};
3use std::path::{Path, PathBuf};
4use std::time::{Duration, SystemTime};
5
/// First line of every lock file; `read_owner` rejects files whose first line
/// differs from this marker.
const LOCK_VERSION: &str = "systemd-journal-sdk-lock-v1";

/// Age below which an unreadable or malformed lock file is assumed to be in
/// the middle of being written by another process rather than stale.
const STALE_GRACE: Duration = Duration::from_millis(2_000);
8
/// Identity of the process that holds (or held) a lock file.
///
/// Two owners compare equal only when all three fields match.
#[derive(Clone, Debug, Eq, PartialEq)]
struct LockOwner {
    /// Process id recorded in the lock file.
    pid: u32,
    /// Boot identifier recorded in the lock file; may be empty when the
    /// platform could not provide one.
    boot_id: String,
    /// Platform-specific process start marker, stored as text.
    start_time: String,
}
15
/// Advisory lock used by cooperating SDK writers.
///
/// The journal file format itself has no locking protocol. Writers that want
/// to exclude other SDK writers acquire this helper before opening a writer
/// and release it once that writer has been closed.
#[derive(Debug)]
pub struct WriterLock {
    /// Path of the lock file while the lock is held; `None` once released.
    path: Option<PathBuf>,
}
25
26impl WriterLock {
27    pub fn acquire(journal_path: &str) -> io::Result<Self> {
28        let lock_path = PathBuf::from(format!("{journal_path}.lock"));
29        let owner = current_owner()?;
30
31        loop {
32            if let Some(parent) = lock_path.parent().filter(|p| !p.as_os_str().is_empty()) {
33                fs::create_dir_all(parent)?;
34            }
35            match OpenOptions::new()
36                .write(true)
37                .create_new(true)
38                .open(&lock_path)
39            {
40                Ok(mut file) => {
41                    write_owner(&mut file, &owner)?;
42                    file.sync_all()?;
43                    return Ok(Self {
44                        path: Some(lock_path),
45                    });
46                }
47                Err(err) if err.kind() == io::ErrorKind::AlreadyExists => {
48                    let (stale, holder) = lock_file_is_stale(&lock_path);
49                    if !stale {
50                        return Err(io::Error::new(
51                            io::ErrorKind::WouldBlock,
52                            format!("journal writer lock held by {holder}"),
53                        ));
54                    }
55                    match fs::remove_file(&lock_path) {
56                        Ok(()) => {}
57                        Err(err) if err.kind() == io::ErrorKind::NotFound => {}
58                        Err(err) => return Err(err),
59                    }
60                }
61                Err(err) => return Err(err),
62            }
63        }
64    }
65
66    pub fn release(&mut self) -> io::Result<()> {
67        let Some(path) = self.path.take() else {
68            return Ok(());
69        };
70        let current = current_owner()?;
71        match read_owner(&path) {
72            Ok(owner) if owner == current => match fs::remove_file(&path) {
73                Ok(()) => Ok(()),
74                Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(()),
75                Err(err) => Err(err),
76            },
77            Ok(_) => Ok(()),
78            Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(()),
79            Err(err) => Err(err),
80        }
81    }
82}
83
84impl Drop for WriterLock {
85    fn drop(&mut self) {
86        let _ = self.release();
87    }
88}
89
90fn write_owner(file: &mut File, owner: &LockOwner) -> io::Result<()> {
91    write!(
92        file,
93        "{LOCK_VERSION}\npid={}\nboot_id={}\nstart_time={}\n",
94        owner.pid, owner.boot_id, owner.start_time
95    )
96}
97
98fn lock_file_is_stale(path: &Path) -> (bool, String) {
99    let owner = match read_owner(path) {
100        Ok(owner) => owner,
101        Err(_) => {
102            if let Ok(metadata) = fs::metadata(path)
103                && let Ok(modified) = metadata.modified()
104                && SystemTime::now()
105                    .duration_since(modified)
106                    .unwrap_or_default()
107                    <= STALE_GRACE
108            {
109                return (false, "partially-created lock".to_string());
110            }
111            return (true, "malformed stale lock".to_string());
112        }
113    };
114
115    if owner.boot_id != boot_id() {
116        return (true, format!("pid {} from previous boot", owner.pid));
117    }
118    match owner_process_is_alive(&owner) {
119        Ok(true) => (false, format!("pid {}", owner.pid)),
120        Ok(false) => (true, format!("stale pid {}", owner.pid)),
121        Err(_) => (false, format!("pid {} with unknown liveness", owner.pid)),
122    }
123}
124
125fn current_owner() -> io::Result<LockOwner> {
126    let pid = std::process::id();
127    Ok(LockOwner {
128        pid,
129        boot_id: boot_id(),
130        start_time: process_start_time(pid)?,
131    })
132}
133
134fn boot_id() -> String {
135    platform_boot_id()
136}
137
/// Reads the boot id from `/proc/sys/kernel/random/boot_id`, trimmed of
/// surrounding whitespace. Returns an empty string when the file cannot be
/// read.
#[cfg(target_os = "linux")]
fn platform_boot_id() -> String {
    match fs::read_to_string("/proc/sys/kernel/random/boot_id") {
        Ok(raw) => raw.trim().to_owned(),
        Err(_) => String::new(),
    }
}
144
/// Derives a boot identifier from the `kern.boottime` sysctl, formatted as
/// `<tv_sec>:<tv_usec>`. Returns an empty string when the sysctl call fails
/// or reports fewer bytes than a `timeval`.
#[cfg(any(target_os = "macos", target_os = "freebsd"))]
fn platform_boot_id() -> String {
    const SYSCTL_NAME: &[u8] = b"kern.boottime\0";
    let expected_len = std::mem::size_of::<libc::timeval>();

    // SAFETY: `timeval` is a plain C value for which all-zero bytes are a
    // valid representation; `sysctlbyname` overwrites it before it is read.
    // nosemgrep: rust.lang.security.unsafe-usage.unsafe-usage
    let mut boottime: libc::timeval = unsafe { std::mem::zeroed() };
    let mut reported_len = expected_len;

    // SAFETY: The name is NUL-terminated, the output pointer refers to
    // `boottime`, and `reported_len` starts at the size of that buffer.
    // nosemgrep: rust.lang.security.unsafe-usage.unsafe-usage
    let rc = unsafe {
        libc::sysctlbyname(
            SYSCTL_NAME.as_ptr().cast::<libc::c_char>(),
            std::ptr::addr_of_mut!(boottime).cast::<libc::c_void>(),
            &mut reported_len,
            std::ptr::null_mut(),
            0,
        )
    };

    if rc == 0 && reported_len >= expected_len {
        format!("{}:{}", boottime.tv_sec, boottime.tv_usec)
    } else {
        String::new()
    }
}
170
/// Fallback for platforms without a boot-id source: always an empty string.
#[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "freebsd")))]
fn platform_boot_id() -> String {
    String::from("")
}
175
176fn process_start_time(pid: u32) -> io::Result<String> {
177    platform_process_start_time(pid)
178}
179
180fn owner_process_is_alive(owner: &LockOwner) -> io::Result<bool> {
181    platform_owner_process_is_alive(owner)
182}
183
/// Returns the `starttime` field of `/proc/<pid>/stat` as text.
///
/// Parsing starts after the last `)` because the command name (second field)
/// is wrapped in parentheses and may itself contain spaces or parentheses.
/// The value returned is the 20th whitespace-separated token after that
/// point.
///
/// # Errors
///
/// * The error from reading `/proc/<pid>/stat` (for example `NotFound` when
///   no such process exists).
/// * `InvalidData` when the content has no `)` or too few fields.
#[cfg(target_os = "linux")]
fn platform_process_start_time(pid: u32) -> io::Result<String> {
    let stat = fs::read_to_string(format!("/proc/{pid}/stat"))?;
    let comm_end = stat
        .rfind(')')
        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "cannot parse proc stat"))?;
    // `)` is a single byte, so `comm_end + 1` is always a valid slice start
    // (at most `stat.len()`); slicing here cannot panic on truncated input.
    // `split_whitespace` skips the separator that follows the `)`.
    stat[comm_end + 1..]
        .split_whitespace()
        .nth(19)
        .map(str::to_owned)
        .ok_or_else(|| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                "cannot parse process start time",
            )
        })
}
199
200#[cfg(target_os = "linux")]
201fn platform_owner_process_is_alive(owner: &LockOwner) -> io::Result<bool> {
202    match platform_process_start_time(owner.pid) {
203        Ok(start_time) => Ok(start_time == owner.start_time),
204        Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(false),
205        Err(_) => Ok(false),
206    }
207}
208
/// Non-Linux Unix has no start-time source here; every process gets the same
/// fixed placeholder marker.
#[cfg(all(unix, not(target_os = "linux")))]
fn platform_process_start_time(_pid: u32) -> io::Result<String> {
    let placeholder = String::from("process-start-unavailable");
    Ok(placeholder)
}
213
/// Probes whether the pid recorded in `owner` exists, using `kill(pid, 0)`.
///
/// A pid that does not fit a positive `pid_t` is reported as not alive.
/// `ESRCH`/`EINVAL` mean not alive, `EPERM` means alive (the process exists
/// but may not be signalled by us).
///
/// # Errors
///
/// Returns the OS error for any other `kill` failure.
#[cfg(all(unix, not(target_os = "linux")))]
fn platform_owner_process_is_alive(owner: &LockOwner) -> io::Result<bool> {
    let Some(pid) = libc::pid_t::try_from(owner.pid)
        .ok()
        .filter(|candidate| *candidate > 0)
    else {
        return Ok(false);
    };

    // SAFETY: `kill(pid, 0)` does not signal the process; it only asks the OS
    // whether this positive PID exists or is inaccessible.
    // nosemgrep: rust.lang.security.unsafe-usage.unsafe-usage
    let probe = unsafe { libc::kill(pid, 0) };
    if probe == 0 {
        return Ok(true);
    }

    let os_err = io::Error::last_os_error();
    match os_err.raw_os_error() {
        Some(libc::ESRCH | libc::EINVAL) => Ok(false),
        Some(libc::EPERM) => Ok(true),
        _ => Err(os_err),
    }
}
235
/// Returns the creation time of process `pid` as a decimal string of the
/// 64-bit value assembled from the `FILETIME` high and low words.
///
/// # Errors
///
/// * `NotFound` when `OpenProcess` fails with `ERROR_INVALID_PARAMETER`.
/// * `PermissionDenied` when it fails with `ERROR_ACCESS_DENIED`.
/// * The raw OS error for any other `OpenProcess` or `GetProcessTimes`
///   failure.
#[cfg(windows)]
fn platform_process_start_time(pid: u32) -> io::Result<String> {
    use windows_sys::Win32::Foundation::{
        CloseHandle, ERROR_ACCESS_DENIED, ERROR_INVALID_PARAMETER, FILETIME,
    };
    use windows_sys::Win32::System::Threading::{
        GetProcessTimes, OpenProcess, PROCESS_QUERY_LIMITED_INFORMATION,
    };

    // SAFETY: FFI call receives a value PID and returns either a null handle or
    // an owned process handle closed below.
    // nosemgrep: rust.lang.security.unsafe-usage.unsafe-usage
    let handle = unsafe { OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, 0, pid) };
    if handle.is_null() {
        let err = io::Error::last_os_error();
        return match err.raw_os_error() {
            Some(code) if code == ERROR_INVALID_PARAMETER as i32 => {
                Err(io::Error::new(io::ErrorKind::NotFound, err))
            }
            Some(code) if code == ERROR_ACCESS_DENIED as i32 => {
                Err(io::Error::new(io::ErrorKind::PermissionDenied, err))
            }
            _ => Err(err),
        };
    }

    let zeroed = || FILETIME {
        dwLowDateTime: 0,
        dwHighDateTime: 0,
    };
    let mut creation = zeroed();
    let mut exit = zeroed();
    let mut kernel = zeroed();
    let mut user = zeroed();

    // SAFETY: `handle` is a valid process handle and all FILETIME pointers
    // point to initialized stack storage for Windows to fill.
    // nosemgrep: rust.lang.security.unsafe-usage.unsafe-usage
    let ok = unsafe { GetProcessTimes(handle, &mut creation, &mut exit, &mut kernel, &mut user) };
    // Capture the failure reason before `CloseHandle`, which may overwrite
    // the thread's last-error value.
    let query = if ok == 0 {
        Err(io::Error::last_os_error())
    } else {
        Ok(())
    };
    // SAFETY: `handle` is the owned handle returned by OpenProcess above and is
    // closed exactly once on this path.
    // nosemgrep: rust.lang.security.unsafe-usage.unsafe-usage
    unsafe {
        CloseHandle(handle);
    }
    query?;

    let creation_ticks = ((creation.dwHighDateTime as u64) << 32) | creation.dwLowDateTime as u64;
    Ok(creation_ticks.to_string())
}
296
/// Reports whether the process described by `owner` is still running.
///
/// First compares the recorded start marker with the one currently reported
/// for the pid: a different marker or a missing process means not alive, and
/// an access-denied result means alive. When the markers match, the process
/// is probed with a zero-timeout `WaitForSingleObject`: a timeout means it
/// is still running, a signalled handle means it has exited.
///
/// # Errors
///
/// Returns the OS error for any other failure of the start-time query,
/// `OpenProcess`, or `WaitForSingleObject`, and an `Other` error for an
/// unexpected wait result.
#[cfg(windows)]
fn platform_owner_process_is_alive(owner: &LockOwner) -> io::Result<bool> {
    use windows_sys::Win32::Foundation::{
        CloseHandle, ERROR_ACCESS_DENIED, ERROR_INVALID_PARAMETER, WAIT_FAILED, WAIT_OBJECT_0,
        WAIT_TIMEOUT,
    };
    use windows_sys::Win32::System::Threading::{
        OpenProcess, PROCESS_SYNCHRONIZE, WaitForSingleObject,
    };

    match platform_process_start_time(owner.pid) {
        Ok(start_time) if start_time != owner.start_time => return Ok(false),
        Ok(_) => {}
        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(false),
        Err(err) if err.kind() == io::ErrorKind::PermissionDenied => return Ok(true),
        Err(err) => return Err(err),
    }

    // SAFETY: FFI call receives a value PID and returns either a null handle or
    // an owned process handle closed below.
    // nosemgrep: rust.lang.security.unsafe-usage.unsafe-usage
    let handle = unsafe { OpenProcess(PROCESS_SYNCHRONIZE, 0, owner.pid) };
    if handle.is_null() {
        let err = io::Error::last_os_error();
        return match err.raw_os_error() {
            Some(code) if code == ERROR_INVALID_PARAMETER as i32 => Ok(false),
            Some(code) if code == ERROR_ACCESS_DENIED as i32 => Ok(true),
            _ => Err(err),
        };
    }

    // SAFETY: `handle` is a valid synchronization handle from OpenProcess.
    // Zero timeout makes this a non-blocking liveness probe.
    // nosemgrep: rust.lang.security.unsafe-usage.unsafe-usage
    let wait = unsafe { WaitForSingleObject(handle, 0) };
    // Translate the result (reading the last-error value for `WAIT_FAILED`)
    // before `CloseHandle`, which may overwrite that value.
    let outcome = match wait {
        WAIT_TIMEOUT => Ok(true),
        WAIT_OBJECT_0 => Ok(false),
        WAIT_FAILED => Err(io::Error::last_os_error()),
        _ => Err(io::Error::new(
            io::ErrorKind::Other,
            format!("unexpected WaitForSingleObject result {wait}"),
        )),
    };
    // SAFETY: `handle` is the owned handle returned by OpenProcess above and is
    // closed exactly once on this path.
    // nosemgrep: rust.lang.security.unsafe-usage.unsafe-usage
    unsafe {
        CloseHandle(handle);
    }
    outcome
}
348
349fn read_owner(path: &Path) -> io::Result<LockOwner> {
350    let text = fs::read_to_string(path)?;
351    let mut lines = text.lines();
352    if lines.next() != Some(LOCK_VERSION) {
353        return Err(io::Error::new(
354            io::ErrorKind::InvalidData,
355            "invalid lock metadata",
356        ));
357    }
358    let mut pid = None;
359    let mut boot_id = None;
360    let mut start_time = None;
361    for line in lines {
362        let Some((key, value)) = line.split_once('=') else {
363            continue;
364        };
365        match key {
366            "pid" => {
367                pid =
368                    Some(value.parse::<u32>().map_err(|err| {
369                        io::Error::new(io::ErrorKind::InvalidData, err.to_string())
370                    })?)
371            }
372            "boot_id" => boot_id = Some(value.to_string()),
373            "start_time" => start_time = Some(value.to_string()),
374            _ => {}
375        }
376    }
377    let Some(pid) = pid else {
378        return Err(io::Error::new(
379            io::ErrorKind::InvalidData,
380            "missing lock pid",
381        ));
382    };
383    let Some(start_time) = start_time.filter(|s| !s.is_empty()) else {
384        return Err(io::Error::new(
385            io::ErrorKind::InvalidData,
386            "missing lock start time",
387        ));
388    };
389    Ok(LockOwner {
390        pid,
391        boot_id: boot_id.unwrap_or_default(),
392        start_time,
393    })
394}
395
#[cfg(test)]
mod tests {
    use super::*;

    /// A lock file naming a pid that cannot be running must be replaced by a
    /// lock owned by the current process.
    #[test]
    fn stale_dead_pid_lock_is_reclaimed() {
        let scratch = tempfile::tempdir().expect("create temp dir");
        let journal_path = scratch.path().join("stale.journal");
        let lock_path = PathBuf::from(format!("{}.lock", journal_path.display()));

        let dead_owner = LockOwner {
            pid: u32::MAX,
            boot_id: boot_id(),
            start_time: "not-a-real-process-start".to_string(),
        };
        {
            // Scope the handle so the file is closed before `acquire` runs.
            let mut stale_file = File::create(&lock_path).expect("create stale lock");
            write_owner(&mut stale_file, &dead_owner).expect("write stale lock");
        }

        let journal_str = journal_path.to_str().expect("utf8 path");
        let _lock = WriterLock::acquire(journal_str).expect("stale lock should be reclaimed");

        let live_owner = read_owner(&lock_path).expect("read new lock");
        assert_eq!(live_owner.pid, std::process::id());
    }
}