gix_glob/search/pattern.rs
1use std::{
2 io::Read,
3 path::{Path, PathBuf},
4};
5
6use bstr::{BStr, BString, ByteSlice, ByteVec};
7
8use crate::{pattern::Case, search::Pattern};
9
10/// A list of patterns which optionally know where they were loaded from and what their base is.
11///
12/// Knowing their base which is relative to a source directory, it will ignore all path to match against
13/// that don't also start with said base.
14#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)]
15pub struct List<T: Pattern> {
16 /// Patterns and their associated data in the order they were loaded in or specified,
17 /// the line number in its source file or its sequence number (_`(pattern, value, line_number)`_).
18 ///
19 /// During matching, this order is reversed.
20 pub patterns: Vec<Mapping<T::Value>>,
21
22 /// The path from which the patterns were read, or `None` if the patterns
23 /// don't originate in a file on disk.
24 pub source: Option<PathBuf>,
25
26 /// The parent directory of source, or `None` if the patterns are _global_ to match against the repository root.
27 /// It's processed to contain slashes only and to end with a trailing slash, and is relative to the repository root.
28 pub base: Option<BString>,
29}
30
31/// An association of a pattern with its value, along with a sequence number providing a sort order in relation to its peers.
32#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
33pub struct Mapping<T> {
34 /// The pattern itself, like `/target/*`
35 pub pattern: crate::Pattern,
36 /// The value associated with the pattern.
37 pub value: T,
38 /// Typically the line number in the file the pattern was parsed from.
39 pub sequence_number: usize,
40}
41
42fn read_in_full_ignore_missing(path: &Path, follow_symlinks: bool, buf: &mut Vec<u8>) -> std::io::Result<bool> {
43 buf.clear();
44 let file = if follow_symlinks {
45 std::fs::File::open(path)
46 } else {
47 gix_features::fs::open_options_no_follow().read(true).open(path)
48 };
49 Ok(match file {
50 Ok(mut file) => {
51 if let Err(err) = file.read_to_end(buf) {
52 if io_err_is_dir(&err) {
53 false
54 } else {
55 return Err(err);
56 }
57 } else {
58 true
59 }
60 }
61 Err(err) if err.kind() == std::io::ErrorKind::NotFound || io_err_is_dir(&err) => false,
62 Err(err) => return Err(err),
63 })
64}
65
/// Return `true` if the raw OS error code of `err` is one of the codes this module associates
/// with a directory being involved where a file was expected.
///
/// Errors without a raw OS error code never match.
fn io_err_is_dir(err: &std::io::Error) -> bool {
    // TODO: use the enum variant NotADirectory for this once stabilized
    // Windows is checked for code 5 (`ERROR_ACCESS_DENIED`, presumably what opening a directory
    // as a file yields there), everything else for code 21, which is `EISDIR` ("Is a directory") on Linux.
    let platform_code = if cfg!(windows) { 5 } else { 21 };
    match err.raw_os_error() {
        // Code 20 is accepted on every platform - on Linux it is `ENOTDIR` ("Not a directory"),
        // which occurs under different circumstances.
        Some(code) => code == platform_code || code == 20,
        None => false,
    }
}
73
74/// Instantiation
75impl<T> List<T>
76where
77 T: Pattern,
78{
79 /// `source_file` is the location of the `bytes` which represents a list of patterns, one pattern per line.
80 /// If `root` is `Some(…)` it's used to see `source_file` as relative to itself, if `source_file` is absolute.
81 /// If source is relative and should be treated as base, set `root` to `Some("")`.
82 /// `parse` is a way to parse bytes to pattern.
83 pub fn from_bytes(bytes: &[u8], source_file: PathBuf, root: Option<&Path>, parse: T) -> Self {
84 let patterns = parse.bytes_to_patterns(bytes, source_file.as_path());
85 let base = root
86 .and_then(|root| source_file.parent().expect("file").strip_prefix(root).ok())
87 .and_then(|base| {
88 (!base.as_os_str().is_empty()).then(|| {
89 let mut base: BString =
90 gix_path::to_unix_separators_on_windows(gix_path::into_bstr(base)).into_owned();
91
92 base.push_byte(b'/');
93 base
94 })
95 });
96 List {
97 patterns,
98 source: Some(source_file),
99 base,
100 }
101 }
102
103 /// Create a pattern list from the `source` file, which may be located underneath `root`, while optionally
104 /// following symlinks with `follow_symlinks`, providing `buf` to temporarily store the data contained in the file.
105 /// `parse` is a way to parse bytes to pattern.
106 pub fn from_file(
107 source: impl Into<PathBuf>,
108 root: Option<&Path>,
109 follow_symlinks: bool,
110 buf: &mut Vec<u8>,
111 parse: T,
112 ) -> std::io::Result<Option<Self>> {
113 let source = source.into();
114 Ok(read_in_full_ignore_missing(&source, follow_symlinks, buf)?
115 .then(|| Self::from_bytes(buf, source, root, parse)))
116 }
117}
118
119/// Utilities
120impl<T> List<T>
121where
122 T: Pattern,
123{
124 /// If this list is anchored to a base path, return `relative_path` as being relative to our base and return
125 /// an updated `basename_pos` as well if it was set.
126 /// `case` is respected for the comparison.
127 ///
128 /// This is useful to turn repository-relative paths into paths relative to a particular search base.
129 pub fn strip_base_handle_recompute_basename_pos<'a>(
130 &self,
131 relative_path: &'a BStr,
132 basename_pos: Option<usize>,
133 case: Case,
134 ) -> Option<(&'a BStr, Option<usize>)> {
135 match self.base.as_deref() {
136 Some(base) => strip_base_handle_recompute_basename_pos(base.as_bstr(), relative_path, basename_pos, case)?,
137 None => (relative_path, basename_pos),
138 }
139 .into()
140 }
141}
142
143/// Return`relative_path` as being relative to `base` along with an updated `basename_pos` if it was set.
144/// `case` is respected for the comparison.
145///
146/// This is useful to turn repository-relative paths into paths relative to a particular search base.
147pub fn strip_base_handle_recompute_basename_pos<'a>(
148 base: &BStr,
149 relative_path: &'a BStr,
150 basename_pos: Option<usize>,
151 case: Case,
152) -> Option<(&'a BStr, Option<usize>)> {
153 Some((
154 match case {
155 Case::Sensitive => relative_path.strip_prefix(base.as_bytes())?.as_bstr(),
156 Case::Fold => {
157 let rela_dir = relative_path.get(..base.len())?;
158 if !rela_dir.eq_ignore_ascii_case(base) {
159 return None;
160 }
161 &relative_path[base.len()..]
162 }
163 },
164 basename_pos.and_then(|pos| {
165 let pos = pos - base.len();
166 (pos != 0).then_some(pos)
167 }),
168 ))
169}