ripgrep_all/
adapters.rs

1pub mod custom;
2pub mod decompress;
3pub mod ffmpeg;
4pub mod mbox;
5pub mod postproc;
6use std::sync::Arc;
7pub mod sqlite;
8pub mod tar;
9pub mod writing;
10pub mod zip;
11use crate::{adapted_iter::AdaptedFilesIterBox, config::RgaConfig, matching::*};
12use anyhow::{Context, Result, format_err};
13use async_trait::async_trait;
14use custom::BUILTIN_SPAWNING_ADAPTERS;
15use custom::CustomAdapterConfig;
16use log::*;
17use tokio::io::AsyncRead;
18
19use core::fmt::Debug;
20use std::borrow::Cow;
21use std::collections::HashMap;
22use std::iter::Iterator;
23use std::path::PathBuf;
24use std::pin::Pin;
25
26use self::postproc::PostprocPageBreaks;
27
28pub type ReadBox = Pin<Box<dyn AsyncRead + Send>>;
29pub struct AdapterMeta {
30    /// unique short name of this adapter (a-z0-9 only)
31    pub name: String,
32    /// version identifier. used to key cache entries, change if your output format changes
33    pub version: i32,
34    pub description: String,
35    /// indicates whether this adapter can descend (=call rga_preproc again). if true, the cache key needs to include the list of active adapters
36    pub recurses: bool,
37    /// list of matchers (interpreted as a OR b OR ...)
38    pub fast_matchers: Vec<FastFileMatcher>,
39    /// list of matchers when we have mime type detection active (interpreted as ORed)
40    /// warning: this *overrides* the fast matchers
41    pub slow_matchers: Option<Vec<FileMatcher>>,
42    /// if true, slow_matchers is merged with fast matchers if accurate is enabled
43    /// for example, in sqlite you want this disabled since the db extension can mean other things and the mime type matching is very accurate for sqlite.
44    /// but for tar you want it enabled, since the tar extension is very accurate but the tar mime matcher can have false negatives
45    pub keep_fast_matchers_if_accurate: bool,
46    // if true, adapter is only used when user lists it in `--rga-adapters`
47    pub disabled_by_default: bool,
48}
49impl AdapterMeta {
50    // todo: this is pretty ugly
51    pub fn get_matchers<'a>(
52        &'a self,
53        slow: bool,
54    ) -> Box<dyn Iterator<Item = Cow<'a, FileMatcher>> + 'a> {
55        match (
56            slow,
57            self.keep_fast_matchers_if_accurate,
58            &self.slow_matchers,
59        ) {
60            (true, false, Some(sm)) => Box::new(sm.iter().map(Cow::Borrowed)),
61            (true, true, Some(sm)) => Box::new(
62                sm.iter().map(Cow::Borrowed).chain(
63                    self.fast_matchers
64                        .iter()
65                        .map(|e| Cow::Owned(FileMatcher::Fast(e.clone()))),
66                ),
67            ),
68            // don't have slow matchers or slow matching disabled
69            (true, _, None) | (false, _, _) => Box::new(
70                self.fast_matchers
71                    .iter()
72                    .map(|e| Cow::Owned(FileMatcher::Fast(e.clone()))),
73            ),
74        }
75    }
76}
77
78pub trait GetMetadata {
79    fn metadata(&self) -> &AdapterMeta;
80}
81
82#[async_trait]
83pub trait FileAdapter: GetMetadata + Send + Sync {
84    /// adapt a file.
85    ///
86    /// detection_reason is the Matcher that was used to identify this file. Unless --rga-accurate was given, it is always a FastMatcher
87    async fn adapt(
88        &self,
89        a: AdaptInfo,
90        detection_reason: &FileMatcher,
91    ) -> Result<AdaptedFilesIterBox>;
92}
93
94pub struct AdaptInfo {
95    /// file path. May not be an actual file on the file system (e.g. in an archive). Used for matching file extensions.
96    pub filepath_hint: PathBuf,
97    /// true if filepath_hint is an actual file on the file system
98    pub is_real_file: bool,
99    /// depth at which this file is in archives. 0 for real filesystem
100    pub archive_recursion_depth: i32,
101    /// stream to read the file from. can be from a file or from some decoder
102    pub inp: ReadBox,
103    /// prefix every output line with this string to better indicate the file's location if it is in some archive
104    pub line_prefix: String,
105    pub postprocess: bool,
106    pub config: RgaConfig,
107}
108
109/// (enabledAdapters, disabledAdapters)
110type AdaptersTuple = (Vec<Arc<dyn FileAdapter>>, Vec<Arc<dyn FileAdapter>>);
111
112pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> AdaptersTuple {
113    // order in descending priority
114    let mut adapters: Vec<Arc<dyn FileAdapter>> = vec![];
115    if let Some(custom_adapters) = custom_adapters {
116        for adapter_config in custom_adapters {
117            adapters.push(Arc::new(adapter_config.to_adapter()));
118        }
119    }
120
121    let internal_adapters: Vec<Arc<dyn FileAdapter>> = vec![
122        Arc::new(PostprocPageBreaks::default()),
123        Arc::new(ffmpeg::FFmpegAdapter::new()),
124        Arc::new(zip::ZipAdapter::new()),
125        Arc::new(decompress::DecompressAdapter::new()),
126        Arc::new(mbox::MboxAdapter::new()),
127        Arc::new(tar::TarAdapter::new()),
128        Arc::new(sqlite::SqliteAdapter::new()),
129    ];
130    adapters.extend(
131        BUILTIN_SPAWNING_ADAPTERS
132            .iter()
133            .map(|e| -> Arc<dyn FileAdapter> { Arc::new(e.to_adapter()) }),
134    );
135    adapters.extend(internal_adapters);
136
137    adapters
138        .into_iter()
139        .partition(|e| !e.metadata().disabled_by_default)
140}
141
142/**
143 * filter adapters by given names:
144 *
145 *  - "" means use default enabled adapter list
146 *  - "a,b" means use adapters a,b
147 *  - "-a,b" means use default list except for a and b
148 *  - "+a,b" means use default list but also a and b (a,b will be prepended to the list so given higher priority)
149 */
150pub fn get_adapters_filtered<T: AsRef<str>>(
151    custom_adapters: Option<Vec<CustomAdapterConfig>>,
152    adapter_names: &[T],
153) -> Result<Vec<Arc<dyn FileAdapter>>> {
154    let (def_enabled_adapters, def_disabled_adapters) = get_all_adapters(custom_adapters);
155    let adapters = if !adapter_names.is_empty() {
156        let adapters_map: HashMap<_, _> = def_enabled_adapters
157            .iter()
158            .chain(def_disabled_adapters.iter())
159            .map(|e| (e.metadata().name.clone(), e.clone()))
160            .collect();
161        let mut adapters = vec![];
162        let mut subtractive = false;
163        let mut additive = false;
164        for (i, name) in adapter_names.iter().enumerate() {
165            let mut name = name.as_ref();
166            if i == 0 && (name.starts_with('-')) {
167                subtractive = true;
168                name = &name[1..];
169                adapters = def_enabled_adapters.clone();
170            } else if i == 0 && (name.starts_with('+')) {
171                name = &name[1..];
172                adapters = def_enabled_adapters.clone();
173                additive = true;
174            }
175            if subtractive {
176                let inx = adapters
177                    .iter()
178                    .position(|a| a.metadata().name == name)
179                    .ok_or_else(|| format_err!("Could not remove adapter {}: Not in list", name))?;
180                adapters.remove(inx);
181            } else {
182                let adapter = adapters_map
183                    .get(name)
184                    .ok_or_else(|| {
185                        format_err!(
186                            "Unknown adapter: \"{}\". Known adapters: {}",
187                            name,
188                            adapters_map
189                                .keys()
190                                .map(|e| e.as_ref())
191                                .collect::<Vec<&str>>()
192                                .join(", ")
193                        )
194                    })?
195                    .clone();
196                if additive {
197                    adapters.insert(0, adapter);
198                } else {
199                    adapters.push(adapter);
200                }
201            }
202        }
203        adapters
204    } else {
205        def_enabled_adapters
206    };
207    debug!(
208        "Chosen available adapters: {}",
209        adapters
210            .iter()
211            .map(|a| a.metadata().name.clone())
212            .collect::<Vec<String>>()
213            .join(",")
214    );
215    Ok(adapters)
216}