gen_completions/parse_man/
mod.rs

1//! For parsing command information from man pages
2mod darwin;
3pub mod error;
4mod podman;
5mod scdoc;
6mod type1;
7mod type2;
8mod type3;
9mod type4;
10mod util;
11
12use std::{
13  collections::{hash_map::Entry, HashMap},
14  fs::File,
15  io::{BufReader, Read},
16  path::{Path, PathBuf},
17};
18
19use bzip2::bufread::BzDecoder;
20use flate2::bufread::GzDecoder;
21use log::{debug, trace};
22
23use crate::{parse_man::error::Error, CommandInfo, Flag};
24
25pub type Result<T> = std::result::Result<T, Error>;
26
/// Information about a command and its detected subcommands before being parsed
#[derive(Debug, Default)]
pub struct CmdPreInfo {
  /// Path to the man page for this command, or `None` if no man page has
  /// been associated with it (yet)
  path: Option<PathBuf>,
  /// Subcommands detected for this command, keyed by the subcommand's name
  subcmds: HashMap<String, CmdPreInfo>,
}
32
/// Derive the name of the command a manpage documents from the manpage's path
///
/// Everything from the first `.` of the file name onwards is dropped, e.g.
/// `/foo/cowsay.1.txt -> "cowsay"`. Characters that are not valid Unicode are
/// removed from the name.
///
/// # Panics
///
/// Panics if the path has no file name component.
#[must_use]
pub fn get_cmd_name(manpage_path: impl AsRef<Path>) -> String {
  let lossy_name = manpage_path
    .as_ref()
    .file_name()
    .expect("Manpage should've had a valid file name")
    .to_string_lossy();
  // Lossy conversion marks invalid sequences with U+FFFD; strip those markers
  let cleaned = lossy_name.replace(std::char::REPLACEMENT_CHARACTER, "");
  // The file name will be something like foo.1.gz, we only want foo
  match cleaned.split_once('.') {
    Some((stem, _section_and_ext)) => stem.to_owned(),
    None => cleaned,
  }
}
51
52/// Parse flags from a man page, trying all of the different parsers and merging
53/// their results if multiple parsers could parse the man page.
54pub fn parse_manpage_text(cmd_name: &str, text: impl AsRef<str>) -> Vec<Flag> {
55  let text = text.as_ref();
56
57  // TODO remove duplicate flags
58  [
59    type1::parse(cmd_name, text),
60    type2::parse(cmd_name, text),
61    type3::parse(cmd_name, text),
62    type4::parse(cmd_name, text),
63    scdoc::parse(cmd_name, text),
64    podman::parse(cmd_name, text),
65    darwin::parse(cmd_name, text),
66  ]
67  .into_iter()
68  .flatten()
69  .collect::<Vec<_>>()
70}
71
72/// Decompress a manpage if necessary
73///
74/// # Errors
75///
76/// Fails if the manpage could not beo pened, or if it was a .gz or .bz2 file
77/// and could not be decompressed.
78pub fn read_manpage(manpage_path: impl AsRef<Path>) -> std::io::Result<String> {
79  let path = manpage_path.as_ref();
80  trace!("Reading man page at {}", path.display());
81  match path.extension() {
82    Some(ext) => {
83      let file = File::open(path)?;
84      let mut reader = BufReader::new(file);
85      let mut str = String::new();
86      // TODO GzDecoder and BzDecoder seem to only work with UTF-8?
87      if ext == "gz" {
88        GzDecoder::new(reader).read_to_string(&mut str)?;
89      } else if ext == "bz2" {
90        BzDecoder::new(reader).read_to_string(&mut str)?;
91      } else {
92        reader.read_to_string(&mut str)?;
93      }
94      Ok(str)
95    }
96    None => todo!(),
97  }
98}
99
100/// Take a `CmdPreInfo` representing the path to a command and its subcommands
101/// and try parsing that command and its subcommands. Also returns a list of
102/// errors encountered along the way.
103#[must_use]
104pub fn parse_from(
105  cmd_name: &str,
106  pre_info: CmdPreInfo,
107) -> (Option<CommandInfo>, Vec<Error>) {
108  // todo actually parse arg types
109  let args = Vec::new();
110  let mut subcommands = Vec::new();
111  let mut errors = Vec::new();
112
113  let flags = if let Some(path) = pre_info.path {
114    match read_manpage(path.clone()) {
115      Ok(text) => {
116        let all_flags = parse_manpage_text(cmd_name, text);
117        if all_flags.is_empty() {
118          errors.push(Error::UnsupportedFormat { path });
119          Vec::new()
120        } else {
121          all_flags
122        }
123      }
124      Err(e) => {
125        errors.push(e.into());
126        Vec::new()
127      }
128    }
129  } else {
130    errors.push(Error::ManpageNotFound {
131      cmd_name: cmd_name.to_string(),
132    });
133    Vec::new()
134  };
135
136  for (sub_name, sub_info) in pre_info.subcmds {
137    let (sub_cmd, mut sub_errors) =
138      parse_from(&format!("{cmd_name} {sub_name}"), sub_info);
139    if let Some(cmd) = sub_cmd {
140      subcommands.push(cmd);
141    }
142    errors.append(&mut sub_errors);
143  }
144
145  let cmd_info = if flags.is_empty() && subcommands.is_empty() {
146    None
147  } else {
148    subcommands.sort_by(|a, b| a.name.cmp(&b.name));
149    Some(CommandInfo {
150      name: cmd_name.split(' ').last().unwrap().to_string(),
151      desc: None,
152      flags,
153      args,
154      subcommands,
155    })
156  };
157  (cmd_info, errors)
158}
159
160/// Make a tree relating commands to their subcommands
161#[must_use]
162pub fn detect_subcommands(
163  manpages: impl IntoIterator<Item = impl AsRef<Path>>,
164  explicit_subcmds: impl IntoIterator<Item = (String, Vec<String>)>,
165) -> HashMap<String, CmdPreInfo> {
166  let mut explicit_subcmds: HashMap<_, _> =
167    explicit_subcmds.into_iter().collect();
168
169  let mut res = HashMap::new();
170
171  for page in manpages {
172    let page = PathBuf::from(page.as_ref());
173    let cmd_name = get_cmd_name(&page);
174    match explicit_subcmds.remove(&cmd_name) {
175      Some(as_subcmd) => insert_subcmd(&mut res, as_subcmd, page),
176      None => {
177        if let Ok(text) = read_manpage(&page) {
178          insert_subcmd(&mut res, detect_subcommand(&cmd_name, &text), page);
179        }
180      }
181    }
182  }
183
184  res
185}
186
187/// Insert a subcommand into a tree of subcommands
188fn insert_subcmd(
189  subcommands: &mut HashMap<String, CmdPreInfo>,
190  mut cmd_parts: Vec<String>,
191  path: PathBuf,
192) {
193  let head = cmd_parts.remove(0);
194  let cmd = match subcommands.entry(head) {
195    Entry::Occupied(o) => o.into_mut(),
196    Entry::Vacant(v) => v.insert(CmdPreInfo {
197      path: None,
198      subcmds: HashMap::new(),
199    }),
200  };
201  if cmd_parts.is_empty() {
202    cmd.path = Some(path);
203  } else {
204    insert_subcmd(&mut cmd.subcmds, cmd_parts, path);
205  }
206}
207
208/// Try to detect if the given command is actually a subcommand and break it up
209/// into its pieces.
210///
211/// Given command `git-log`, the result would be `vec!["git", "log"]`. A single
212/// command like `git` would be `vec!["git"]`.
213fn detect_subcommand(cmd_name: &str, text: &str) -> Vec<String> {
214  let mut chars = cmd_name.chars();
215  let mut hyphens = vec![0];
216  for i in 0..cmd_name.len() {
217    if chars.next().unwrap() == '-' {
218      hyphens.push(i + 1);
219    }
220  }
221  hyphens.push(cmd_name.len() + 1);
222
223  if hyphens.len() > 2 {
224    for poss in all_possible_subcommands(&hyphens, cmd_name) {
225      let as_sub_cmd = poss.join(" ").replace('-', r"\-");
226      if text.contains(&as_sub_cmd) {
227        debug!("Detected {} as subcommand {}", cmd_name, as_sub_cmd);
228        return poss.into_iter().map(String::from).collect();
229      }
230    }
231  }
232
233  vec![cmd_name.to_string()]
234}
235
/// Find all possible subcommands that might have the given hyphenated man page
/// name
///
/// Each result is one way of splitting the current substring of `cmd` at one
/// or more of its hyphens. The same split can appear more than once. Returns
/// an empty list if the substring contains no hyphen to split at.
///
/// ## Arguments
/// * `hyphens` - The locations of the hyphens in the string (also, the first
///   element is the index of the start of the current substring, and the last
///   element is the index of the end of the current substring). Each location
///   is the byte offset just past the hyphen, and the end is one past the end
///   of the substring.
/// * `cmd` - The full man page name that the offsets in `hyphens` refer to
fn all_possible_subcommands<'a>(
  hyphens: &[usize],
  cmd: &'a str,
) -> Vec<Vec<&'a str>> {
  // Only a start and an end (or less): nothing to split
  if hyphens.len() <= 2 {
    return Vec::new();
  }

  let start = hyphens[0];
  let end = hyphens[hyphens.len() - 1];
  let mut res = Vec::new();

  for i in 1..hyphens.len() - 1 {
    let mid = hyphens[i];

    // Every way of writing the part before this hyphen: all of its further
    // splits, then the part kept whole. This does not depend on the right
    // side, so compute it once per split point.
    let mut all_left = all_possible_subcommands(&hyphens[..=i], cmd);
    all_left.push(vec![&cmd[start..mid - 1]]);

    // Likewise for the part after this hyphen
    let mut all_right = all_possible_subcommands(&hyphens[i..], cmd);
    all_right.push(vec![&cmd[mid..end - 1]]);

    for right in &all_right {
      for left in &all_left {
        let mut combined = Vec::with_capacity(left.len() + right.len());
        combined.extend_from_slice(left);
        combined.extend_from_slice(right);
        res.push(combined);
      }
    }
  }

  res
}
268}