1use std::collections::HashMap;
2use std::ffi::OsString;
3use std::fmt::Display;
4use std::io::{BufRead, BufReader};
5use std::path::Path;
6
7#[cfg(feature = "matcher")]
8use regex::Regex;
9
10use crate::container::Container;
11use crate::error::LinguistError;
12use crate::utils::{determine_multiline_exec, has_shebang, is_binary};
13
14#[derive(Debug, Clone, PartialEq, Eq)]
16#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
17pub struct Language {
18 pub parent: Option<String>,
19 pub name: String,
20 pub aliases: Vec<String>,
21 pub scope: Scope,
22 pub extensions: Vec<OsString>,
23 pub filenames: Vec<OsString>,
24 pub interpreters: Vec<String>,
25 pub color: Option<String>,
26}
27
28impl Display for Language {
29 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30 write!(f, "{}", self.name)
31 }
32}
33
/// Category of a [`Language`].
///
/// The enum carries no data, so it is `Copy` and `Hash` in addition to the usual
/// comparison traits; this lets callers pass it by value and use it as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Scope {
    /// Parsed from the label `"programming"`.
    Programming,
    /// Parsed from the label `"markup"`.
    Markup,
    /// Parsed from the label `"data"`.
    Data,
    /// Parsed from the label `"prose"`.
    Prose,
    /// Fallback for any label that is not one of the four above.
    Unknown,
}
44
45impl From<String> for Scope {
46 fn from(value: String) -> Self {
47 match value.to_lowercase().as_str() {
48 "programming" => Scope::Programming,
49 "markup" => Scope::Markup,
50 "data" => Scope::Data,
51 "prose" => Scope::Prose,
52 _ => Scope::Unknown,
53 }
54 }
55}
56
57impl From<&str> for Scope {
58 fn from(value: &str) -> Self {
59 match value.to_lowercase().as_str() {
60 "programming" => Scope::Programming,
61 "markup" => Scope::Markup,
62 "data" => Scope::Data,
63 "prose" => Scope::Prose,
64 _ => Scope::Unknown,
65 }
66 }
67}
68
69impl std::fmt::Display for Scope {
70 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71 match self {
72 Scope::Programming => write!(f, "Programming"),
73 Scope::Markup => write!(f, "Markup"),
74 Scope::Data => write!(f, "Data"),
75 Scope::Prose => write!(f, "Prose"),
76 Scope::Unknown => write!(f, "Unknown"),
77 }
78 }
79}
80
/// A content heuristic: a set of patterns that, when one matches a file's content,
/// selects the language named by `language`.
// NOTE(review): the serde derives here are gated on the `matcher` feature while the other
// types in this file use `serde` — confirm this is intentional.
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "matcher", derive(serde::Serialize, serde::Deserialize))]
pub struct HeuristicRule {
    /// Name of the language this rule resolves to.
    pub language: String,
    /// File extensions the rule is associated with.
    pub extensions: Vec<OsString>,
    /// Regular-expression sources; the resolver joins them into one alternation.
    pub patterns: Vec<String>,
}
92
93pub fn resolve_languages_by_filename(
95 file: impl AsRef<Path>,
96 container: &impl Container,
97) -> Result<Vec<&Language>, LinguistError> {
98 match container.get_languages_by_filename(file) {
99 Some(langs) => Ok(langs),
100 _ => Err(LinguistError::LanguageNotFound),
101 }
102}
103
104pub fn resolve_languages_by_extension(
106 file: impl AsRef<Path>,
107 container: &impl Container,
108) -> Result<Vec<&Language>, LinguistError> {
109 match container.get_languages_by_extension(file) {
110 Some(langs) => Ok(langs),
111 _ => Err(LinguistError::LanguageNotFound),
112 }
113}
114
/// Resolve a language by matching the file's content against the heuristic rules the
/// container registers for the file's extension.
///
/// Rules are tried in the order the container returns them. For each rule the patterns are
/// combined into a single alternation; the first rule whose alternation matches the content
/// decides the result, which is whatever `get_language_by_name` returns for the rule's
/// language (possibly `None`).
///
/// # Errors
/// * [`LinguistError::FileNotFound`] when the file cannot be read as UTF-8 text (every read
///   failure is mapped to this variant).
/// * The converted regex error when a rule's patterns do not compile.
/// * [`LinguistError::LanguageNotFound`] when there are no rules or none of them matches.
#[cfg(feature = "matcher")]
pub fn resolve_language_by_content(
    file: impl AsRef<Path>,
    container: &impl Container,
) -> Result<Option<&Language>, LinguistError> {
    let content =
        std::fs::read_to_string(file.as_ref()).map_err(|_| LinguistError::FileNotFound)?;

    if let Some(rules) = container.get_heuristics_by_extension(file.as_ref()) {
        for rule in rules {
            // Wrap every pattern in its own non-capturing group before joining: an inline
            // flag such as `(?i)` applies to the rest of the enclosing group, so a bare
            // `a|b|c` join would let one pattern's flags leak into the ones after it.
            let alternation = rule
                .patterns
                .iter()
                .map(|pattern| format!("(?:{})", pattern))
                .collect::<Vec<_>>()
                .join("|");
            let matcher = Regex::new(&alternation)?;

            if matcher.is_match(&content) {
                return Ok(container.get_language_by_name(&rule.language));
            }
        }
    }

    Err(LinguistError::LanguageNotFound)
}
138
139pub fn resolve_languages_by_shebang(
141 file: impl AsRef<Path>,
142 container: &impl Container,
143) -> Result<Option<Vec<&Language>>, LinguistError> {
144 let file = match std::fs::File::open(&file) {
146 Ok(file) => file,
147 Err(err) => return Err(LinguistError::IOError(err)),
148 };
149 let mut buf = BufReader::new(file);
150 let mut line = String::new();
151 let _ = buf.read_line(&mut line);
152
153 if !has_shebang(line.as_bytes()) {
155 return Ok(None);
156 }
157
158 let line = line[2..].trim();
159 let mut fields = line.split_whitespace().collect::<Vec<&str>>();
160 if fields.is_empty() {
161 return Ok(None);
162 }
163
164 let mut interpreter = Path::new(fields[0])
165 .file_name()
166 .unwrap()
167 .to_str()
168 .unwrap()
169 .to_owned();
170
171 if interpreter == "env" {
172 if fields.len() == 1 {
173 return Ok(None);
174 }
175
176 let env_opt_args = Regex::new(r"^-[a-zA-Z]+$").unwrap();
177 let env_var_args = Regex::new(r"^\$[a-zA-Z_]+$").unwrap();
178
179 let _i = 1;
180 while fields.len() > 2 {
181 if env_opt_args.is_match(fields[1]) || env_var_args.is_match(fields[1]) {
182 fields.remove(1);
183 continue;
184 }
185 break;
186 }
187 interpreter = Path::new(fields[1])
188 .file_name()
189 .unwrap()
190 .to_str()
191 .unwrap()
192 .to_owned();
193 }
194
195 let mut interpreter = interpreter;
196 if interpreter == "sh" {
197 interpreter = determine_multiline_exec(buf.buffer()).unwrap();
198 }
199
200 let python_version = Regex::new(r"^python[0-9]*\.[0-9]*").unwrap();
201 if python_version.is_match(&interpreter) {
202 interpreter = interpreter.split('.').next().unwrap().to_owned();
203 }
204 if interpreter == "osascript" && line.contains("-l") {
207 interpreter = "".to_string();
208 }
209
210 let results = container.get_languages_by_interpreter(&interpreter);
211 if results.is_some() {
212 Ok(results)
213 } else {
214 Ok(None)
215 }
216}
217
218pub fn resolve_language(
221 file: impl AsRef<Path>,
222 container: &impl Container,
223) -> Result<Option<&Language>, LinguistError> {
224 if is_binary(&file)? {
225 return Ok(None);
226 }
227
228 let mut probabilities: HashMap<String, usize> = HashMap::new();
229
230 if let Ok(candidates) = resolve_languages_by_filename(&file, container) {
231 for candidate in candidates {
232 *probabilities
233 .entry(candidate.name.clone().to_lowercase())
234 .or_insert(1) += 1;
235 }
236 }
237
238 if let Ok(Some(candidate)) = resolve_languages_by_shebang(&file, container) {
239 for lang in candidate {
240 *probabilities
241 .entry(lang.name.clone().to_lowercase())
242 .or_insert(1) += 1;
243 }
244 }
245
246 if let Ok(candidates) = resolve_languages_by_extension(&file, container) {
247 for candidate in candidates {
248 *probabilities
249 .entry(candidate.name.clone().to_lowercase())
250 .or_insert(1) += 1;
251 }
252 }
253
254 if let Ok(Some(candidate)) = resolve_language_by_content(&file, container) {
255 *probabilities
256 .entry(candidate.name.clone().to_lowercase())
257 .or_insert(1) += 1;
258 }
259
260 let mut ordered: Vec<(&String, &usize)> = probabilities.iter().collect();
261 ordered.sort_by_key(|&(_, v)| v);
262 ordered.reverse();
263
264 if !ordered.is_empty() {
265 return Ok(Some(
266 container
267 .get_language_by_name(ordered.get(0).unwrap().0)
268 .unwrap(),
269 ));
270 }
271 Err(LinguistError::LanguageNotFound)
272}