1use regex::Regex;
2use serde::{Deserialize, Serialize};
3use std::collections::HashSet;
4
5use goblin::{elf::Elf, pe::PE};
6use wasmparser::{Parser, Payload};
7
8#[derive(Debug, Serialize, Deserialize, Clone)]
9pub struct VersionInfo {
10 pub version_strings: Vec<String>,
11 pub file_version: Option<String>,
12 pub product_version: Option<String>,
13 pub company: Option<String>,
14 pub product_name: Option<String>,
15 pub confidence: f64,
16}
17
18#[derive(Debug, Serialize, Deserialize, Clone)]
19pub struct LicenseInfo {
20 pub licenses: Vec<String>,
21 pub copyright_notices: Vec<String>,
22 pub spdx_identifiers: Vec<String>,
23 pub license_texts: Vec<String>,
24 pub confidence: f64,
25}
26
27pub fn extract_version_info(contents: &[u8], strings: &[String], format: &str) -> VersionInfo {
28 let mut version_strings = HashSet::new();
29 let mut file_version = None;
30 let mut product_version = None;
31 let mut company = None;
32 let mut product_name = None;
33
34 let version_patterns = [
35 Regex::new(r"\b(\d+\.\d+\.\d+(?:\.\d+)?)\b").unwrap(),
36 Regex::new(r"\bv(\d+\.\d+\.\d+(?:\.\d+)?)\b").unwrap(),
37 Regex::new(r"\bversion\s*[:=]\s*([^\s,;]+)").unwrap(),
38 Regex::new(r"\bVERSION\s*[:=]\s*([^\s,;]+)").unwrap(),
39 Regex::new(r"\b(\d+\.\d+(?:\.\d+)?(?:\.\d+)?)\b").unwrap(),
40 ];
41
42 for string in strings {
43 for pattern in &version_patterns {
44 for captures in pattern.captures_iter(string) {
45 if let Some(version) = captures.get(1) {
46 if is_valid_version(version.as_str()) {
47 version_strings.insert(version.as_str().to_string());
48 }
49 }
50 }
51 }
52
53 if company.is_none() {
54 if let Some(comp) = extract_company_name(string) {
55 company = Some(comp);
56 }
57 }
58
59 if product_name.is_none() {
60 if let Some(prod) = extract_product_name(string) {
61 product_name = Some(prod);
62 }
63 }
64 }
65
66 match format {
67 "application/x-msdownload" => {
68 if let Some(pe_version) = extract_pe_version_info(contents) {
69 file_version = file_version.or(pe_version.file_version);
70 product_version = product_version.or(pe_version.product_version);
71 company = company.or(pe_version.company);
72 product_name = product_name.or(pe_version.product_name);
73 }
74 }
75 "application/x-elf" => {
76 if let Some(elf_versions) = extract_elf_version_info(contents) {
77 version_strings.extend(elf_versions);
78 }
79 }
80 "application/x-mach-binary" => {
81 if let Some(macho_versions) = extract_macho_version_info(contents) {
82 version_strings.extend(macho_versions);
83 }
84 }
85 "application/wasm" => {
86 if let Some(wasm_versions) = extract_wasm_version_info(contents) {
87 version_strings.extend(wasm_versions);
88 }
89 }
90 _ => {}
91 }
92
93 if file_version.is_none() && !version_strings.is_empty() {
94 file_version = version_strings
95 .iter()
96 .max_by_key(|v| v.matches('.').count())
97 .cloned();
98 }
99
100 let confidence = calculate_version_confidence(&version_strings, &file_version);
101
102 VersionInfo {
103 version_strings: version_strings.into_iter().collect(),
104 file_version,
105 product_version,
106 company,
107 product_name,
108 confidence,
109 }
110}
111
112pub fn extract_license_info(strings: &[String]) -> LicenseInfo {
113 let mut licenses = HashSet::new();
114 let mut copyright_notices = Vec::new();
115 let mut spdx_identifiers = HashSet::new();
116 let mut license_texts = Vec::new();
117
118 let license_patterns = [
119 (
120 Regex::new(r"(?i)\b(MIT|BSD|GPL|LGPL|Apache|Mozilla|ISC|Unlicense)\b").unwrap(),
121 "identifier",
122 ),
123 (
124 Regex::new(r"(?i)licensed under the ([^.,;]+)").unwrap(),
125 "phrase",
126 ),
127 (
128 Regex::new(r"(?i)license:\s*([^.,;\n]+)").unwrap(),
129 "declaration",
130 ),
131 (Regex::new(r"(?i)copyright\s+.*").unwrap(), "copyright"),
132 (
133 Regex::new(r"SPDX-License-Identifier:\s*([^\s]+)").unwrap(),
134 "spdx",
135 ),
136 ];
137
138 let license_text_patterns = [
139 Regex::new(r"(?i)permission is hereby granted.*").unwrap(),
140 Regex::new(r"(?i)redistribution and use in source and binary forms.*").unwrap(),
141 Regex::new(r"(?i)this program is free software.*").unwrap(),
142 Regex::new(r"(?i)licensed under the apache license.*").unwrap(),
143 ];
144
145 for string in strings {
146 if string.len() < 10 {
147 continue;
148 }
149
150 for (pattern, kind) in &license_patterns {
151 for captures in pattern.captures_iter(string) {
152 match *kind {
153 "identifier" | "phrase" | "declaration" => {
154 if let Some(license) = captures.get(1) {
155 let license_str = normalize_license_name(license.as_str());
156 if !license_str.is_empty() {
157 licenses.insert(license_str);
158 }
159 }
160 }
161 "copyright" => {
162 copyright_notices.push(string.clone());
163 }
164 "spdx" => {
165 if let Some(spdx) = captures.get(1) {
166 spdx_identifiers.insert(spdx.as_str().to_string());
167 }
168 }
169 _ => {}
170 }
171 }
172 }
173
174 for pattern in &license_text_patterns {
175 if pattern.is_match(string) && string.len() > 100 {
176 license_texts.push(string.clone());
177 if let Some(inferred) = infer_license_from_text(string) {
178 licenses.insert(inferred);
179 }
180 }
181 }
182 }
183
184 let confidence = calculate_license_confidence(&licenses, &spdx_identifiers, &license_texts);
185
186 LicenseInfo {
187 licenses: licenses.into_iter().collect(),
188 copyright_notices,
189 spdx_identifiers: spdx_identifiers.into_iter().collect(),
190 license_texts,
191 confidence,
192 }
193}
194
195pub fn is_valid_version(version: &str) -> bool {
196 if version.len() < 3 || version.len() > 20 || !version.contains('.') {
197 return false;
198 }
199
200 let parts: Vec<&str> = version.split('.').collect();
201 if parts.len() > 5 {
202 return false;
203 }
204
205 for part in parts {
206 if let Ok(num) = part.parse::<u32>() {
207 if num > 9999 {
208 return false;
209 }
210 }
211 }
212
213 true
214}
215
216pub fn extract_company_name(string: &str) -> Option<String> {
217 let patterns = [
218 Regex::new(r"(?i)company:\s*([^.,;\n]+)").unwrap(),
219 Regex::new(r"(?i)corporation:\s*([^.,;\n]+)").unwrap(),
220 Regex::new(r"(?i)© \d{4}\s+([^.,;\n]+)").unwrap(),
221 Regex::new(
222 r"(?i)copyright.*?(\w+(?:\s+\w+){0,3})(?:\s+inc\.?|\s+corp\.?|\s+ltd\.?|\s+llc)",
223 )
224 .unwrap(),
225 ];
226
227 for pattern in &patterns {
228 if let Some(caps) = pattern.captures(string) {
229 if let Some(m) = caps.get(1) {
230 let s = m.as_str().trim();
231 if s.len() > 2 && s.len() < 100 {
232 return Some(s.to_string());
233 }
234 }
235 }
236 }
237 None
238}
239
240pub fn extract_product_name(string: &str) -> Option<String> {
241 let patterns = [
242 Regex::new(r"(?i)product:\s*([^.,;\n]+)").unwrap(),
243 Regex::new(r"(?i)application:\s*([^.,;\n]+)").unwrap(),
244 Regex::new(r"(?i)program:\s*([^.,;\n]+)").unwrap(),
245 ];
246
247 for pattern in &patterns {
248 if let Some(caps) = pattern.captures(string) {
249 if let Some(m) = caps.get(1) {
250 let s = m.as_str().trim();
251 if s.len() > 2 && s.len() < 100 {
252 return Some(s.to_string());
253 }
254 }
255 }
256 }
257 None
258}
259
/// Maps a raw license name to its canonical spelling.
///
/// Surrounding whitespace is ignored and matching is case-insensitive.
/// The known short names (`mit`, `bsd`, `gpl`, `lgpl`, `apache`, `mozilla`,
/// `isc`, `unlicense`) map to fixed identifiers; any other input is returned
/// trimmed and lower-cased. Whitespace-only input yields an empty string.
pub fn normalize_license_name(license: &str) -> String {
    // Captured names frequently carry trailing whitespace (e.g. "MIT "), which
    // would otherwise miss every arm below and leak into the result.
    let lowered = license.trim().to_lowercase();
    match lowered.as_str() {
        "mit" => "MIT".to_string(),
        "bsd" => "BSD".to_string(),
        "gpl" => "GPL".to_string(),
        "lgpl" => "LGPL".to_string(),
        "apache" => "Apache-2.0".to_string(),
        "mozilla" => "MPL-2.0".to_string(),
        "isc" => "ISC".to_string(),
        "unlicense" => "Unlicense".to_string(),
        _ => lowered,
    }
}
273
/// Guesses a license identifier from the wording of a license text.
///
/// The text is lower-cased and checked against a fixed list of rules in
/// order; the first rule whose phrases are all present decides the result.
/// Returns `None` when no rule applies.
pub fn infer_license_from_text(text: &str) -> Option<String> {
    // (phrases that must all occur, resulting identifier), in priority order.
    // Note that every phrase, including "mit", is a plain substring test.
    const RULES: [(&[&str], &str); 4] = [
        (&["permission is hereby granted", "mit"], "MIT"),
        (&["redistribution and use in source and binary forms"], "BSD"),
        (&["apache license"], "Apache-2.0"),
        (&["gnu general public license"], "GPL"),
    ];

    let lowered = text.to_lowercase();
    RULES
        .iter()
        .find(|(phrases, _)| phrases.iter().all(|phrase| lowered.contains(*phrase)))
        .map(|(_, identifier)| identifier.to_string())
}
288
/// Scores how trustworthy extracted version data is, from `0.0` up to `1.0`.
///
/// The score is the sum of three parts, capped at `1.0`:
/// * `0.3` when at least one version candidate exists,
/// * `0.4` when a file version was chosen,
/// * `0.3` when there is exactly one candidate, or `0.1` when there are several.
pub fn calculate_version_confidence(
    version_strings: &HashSet<String>,
    file_version: &Option<String>,
) -> f64 {
    let candidate_count = version_strings.len();

    let presence_score = if candidate_count == 0 { 0.0 } else { 0.3 };
    let file_version_score = if file_version.is_none() { 0.0 } else { 0.4 };
    let agreement_score = match candidate_count {
        0 => 0.0,
        1 => 0.3,
        _ => 0.1,
    };

    let total: f64 = presence_score + file_version_score + agreement_score;
    total.min(1.0)
}
307
/// Scores how trustworthy extracted license data is, from `0.0` up to `1.0`.
///
/// Each kind of evidence that is present adds a fixed weight: `0.5` for SPDX
/// identifiers, `0.3` for license names and `0.2` for license texts. The sum
/// is capped at `1.0`.
pub fn calculate_license_confidence(
    licenses: &HashSet<String>,
    spdx: &HashSet<String>,
    texts: &[String],
) -> f64 {
    // (evidence present?, weight), accumulated in this order.
    let evidence = [
        (!spdx.is_empty(), 0.5),
        (!licenses.is_empty(), 0.3),
        (!texts.is_empty(), 0.2),
    ];

    evidence
        .iter()
        .filter(|(present, _)| *present)
        .fold(0.0_f64, |score, (_, weight)| score + *weight)
        .min(1.0)
}
325
/// Version fields read from a PE file, as returned by
/// [`extract_pe_version_info`].
///
/// The fields are public so that callers outside this module can read the
/// result of the public extractor.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PeVersionInfo {
    /// File version, if one was found.
    pub file_version: Option<String>,
    /// Product version, if one was found.
    pub product_version: Option<String>,
    /// Company name, if one was found.
    pub company: Option<String>,
    /// Product name, if one was found.
    pub product_name: Option<String>,
}
337
338pub fn extract_pe_version_info(contents: &[u8]) -> Option<PeVersionInfo> {
339 if let Ok(pe) = PE::parse(contents) {
341 if let Some(ref opt_header) = pe.header.optional_header {
342 let windows = &opt_header.windows_fields;
343
344 let file_version =
346 if windows.major_image_version != 0 || windows.minor_image_version != 0 {
347 Some(format!(
348 "{}.{}",
349 windows.major_image_version, windows.minor_image_version
350 ))
351 } else {
352 None
353 };
354
355 let product_version =
357 if windows.major_subsystem_version != 0 || windows.minor_subsystem_version != 0 {
358 Some(format!(
359 "{}.{}",
360 windows.major_subsystem_version, windows.minor_subsystem_version
361 ))
362 } else {
363 None
364 };
365
366 return Some(PeVersionInfo {
367 file_version,
368 product_version,
369 company: None, product_name: None, });
372 }
373 }
374 None
375}
376
377pub fn extract_elf_version_info(contents: &[u8]) -> Option<Vec<String>> {
378 if let Ok(elf) = Elf::parse(contents) {
379 let mut versions = Vec::new();
380 if let Some(note_iter) = elf.iter_note_headers(contents) {
381 for note_result in note_iter {
382 if let Ok(n) = note_result {
383 if n.name == "GNU" && n.n_type == goblin::elf::note::NT_GNU_BUILD_ID {
384 let hex = n
385 .desc
386 .iter()
387 .map(|b| format!("{:02x}", b))
388 .collect::<String>();
389 versions.push(hex);
390 }
391 }
392 }
393 }
394 Some(versions)
395 } else {
396 None
397 }
398}
399
/// Placeholder for Mach-O version extraction.
///
/// The input is not inspected; the function always returns `None`.
pub fn extract_macho_version_info(_contents: &[u8]) -> Option<Vec<String>> {
    Option::<Vec<String>>::None
}
403
404pub fn extract_wasm_version_info(contents: &[u8]) -> Option<Vec<String>> {
405 let mut versions = Vec::new();
406 let parser = Parser::new(0);
407 for payload in parser.parse_all(contents) {
408 if let Ok(Payload::CustomSection(s)) = payload {
409 if s.name().contains("version") || s.name().contains("meta") {
410 let text = String::from_utf8_lossy(s.data());
411 for line in text.lines() {
412 if let Some(v) = line.split_whitespace().find(|w| is_valid_version(w)) {
413 versions.push(v.to_string());
414 }
415 }
416 }
417 }
418 }
419 Some(versions)
420}