1use std::collections::{HashMap, HashSet};
26use std::fs;
27use std::path::{Path, PathBuf};
28
29use log::warn;
30use packageurl::PackageUrl;
31use serde_json::Value as JsonValue;
32
33use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
34use crate::parsers::pep508::{Pep508Requirement, parse_pep508_requirement};
35
36use super::PackageParser;
37
/// Parser for pip `requirements.txt`-style dependency files.
pub struct RequirementsTxtParser;
43
44impl PackageParser for RequirementsTxtParser {
45 const PACKAGE_TYPE: PackageType = PackageType::Pypi;
46
47 fn extract_packages(path: &Path) -> Vec<PackageData> {
48 vec![extract_from_requirements_txt(path)]
49 }
50
51 fn is_match(path: &Path) -> bool {
52 let filename = path.file_name().and_then(|name| name.to_str());
53 let parent_name = path
54 .parent()
55 .and_then(|parent| parent.file_name())
56 .and_then(|name| name.to_str());
57
58 if let Some(name) = filename {
59 if name == "requirements.txt" {
60 return true;
61 }
62 if name.starts_with("requirements-") && name.ends_with(".txt") {
63 return true;
64 }
65 if parent_name == Some("requirements") && name.ends_with(".txt") {
66 return true;
67 }
68 }
69
70 false
71 }
72}
73
/// Mutable accumulator threaded through the (possibly recursive) parse of a
/// requirements file and everything it includes.
struct ParseState {
    // Dependencies collected from every visited file.
    dependencies: Vec<Dependency>,
    // Values of `--extra-index-url` options, in encounter order.
    extra_index_urls: Vec<String>,
    // Value of `--index-url`; the last one seen wins.
    index_url: Option<String>,
    // Raw path arguments of `-r` / `--requirement` lines.
    includes: Vec<String>,
    // Raw path arguments of `-c` / `--constraint` lines.
    constraints: Vec<String>,
    // Canonicalized paths already parsed; guards against include cycles.
    visited: HashSet<PathBuf>,
}
82
83fn extract_from_requirements_txt(path: &Path) -> PackageData {
84 let mut state = ParseState {
85 dependencies: Vec::new(),
86 extra_index_urls: Vec::new(),
87 index_url: None,
88 includes: Vec::new(),
89 constraints: Vec::new(),
90 visited: HashSet::new(),
91 };
92
93 let (scope, is_runtime) = scope_from_filename(path);
94
95 parse_requirements_with_includes(path, &mut state, &scope, is_runtime);
96
97 let mut extra_data = HashMap::new();
98 if let Some(url) = state.index_url {
99 extra_data.insert("index_url".to_string(), JsonValue::String(url));
100 }
101 if !state.extra_index_urls.is_empty() {
102 extra_data.insert(
103 "extra_index_urls".to_string(),
104 JsonValue::Array(
105 state
106 .extra_index_urls
107 .into_iter()
108 .map(JsonValue::String)
109 .collect(),
110 ),
111 );
112 }
113 if !state.includes.is_empty() {
114 extra_data.insert(
115 "requirements_includes".to_string(),
116 JsonValue::Array(state.includes.into_iter().map(JsonValue::String).collect()),
117 );
118 }
119 if !state.constraints.is_empty() {
120 extra_data.insert(
121 "constraints".to_string(),
122 JsonValue::Array(
123 state
124 .constraints
125 .into_iter()
126 .map(JsonValue::String)
127 .collect(),
128 ),
129 );
130 }
131
132 let extra_data = if extra_data.is_empty() {
133 None
134 } else {
135 Some(extra_data)
136 };
137
138 default_package_data(state.dependencies, extra_data)
139}
140
/// Recursively parse a requirements file, following `-r`/`--requirement`
/// includes and `-c`/`--constraint` references, accumulating everything in
/// `state`.
///
/// `scope` and `is_runtime` (derived from the top-level filename) are applied
/// to every dependency found, including those from included files.
///
/// Missing or unreadable files are logged with `warn!` and skipped rather
/// than failing the whole parse. The `visited` set of canonicalized paths
/// guards against include cycles; note it is never cleared, so a file
/// reachable through two different parents is only parsed once.
fn parse_requirements_with_includes(
    path: &Path,
    state: &mut ParseState,
    scope: &str,
    is_runtime: bool,
) {
    // Canonicalize so the cycle check compares absolute, symlink-resolved paths.
    let abs_path = match path.canonicalize() {
        Ok(p) => p,
        Err(_) => {
            warn!("Cannot resolve path: {:?}", path);
            return;
        }
    };

    if state.visited.contains(&abs_path) {
        warn!("Circular include detected: {:?}", path);
        return;
    }

    state.visited.insert(abs_path.clone());

    let content = match fs::read_to_string(&abs_path) {
        Ok(c) => c,
        Err(e) => {
            warn!("Cannot read file {:?}: {}", abs_path, e);
            return;
        }
    };

    // Logical lines already have backslash continuations joined.
    for line in collect_logical_lines(&content) {
        let cleaned = strip_inline_comment(&line);
        let trimmed = cleaned.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }

        // Index options are recorded in extra_data, not as dependencies.
        if let Some(url) = parse_option_value(trimmed, "--extra-index-url") {
            state.extra_index_urls.push(url);
            continue;
        }

        if let Some(url) = parse_option_value(trimmed, "--index-url") {
            state.index_url = Some(url);
            continue;
        }

        // `-r`/`--requirement`: recurse into the included file, resolved
        // relative to the current file's directory.
        if let Some(path_value) = parse_option_value(trimmed, "-r")
            .or_else(|| parse_option_value(trimmed, "--requirement"))
        {
            state.includes.push(path_value.clone());
            let included_path = abs_path
                .parent()
                .unwrap_or_else(|| Path::new("."))
                .join(&path_value);

            if included_path.exists() {
                parse_requirements_with_includes(&included_path, state, scope, is_runtime);
            } else {
                warn!("Included file not found: {:?}", included_path);
            }
            continue;
        }

        // `-c`/`--constraint`: parsed exactly like an include.
        // NOTE(review): dependencies found in constraint files end up with
        // `is_constraint: false` (hard-coded in build_dependency) — confirm
        // whether constraint entries should be flagged differently.
        if let Some(path_value) = parse_option_value(trimmed, "-c")
            .or_else(|| parse_option_value(trimmed, "--constraint"))
        {
            state.constraints.push(path_value.clone());
            let constraint_path = abs_path
                .parent()
                .unwrap_or_else(|| Path::new("."))
                .join(&path_value);

            if constraint_path.exists() {
                parse_requirements_with_includes(&constraint_path, state, scope, is_runtime);
            } else {
                warn!("Constraint file not found: {:?}", constraint_path);
            }
            continue;
        }

        // Skip any other pip option line; editable requirements (`-e`,
        // `--editable`) fall through to build_dependency.
        if trimmed.starts_with('-')
            && !trimmed.starts_with("-e")
            && !trimmed.starts_with("--editable")
        {
            continue;
        }

        if let Some(dependency) = build_dependency(trimmed, scope, is_runtime) {
            state.dependencies.push(dependency);
        }
    }
}
233
234fn default_package_data(
235 dependencies: Vec<Dependency>,
236 extra_data: Option<HashMap<String, JsonValue>>,
237) -> PackageData {
238 PackageData {
239 package_type: Some(RequirementsTxtParser::PACKAGE_TYPE),
240 primary_language: Some("Python".to_string()),
241 extra_data,
242 dependencies,
243 datasource_id: Some(DatasourceId::PipRequirements),
244 ..Default::default()
245 }
246}
247
/// Join backslash-continued physical lines into logical lines, dropping
/// blank lines. Continuation fragments are trimmed and joined with a single
/// space.
fn collect_logical_lines(content: &str) -> Vec<String> {
    let mut logical = Vec::new();
    let mut pending = String::new();

    for raw in content.lines() {
        let line = raw.trim_end_matches('\r');
        let right_trimmed = line.trim_end();
        let continues = right_trimmed.ends_with('\\');
        // Drop the trailing continuation backslash(es) from the payload.
        let payload = if continues {
            right_trimmed.trim_end_matches('\\')
        } else {
            line
        };

        let fragment = payload.trim();
        if !fragment.is_empty() {
            if !pending.is_empty() {
                pending.push(' ');
            }
            pending.push_str(fragment);
        }

        if !continues && !pending.is_empty() {
            logical.push(std::mem::take(&mut pending).trim().to_string());
        }
    }

    // A trailing continuation with no following line still yields a logical line.
    if !pending.is_empty() {
        logical.push(pending.trim().to_string());
    }

    logical
}
281
/// Remove a trailing `#` comment from a line, honoring single/double quotes
/// so a `#` inside a quoted string is kept. A `#` only starts a comment at
/// the beginning of the line or after whitespace (matching pip's behavior
/// for URLs containing fragments like `#egg=`).
fn strip_inline_comment(line: &str) -> String {
    let mut in_single = false;
    let mut in_double = false;

    for (idx, ch) in line.char_indices() {
        if ch == '\'' && !in_double {
            in_single = !in_single;
        } else if ch == '"' && !in_single {
            in_double = !in_double;
        } else if ch == '#' && !in_single && !in_double {
            let before = &line[..idx];
            let at_start = before.trim_end().is_empty();
            if at_start || before.ends_with(char::is_whitespace) {
                return before.trim_end().to_string();
            }
        }
    }

    line.to_string()
}
300
/// If `line` starts with `option`, return its argument, accepting both
/// `--opt value` and `--opt=value` spellings. Returns `None` when the option
/// does not match or has no argument.
fn parse_option_value(line: &str, option: &str) -> Option<String> {
    let remainder = line.strip_prefix(option)?.trim();
    let value = match remainder.strip_prefix('=') {
        Some(after_eq) => after_eq.trim(),
        None => remainder,
    };
    (!value.is_empty()).then(|| value.to_string())
}
313
/// Heuristically derive a dependency scope and runtime flag from the
/// requirements filename: names containing "dev", "test", or "doc" (checked
/// in that order, case-insensitively) map to non-runtime scopes; everything
/// else is a runtime "install" scope.
fn scope_from_filename(path: &Path) -> (String, bool) {
    let filename = path
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or_default()
        .to_ascii_lowercase();

    // Order matters: "dev" wins over "test", which wins over "doc".
    const NON_RUNTIME: [(&str, &str); 3] =
        [("dev", "develop"), ("test", "test"), ("doc", "docs")];
    for (needle, scope) in NON_RUNTIME {
        if filename.contains(needle) {
            return (scope.to_string(), false);
        }
    }

    ("install".to_string(), true)
}
333
/// Convert one requirement line into a `Dependency`, or `None` for blank
/// input.
///
/// Handles `-e`/`--editable` prefixes, strips `--hash=` options into
/// `extra_data`, and derives a pypi purl when a distribution name (and
/// optionally a `==`/`===` pinned version) can be determined.
fn build_dependency(line: &str, scope: &str, is_runtime: bool) -> Option<Dependency> {
    let trimmed = line.trim();
    if trimmed.is_empty() {
        return None;
    }

    let mut is_editable = false;
    let mut requirement = trimmed.to_string();
    let mut extracted_requirement = trimmed.to_string();

    // Normalize both editable spellings to a canonical `--editable <req>`
    // extracted_requirement. NOTE(review): `strip_prefix("-e")` matches any
    // line beginning with those two characters (e.g. `-efoo`) — confirm
    // whether a following space/`=` should be required.
    if let Some(rest) = trimmed.strip_prefix("-e") {
        is_editable = true;
        requirement = rest.trim().to_string();
        extracted_requirement = format!("--editable {}", requirement);
    } else if let Some(rest) = trimmed.strip_prefix("--editable") {
        is_editable = true;
        requirement = rest.trim().to_string();
        extracted_requirement = format!("--editable {}", requirement);
    }

    // `--hash=...` tokens are recorded separately and removed from the
    // requirement text before parsing.
    let (requirement, hash_options) = split_hash_options(&requirement);
    let requirement = requirement.trim();
    if requirement.is_empty() {
        return None;
    }

    let parsed = parse_requirement(requirement);

    // A dependency is "pinned" only for a single exact `==`/`===` specifier.
    let pinned_version = parsed
        .specifiers
        .as_deref()
        .and_then(extract_pinned_version);
    let is_pinned = pinned_version.is_some();

    let purl = parsed
        .name
        .as_ref()
        .and_then(|name| create_pypi_purl(name, pinned_version.as_deref()));

    // Per-dependency metadata; tri-state flags that could not be determined
    // serialize as JSON null.
    let mut extra_data = HashMap::new();
    extra_data.insert("is_editable".to_string(), JsonValue::Bool(is_editable));
    extra_data.insert(
        "link".to_string(),
        parsed
            .link
            .clone()
            .map(JsonValue::String)
            .unwrap_or(JsonValue::Null),
    );
    extra_data.insert(
        "hash_options".to_string(),
        JsonValue::Array(hash_options.into_iter().map(JsonValue::String).collect()),
    );
    // NOTE(review): always false, even for lines reached via a `-c`
    // constraint file — confirm whether those should be flagged.
    extra_data.insert("is_constraint".to_string(), JsonValue::Bool(false));
    extra_data.insert(
        "is_archive".to_string(),
        parsed
            .is_archive
            .map(JsonValue::Bool)
            .unwrap_or(JsonValue::Null),
    );
    extra_data.insert("is_wheel".to_string(), JsonValue::Bool(parsed.is_wheel));
    extra_data.insert(
        "is_url".to_string(),
        parsed
            .is_url
            .map(JsonValue::Bool)
            .unwrap_or(JsonValue::Null),
    );
    extra_data.insert(
        "is_vcs_url".to_string(),
        parsed
            .is_vcs_url
            .map(JsonValue::Bool)
            .unwrap_or(JsonValue::Null),
    );
    extra_data.insert(
        "is_name_at_url".to_string(),
        JsonValue::Bool(parsed.is_name_at_url),
    );
    // An editable requirement is treated as a local path as well.
    extra_data.insert(
        "is_local_path".to_string(),
        parsed
            .is_local_path
            .map(|value| value || is_editable)
            .map(JsonValue::Bool)
            .unwrap_or(JsonValue::Null),
    );

    if let Some(marker) = parsed.marker {
        extra_data.insert("markers".to_string(), JsonValue::String(marker));
    }

    Some(Dependency {
        purl,
        extracted_requirement: Some(extracted_requirement),
        scope: Some(scope.to_string()),
        is_runtime: Some(is_runtime),
        is_optional: Some(false),
        is_pinned: Some(is_pinned),
        is_direct: Some(true),
        resolved_package: None,
        extra_data: Some(extra_data),
    })
}
439
/// Separate `--hash=<algo>:<digest>` tokens from the rest of a requirement
/// line. Returns the remaining tokens rejoined with single spaces and the
/// collected hash values. A bare `--hash=` with no value is dropped entirely.
fn split_hash_options(input: &str) -> (String, Vec<String>) {
    let mut remaining = Vec::new();
    let mut hash_values = Vec::new();

    for token in input.split_whitespace() {
        match token.strip_prefix("--hash=") {
            Some(value) if !value.is_empty() => hash_values.push(value.to_string()),
            Some(_) => {} // empty `--hash=`: discard
            None => remaining.push(token),
        }
    }

    (remaining.join(" "), hash_values)
}
456
/// Result of interpreting one requirement string: either a plain PEP 508
/// requirement (name/specifiers/marker set, link fields empty) or a
/// link-style requirement (link and classification flags set).
struct ParsedRequirement {
    // Normalized (PEP 503-style) distribution name, when one is known.
    name: Option<String>,
    // Raw version specifier string, e.g. "==1.2.3".
    specifiers: Option<String>,
    // Raw environment marker string, if any.
    marker: Option<String>,
    // The URL / VCS reference / local path, for link-style requirements.
    link: Option<String>,
    // Tri-state flags: None means "not applicable / could not determine".
    is_url: Option<bool>,
    is_vcs_url: Option<bool>,
    is_local_path: Option<bool>,
    // True only for the PEP 508 `name @ url` form.
    is_name_at_url: bool,
    is_archive: Option<bool>,
    is_wheel: bool,
}
469
470fn parse_requirement(input: &str) -> ParsedRequirement {
471 if let Some(parsed) = parse_pep508_requirement(input) {
472 if let Some(url) = parsed.url.clone() {
473 return parsed_with_link(parsed, &url);
474 }
475
476 if !is_link_like(input) {
477 let name = Some(normalize_pypi_name(&parsed.name));
478 return ParsedRequirement {
479 name,
480 specifiers: parsed.specifiers,
481 marker: parsed.marker,
482 link: None,
483 is_url: None,
484 is_vcs_url: None,
485 is_local_path: None,
486 is_name_at_url: false,
487 is_archive: None,
488 is_wheel: false,
489 };
490 }
491 }
492
493 if let Some((name, link)) = parse_link_with_name(input) {
494 let normalized_name = normalize_pypi_name(&name);
495 let link_info = parse_link_flags(&link);
496 return ParsedRequirement {
497 name: Some(normalized_name),
498 specifiers: None,
499 marker: None,
500 link: Some(link),
501 is_url: Some(link_info.is_url),
502 is_vcs_url: Some(link_info.is_vcs_url),
503 is_local_path: Some(link_info.is_local_path),
504 is_name_at_url: link_info.is_name_at_url,
505 is_archive: link_info.is_archive,
506 is_wheel: link_info.is_wheel,
507 };
508 }
509
510 let link_info = parse_link_flags(input);
511 ParsedRequirement {
512 name: None,
513 specifiers: None,
514 marker: None,
515 link: Some(input.to_string()),
516 is_url: Some(link_info.is_url),
517 is_vcs_url: Some(link_info.is_vcs_url),
518 is_local_path: Some(link_info.is_local_path),
519 is_name_at_url: link_info.is_name_at_url,
520 is_archive: link_info.is_archive,
521 is_wheel: link_info.is_wheel,
522 }
523}
524
/// Build a `ParsedRequirement` for a PEP 508 requirement that carries a
/// direct URL (`name @ url`), combining the parsed metadata with flags
/// derived from the URL text itself.
fn parsed_with_link(parsed: Pep508Requirement, link: &str) -> ParsedRequirement {
    let name = normalize_pypi_name(&parsed.name);
    let link_info = parse_link_flags(link);
    ParsedRequirement {
        name: Some(name),
        specifiers: parsed.specifiers,
        marker: parsed.marker,
        link: Some(link.to_string()),
        is_url: Some(link_info.is_url),
        is_vcs_url: Some(link_info.is_vcs_url),
        is_local_path: Some(link_info.is_local_path),
        // Taken from the PEP 508 parse, not the link flags (which are always
        // false for this field).
        is_name_at_url: parsed.is_name_at_url,
        is_archive: link_info.is_archive,
        is_wheel: link_info.is_wheel,
    }
}
541
542fn parse_link_with_name(input: &str) -> Option<(String, String)> {
543 if let Some(egg) = extract_egg_name(input) {
544 return Some((egg, input.to_string()));
545 }
546 None
547}
548
549fn extract_egg_name(input: &str) -> Option<String> {
550 let fragment = input.split('#').nth(1)?;
551 let egg_part = fragment.strip_prefix("egg=")?;
552 let name_part = egg_part.split('&').next()?.trim();
553 if name_part.is_empty() {
554 return None;
555 }
556 let (name, _extras, _) = parse_pep508_requirement(name_part)
557 .map(|parsed| (parsed.name, parsed.extras, parsed.specifiers))
558 .unwrap_or_else(|| (name_part.to_string(), Vec::new(), None));
559 Some(name)
560}
561
/// Classification flags derived from a requirement's link/URL text.
struct LinkFlags {
    is_url: bool,
    is_vcs_url: bool,
    is_local_path: bool,
    is_name_at_url: bool,
    is_archive: Option<bool>,
    is_wheel: bool,
}

/// Classify a link string by inspecting its prefix and file extension.
///
/// `is_archive` is `Some(true)` for known archive extensions, `Some(false)`
/// when the input is recognizably a URL or local path but not an archive,
/// and `None` when it does not look like a link at all.
fn parse_link_flags(link: &str) -> LinkFlags {
    let text = link.trim();

    let is_vcs_url = ["git+", "hg+", "svn+", "bzr+"]
        .iter()
        .any(|prefix| text.starts_with(prefix));
    let has_scheme = text.contains("://") || text.starts_with("file:");
    let is_local_path = ["./", "../", "/", "~", "file:"]
        .iter()
        .any(|prefix| text.starts_with(prefix));

    let is_wheel = text.ends_with(".whl");
    let archive_extensions = [".zip", ".tar.gz", ".tgz", ".tar.bz2", ".tar"];
    let is_archive = if is_wheel || archive_extensions.iter().any(|ext| text.ends_with(ext)) {
        Some(true)
    } else if has_scheme || is_local_path {
        Some(false)
    } else {
        None
    };

    LinkFlags {
        is_url: has_scheme || is_vcs_url,
        is_vcs_url,
        is_local_path,
        // Only the PEP 508 `name @ url` parse can establish this; never true
        // from the link text alone.
        is_name_at_url: false,
        is_archive,
        is_wheel,
    }
}
608
/// Heuristic: does `input` look like a URL, VCS reference, or filesystem
/// path rather than a plain distribution name?
fn is_link_like(input: &str) -> bool {
    let text = input.trim();
    if text.contains("://") {
        return true;
    }
    ["git+", "hg+", "svn+", "bzr+", "file:", "./", "../", "/", "~"]
        .iter()
        .any(|prefix| text.starts_with(prefix))
}
622
/// Return the exact version from a specifier string when it pins a single
/// version with `==` or `===` (e.g. `"==1.2.3"` -> `Some("1.2.3")`).
///
/// Returns `None` for compound specifiers (containing `,`), non-equality
/// operators, wildcard pins (`==1.*`), or an empty version.
fn extract_pinned_version(specifiers: &str) -> Option<String> {
    let trimmed = specifiers.trim();
    if trimmed.contains(',') {
        return None;
    }

    // Try `===` (PEP 440 arbitrary equality) before `==`: the previous order
    // stripped `==` off `===1.0` first, leaving a bogus `=1.0` and making
    // the `===` branch unreachable.
    let stripped = trimmed
        .strip_prefix("===")
        .or_else(|| trimmed.strip_prefix("=="))?;

    let version = stripped.trim();
    if version.is_empty() || version.contains('*') {
        None
    } else {
        Some(version.to_string())
    }
}
644
645fn create_pypi_purl(name: &str, version: Option<&str>) -> Option<String> {
646 let mut purl = PackageUrl::new(RequirementsTxtParser::PACKAGE_TYPE.as_str(), name).ok()?;
647 if let Some(version) = version {
648 purl.with_version(version).ok()?;
649 }
650 Some(purl.to_string())
651}
652
/// Normalize a distribution name PEP 503-style: lowercase, with every run of
/// `-`, `_`, or `.` collapsed into a single `-`.
fn normalize_pypi_name(name: &str) -> String {
    let source = name.trim().to_ascii_lowercase();
    let mut normalized = String::with_capacity(source.len());
    for ch in source.chars() {
        match ch {
            '-' | '_' | '.' => {
                // Collapse separator runs: emit `-` only if the previous
                // output character is not already one.
                if !normalized.ends_with('-') {
                    normalized.push('-');
                }
            }
            other => normalized.push(other),
        }
    }
    normalized
}
671
// Register this parser: display name, path globs that route files to it,
// package type, primary language, and a documentation URL for the format.
// NOTE(review): `is_match` above accepts only a subset of these globs (it
// rejects `.in` files) — confirm which of the two is authoritative.
crate::register_parser!(
    "pip requirements file",
    &[
        "**/requirements*.txt",
        "**/requirements*.in",
        "**/requirements/*.txt"
    ],
    "pypi",
    "Python",
    Some("https://pip.pypa.io/en/latest/reference/requirements-file-format/"),
);