1use std::collections::{HashMap, HashSet};
26use std::path::{Path, PathBuf};
27
28use crate::parser_warn as warn;
29use packageurl::PackageUrl;
30use serde_json::Value as JsonValue;
31
32use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
33use crate::parsers::pep508::{Pep508Requirement, parse_pep508_requirement};
34use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
35
36use super::PackageParser;
37
/// Upper bound on how deeply `-r` / `-c` references are followed; a file
/// reached at a greater depth is skipped with a warning.
const MAX_RECURSION_DEPTH: usize = 50;
39
/// Parser for pip requirements files (`requirements.txt` and similarly
/// named `.txt` / `.in` files).
pub struct RequirementsTxtParser;
45
46impl PackageParser for RequirementsTxtParser {
47 const PACKAGE_TYPE: PackageType = PackageType::Pypi;
48
49 fn extract_packages(path: &Path) -> Vec<PackageData> {
50 vec![extract_from_requirements_txt(path)]
51 }
52
53 fn is_match(path: &Path) -> bool {
54 let filename = path.file_name().and_then(|name| name.to_str());
55 let Some(name) = filename else {
56 return false;
57 };
58
59 is_requirements_txt_filename(name)
60 || (is_requirements_like_extension(name) && has_requirements_like_ancestor(path))
61 }
62}
63
64fn is_requirements_txt_filename(name: &str) -> bool {
65 if name == "requirements.txt" || name == "requires.txt" {
66 return true;
67 }
68
69 let (stem, extension) = if let Some(stem) = name.strip_suffix(".txt") {
70 (stem, "txt")
71 } else if let Some(stem) = name.strip_suffix(".in") {
72 (stem, "in")
73 } else {
74 return false;
75 };
76
77 stem == "requirements"
81 || stem.starts_with("requirements")
82 || stem.ends_with("requirements")
83 || (extension == "txt" && is_reqs_alias_stem(stem))
84}
85
/// Returns `true` when `stem` is exactly `reqs` or ends with `reqs` preceded
/// by one of the separators `-`, `_` or `.`.
fn is_reqs_alias_stem(stem: &str) -> bool {
    match stem.strip_suffix("reqs") {
        Some("") => true,
        Some(head) => head.ends_with(['-', '_', '.']),
        None => false,
    }
}
89
/// Returns `true` when `name` ends with `.txt` or `.in`.
fn is_requirements_like_extension(name: &str) -> bool {
    [".txt", ".in"]
        .iter()
        .any(|suffix| name.ends_with(*suffix))
}
93
94fn has_requirements_like_ancestor(path: &Path) -> bool {
95 path.parent()
96 .into_iter()
97 .flat_map(Path::ancestors)
98 .filter_map(|ancestor| ancestor.file_name())
99 .filter_map(|name| name.to_str())
100 .any(is_requirements_like_dir_name)
101}
102
/// Returns `true` when a directory name starts or ends with `requirements`
/// (which includes the exact name `requirements`).
fn is_requirements_like_dir_name(name: &str) -> bool {
    const KEYWORD: &str = "requirements";
    name.starts_with(KEYWORD) || name.ends_with(KEYWORD)
}
106
/// Mutable accumulator shared across a requirements file and every file it
/// references through `-r` / `-c`.
struct ParseState {
    // Dependencies built from requirement lines, in the order encountered.
    dependencies: Vec<Dependency>,
    // Values of every `--extra-index-url` option seen.
    extra_index_urls: Vec<String>,
    // Value of the most recently seen `--index-url` option, if any.
    index_url: Option<String>,
    // Targets of `-r` / `--requirement` options, as written in the file.
    includes: Vec<String>,
    // Targets of `-c` / `--constraint` options, as written in the file.
    constraints: Vec<String>,
    // Canonicalized paths of files already entered; used to avoid re-reading a file.
    visited: HashSet<PathBuf>,
}
115
116fn extract_from_requirements_txt(path: &Path) -> PackageData {
117 let mut state = ParseState {
118 dependencies: Vec::new(),
119 extra_index_urls: Vec::new(),
120 index_url: None,
121 includes: Vec::new(),
122 constraints: Vec::new(),
123 visited: HashSet::new(),
124 };
125
126 let (scope, is_runtime) = scope_from_filename(path);
127
128 parse_requirements_with_includes(path, &mut state, &scope, is_runtime, 0);
129
130 let mut extra_data = HashMap::new();
131 if let Some(url) = state.index_url {
132 extra_data.insert(
133 "index_url".to_string(),
134 JsonValue::String(truncate_field(url)),
135 );
136 }
137 if !state.extra_index_urls.is_empty() {
138 extra_data.insert(
139 "extra_index_urls".to_string(),
140 JsonValue::Array(
141 state
142 .extra_index_urls
143 .into_iter()
144 .map(|u| JsonValue::String(truncate_field(u)))
145 .collect(),
146 ),
147 );
148 }
149 if !state.includes.is_empty() {
150 extra_data.insert(
151 "requirements_includes".to_string(),
152 JsonValue::Array(
153 state
154 .includes
155 .into_iter()
156 .map(|i| JsonValue::String(truncate_field(i)))
157 .collect(),
158 ),
159 );
160 }
161 if !state.constraints.is_empty() {
162 extra_data.insert(
163 "constraints".to_string(),
164 JsonValue::Array(
165 state
166 .constraints
167 .into_iter()
168 .map(|c| JsonValue::String(truncate_field(c)))
169 .collect(),
170 ),
171 );
172 }
173
174 let extra_data = if extra_data.is_empty() {
175 None
176 } else {
177 Some(extra_data)
178 };
179
180 default_package_data(state.dependencies, extra_data)
181}
182
183fn parse_requirements_with_includes(
184 path: &Path,
185 state: &mut ParseState,
186 scope: &str,
187 is_runtime: bool,
188 depth: usize,
189) {
190 if depth > MAX_RECURSION_DEPTH {
191 warn!(
192 "Maximum recursion depth ({}) exceeded for include: {:?}",
193 MAX_RECURSION_DEPTH, path
194 );
195 return;
196 }
197
198 let abs_path = match path.canonicalize() {
199 Ok(p) => p,
200 Err(_) => {
201 warn!("Cannot resolve path: {:?}", path);
202 return;
203 }
204 };
205
206 if state.visited.contains(&abs_path) {
207 warn!("Circular include detected: {:?}", path);
208 return;
209 }
210
211 state.visited.insert(abs_path.clone());
212
213 let content = match read_file_to_string(&abs_path, None) {
214 Ok(c) => c,
215 Err(e) => {
216 warn!("Cannot read file {:?}: {}", abs_path, e);
217 return;
218 }
219 };
220
221 for line in collect_logical_lines(&content)
222 .into_iter()
223 .take(MAX_ITERATION_COUNT)
224 {
225 let cleaned = strip_inline_comment(&line);
226 let trimmed = cleaned.trim();
227 if trimmed.is_empty() || trimmed.starts_with('#') {
228 continue;
229 }
230
231 if let Some(url) = parse_option_value(trimmed, "--extra-index-url") {
232 state.extra_index_urls.push(truncate_field(url));
233 continue;
234 }
235
236 if let Some(url) = parse_option_value(trimmed, "--index-url") {
237 state.index_url = Some(truncate_field(url));
238 continue;
239 }
240
241 if let Some(path_value) = parse_option_value(trimmed, "-r")
242 .or_else(|| parse_option_value(trimmed, "--requirement"))
243 {
244 state.includes.push(truncate_field(path_value.clone()));
245 let included_path = abs_path
246 .parent()
247 .unwrap_or_else(|| Path::new("."))
248 .join(&path_value);
249
250 if included_path.exists() {
251 parse_requirements_with_includes(
252 &included_path,
253 state,
254 scope,
255 is_runtime,
256 depth + 1,
257 );
258 } else {
259 warn!("Included file not found: {:?}", included_path);
260 }
261 continue;
262 }
263
264 if let Some(path_value) = parse_option_value(trimmed, "-c")
265 .or_else(|| parse_option_value(trimmed, "--constraint"))
266 {
267 state.constraints.push(truncate_field(path_value.clone()));
268 let constraint_path = abs_path
269 .parent()
270 .unwrap_or_else(|| Path::new("."))
271 .join(&path_value);
272
273 if constraint_path.exists() {
274 parse_requirements_with_includes(
275 &constraint_path,
276 state,
277 scope,
278 is_runtime,
279 depth + 1,
280 );
281 } else {
282 warn!("Constraint file not found: {:?}", constraint_path);
283 }
284 continue;
285 }
286
287 if trimmed.starts_with('-')
288 && !trimmed.starts_with("-e")
289 && !trimmed.starts_with("--editable")
290 {
291 continue;
292 }
293
294 if let Some(dependency) = build_dependency(trimmed, scope, is_runtime) {
295 if state.dependencies.len() >= MAX_ITERATION_COUNT {
296 warn!(
297 "Reached maximum dependency count ({}) in {:?}",
298 MAX_ITERATION_COUNT, abs_path
299 );
300 break;
301 }
302 state.dependencies.push(dependency);
303 }
304 }
305}
306
307fn default_package_data(
308 dependencies: Vec<Dependency>,
309 extra_data: Option<HashMap<String, JsonValue>>,
310) -> PackageData {
311 PackageData {
312 package_type: Some(RequirementsTxtParser::PACKAGE_TYPE),
313 primary_language: Some("Python".to_string()),
314 extra_data,
315 dependencies,
316 datasource_id: Some(DatasourceId::PipRequirements),
317 ..Default::default()
318 }
319}
320
321fn collect_logical_lines(content: &str) -> Vec<String> {
322 let mut lines = Vec::new();
323 let mut current = String::new();
324
325 for raw_line in content.lines().take(MAX_ITERATION_COUNT) {
326 let line = raw_line.trim_end_matches('\r');
327 let trimmed = line.trim_end();
328 let is_continuation = trimmed.ends_with('\\');
329 let line_without = if is_continuation {
330 trimmed.trim_end_matches('\\')
331 } else {
332 line
333 };
334
335 if !line_without.trim().is_empty() {
336 if !current.is_empty() {
337 current.push(' ');
338 }
339 current.push_str(line_without.trim());
340 }
341
342 if !is_continuation && !current.is_empty() {
343 lines.push(current.trim().to_string());
344 current.clear();
345 }
346 }
347
348 if !current.is_empty() {
349 lines.push(current.trim().to_string());
350 }
351
352 lines
353}
354
/// Removes a trailing `#` comment from `line`.
///
/// A `#` starts a comment only when it is outside single/double quotes and is
/// either at the start of the line or preceded by whitespace; this keeps URL
/// fragments such as `#egg=name` intact. The text before the comment is
/// returned with trailing whitespace removed; a line without a comment is
/// returned unchanged.
fn strip_inline_comment(line: &str) -> String {
    // The quote character currently open, if any.
    let mut open_quote: Option<char> = None;

    for (pos, current) in line.char_indices() {
        match (open_quote, current) {
            (Some(quote), c) if quote == c => open_quote = None,
            (None, '\'' | '"') => open_quote = Some(current),
            (None, '#') => {
                let before = &line[..pos];
                if before.is_empty() || before.ends_with(char::is_whitespace) {
                    return before.trim_end().to_string();
                }
            }
            _ => {}
        }
    }

    line.to_string()
}
373
/// Extracts the value of `option` from `line`.
///
/// `line` must start with `option`; the value is whatever follows, with
/// surrounding whitespace and one optional leading `=` removed. Returns `None`
/// when the line does not start with `option` or the value is empty.
fn parse_option_value(line: &str, option: &str) -> Option<String> {
    let after_option = line.strip_prefix(option)?.trim();
    let value = match after_option.strip_prefix('=') {
        Some(after_equals) => after_equals.trim(),
        None => after_option,
    };

    if value.is_empty() {
        return None;
    }
    Some(value.to_string())
}
386
/// Derives a dependency scope and runtime flag from the file name of `path`.
///
/// The lower-cased file name is searched for `dev`, `test` and `doc`, in that
/// order; the first hit yields `develop`, `test` or `docs` with
/// `is_runtime = false`. Otherwise the scope is `install` with
/// `is_runtime = true`. Directory components are not examined.
fn scope_from_filename(path: &Path) -> (String, bool) {
    // (keyword looked for in the file name, resulting scope), in priority order.
    const KEYWORD_SCOPES: [(&str, &str); 3] =
        [("dev", "develop"), ("test", "test"), ("doc", "docs")];

    let lowered = path
        .file_name()
        .and_then(|os_name| os_name.to_str())
        .map(str::to_ascii_lowercase)
        .unwrap_or_default();

    KEYWORD_SCOPES
        .iter()
        .find(|(keyword, _)| lowered.contains(*keyword))
        .map(|(_, scope)| ((*scope).to_string(), false))
        .unwrap_or_else(|| ("install".to_string(), true))
}
406
407fn build_dependency(line: &str, scope: &str, is_runtime: bool) -> Option<Dependency> {
408 let trimmed = line.trim();
409 if trimmed.is_empty() {
410 return None;
411 }
412
413 let mut is_editable = false;
414 let mut requirement = truncate_field(trimmed.to_string());
415 let mut extracted_requirement = truncate_field(trimmed.to_string());
416
417 if let Some(rest) = trimmed.strip_prefix("-e") {
418 is_editable = true;
419 requirement = truncate_field(rest.trim().to_string());
420 extracted_requirement = truncate_field(format!("--editable {}", requirement));
421 } else if let Some(rest) = trimmed.strip_prefix("--editable") {
422 is_editable = true;
423 requirement = truncate_field(rest.trim().to_string());
424 extracted_requirement = truncate_field(format!("--editable {}", requirement));
425 }
426
427 let (requirement, hash_options) = split_hash_options(&requirement);
428 let requirement = requirement.trim();
429 if requirement.is_empty() {
430 return None;
431 }
432
433 if looks_like_hash_only_requirement(requirement) {
434 return None;
435 }
436
437 let parsed = parse_requirement(requirement);
438
439 let pinned_version = parsed
440 .specifiers
441 .as_deref()
442 .and_then(extract_pinned_version);
443 let is_pinned = pinned_version.is_some();
444
445 let purl = parsed
446 .name
447 .as_ref()
448 .and_then(|name| create_pypi_purl(name, pinned_version.as_deref()));
449
450 let mut extra_data = HashMap::new();
451 extra_data.insert("is_editable".to_string(), JsonValue::Bool(is_editable));
452 extra_data.insert(
453 "link".to_string(),
454 parsed
455 .link
456 .clone()
457 .map(|l| JsonValue::String(truncate_field(l)))
458 .unwrap_or(JsonValue::Null),
459 );
460 extra_data.insert(
461 "hash_options".to_string(),
462 JsonValue::Array(
463 hash_options
464 .into_iter()
465 .map(|h| JsonValue::String(truncate_field(h)))
466 .collect(),
467 ),
468 );
469 extra_data.insert("is_constraint".to_string(), JsonValue::Bool(false));
470 extra_data.insert(
471 "is_archive".to_string(),
472 parsed
473 .is_archive
474 .map(JsonValue::Bool)
475 .unwrap_or(JsonValue::Null),
476 );
477 extra_data.insert("is_wheel".to_string(), JsonValue::Bool(parsed.is_wheel));
478 extra_data.insert(
479 "is_url".to_string(),
480 parsed
481 .is_url
482 .map(JsonValue::Bool)
483 .unwrap_or(JsonValue::Null),
484 );
485 extra_data.insert(
486 "is_vcs_url".to_string(),
487 parsed
488 .is_vcs_url
489 .map(JsonValue::Bool)
490 .unwrap_or(JsonValue::Null),
491 );
492 extra_data.insert(
493 "is_name_at_url".to_string(),
494 JsonValue::Bool(parsed.is_name_at_url),
495 );
496 extra_data.insert(
497 "is_local_path".to_string(),
498 parsed
499 .is_local_path
500 .map(|value| value || is_editable)
501 .map(JsonValue::Bool)
502 .unwrap_or(JsonValue::Null),
503 );
504
505 if let Some(marker) = parsed.marker {
506 extra_data.insert(
507 "markers".to_string(),
508 JsonValue::String(truncate_field(marker)),
509 );
510 }
511
512 Some(Dependency {
513 purl,
514 extracted_requirement: Some(truncate_field(extracted_requirement)),
515 scope: Some(scope.to_string()),
516 is_runtime: Some(is_runtime),
517 is_optional: Some(false),
518 is_pinned: Some(is_pinned),
519 is_direct: Some(true),
520 resolved_package: None,
521 extra_data: Some(extra_data),
522 })
523}
524
/// Returns `true` when `requirement` (ignoring surrounding whitespace) is
/// nothing but ASCII hex digits with a length of 32, 40, 64, 96 or 128.
///
/// Such a token has the shape of a bare hex digest rather than a package
/// requirement.
fn looks_like_hash_only_requirement(requirement: &str) -> bool {
    let candidate = requirement.trim();
    let has_digest_length = matches!(candidate.len(), 32 | 40 | 64 | 96 | 128);

    // A string made only of hex digits cannot contain whitespace, brackets,
    // `@`, `;`, slashes, `==`, `://` or `git+`, so this one test rejects them all.
    has_digest_length && candidate.bytes().all(|byte| byte.is_ascii_hexdigit())
}
542
/// Separates `--hash=<value>` tokens from the rest of a requirement.
///
/// Returns the remaining whitespace-separated tokens re-joined with single
/// spaces, and the non-empty hash values in their original order. A
/// `--hash=` token with an empty value is dropped entirely.
fn split_hash_options(input: &str) -> (String, Vec<String>) {
    const HASH_PREFIX: &str = "--hash=";

    let (hash_tokens, kept): (Vec<&str>, Vec<&str>) = input
        .split_whitespace()
        .partition(|token| token.starts_with(HASH_PREFIX));

    let hashes = hash_tokens
        .into_iter()
        .filter_map(|token| token.strip_prefix(HASH_PREFIX))
        .filter(|value| !value.is_empty())
        .map(str::to_string)
        .collect();

    (kept.join(" "), hashes)
}
559
/// Result of interpreting a single requirement string, as produced by
/// `parse_requirement`.
struct ParsedRequirement {
    // Normalized package name, when one could be determined.
    name: Option<String>,
    // Version specifier text taken from the PEP 508 parse, if any.
    specifiers: Option<String>,
    // Environment marker text taken from the PEP 508 parse, if any.
    marker: Option<String>,
    // URL or path the requirement points at; `None` for a plain name requirement.
    link: Option<String>,
    // The `Option<bool>` flags below are `None` for a plain name requirement
    // and otherwise mirror the corresponding `LinkFlags` value.
    is_url: Option<bool>,
    is_vcs_url: Option<bool>,
    is_local_path: Option<bool>,
    // Copied from the PEP 508 parse result when it carried a URL; `false` otherwise.
    is_name_at_url: bool,
    is_archive: Option<bool>,
    is_wheel: bool,
}
572
573fn parse_requirement(input: &str) -> ParsedRequirement {
574 if let Some(parsed) = parse_pep508_requirement(input) {
575 if let Some(url) = parsed.url.clone() {
576 return parsed_with_link(parsed, &url);
577 }
578
579 if !is_link_like(input) {
580 let name = Some(normalize_pypi_name(&parsed.name));
581 return ParsedRequirement {
582 name,
583 specifiers: parsed.specifiers.map(truncate_field),
584 marker: parsed.marker.map(truncate_field),
585 link: None,
586 is_url: None,
587 is_vcs_url: None,
588 is_local_path: None,
589 is_name_at_url: false,
590 is_archive: None,
591 is_wheel: false,
592 };
593 }
594 }
595
596 if let Some((name, link)) = parse_link_with_name(input) {
597 let normalized_name = normalize_pypi_name(&name);
598 let link_info = parse_link_flags(&link);
599 return ParsedRequirement {
600 name: Some(normalized_name),
601 specifiers: None,
602 marker: None,
603 link: Some(truncate_field(link)),
604 is_url: Some(link_info.is_url),
605 is_vcs_url: Some(link_info.is_vcs_url),
606 is_local_path: Some(link_info.is_local_path),
607 is_name_at_url: link_info.is_name_at_url,
608 is_archive: link_info.is_archive,
609 is_wheel: link_info.is_wheel,
610 };
611 }
612
613 let link_info = parse_link_flags(input);
614 ParsedRequirement {
615 name: None,
616 specifiers: None,
617 marker: None,
618 link: Some(truncate_field(input.to_string())),
619 is_url: Some(link_info.is_url),
620 is_vcs_url: Some(link_info.is_vcs_url),
621 is_local_path: Some(link_info.is_local_path),
622 is_name_at_url: link_info.is_name_at_url,
623 is_archive: link_info.is_archive,
624 is_wheel: link_info.is_wheel,
625 }
626}
627
628fn parsed_with_link(parsed: Pep508Requirement, link: &str) -> ParsedRequirement {
629 let name = normalize_pypi_name(&parsed.name);
630 let link_info = parse_link_flags(link);
631 ParsedRequirement {
632 name: Some(name),
633 specifiers: parsed.specifiers.map(truncate_field),
634 marker: parsed.marker.map(truncate_field),
635 link: Some(truncate_field(link.to_string())),
636 is_url: Some(link_info.is_url),
637 is_vcs_url: Some(link_info.is_vcs_url),
638 is_local_path: Some(link_info.is_local_path),
639 is_name_at_url: parsed.is_name_at_url,
640 is_archive: link_info.is_archive,
641 is_wheel: link_info.is_wheel,
642 }
643}
644
645fn parse_link_with_name(input: &str) -> Option<(String, String)> {
646 if let Some(egg) = extract_egg_name(input) {
647 return Some((egg, input.to_string()));
648 }
649 None
650}
651
652fn extract_egg_name(input: &str) -> Option<String> {
653 let fragment = input.split('#').nth(1)?;
654 let egg_part = fragment.strip_prefix("egg=")?;
655 let name_part = egg_part.split('&').next()?.trim();
656 if name_part.is_empty() {
657 return None;
658 }
659 let (name, _extras, _) = parse_pep508_requirement(name_part)
660 .map(|parsed| (parsed.name, parsed.extras, parsed.specifiers))
661 .unwrap_or_else(|| (name_part.to_string(), Vec::new(), None));
662 Some(name)
663}
664
/// Classification of a link string, as computed by `parse_link_flags`.
struct LinkFlags {
    // The link contains `://`, starts with `file:`, or is a VCS URL.
    is_url: bool,
    // The link starts with `git+`, `hg+`, `svn+` or `bzr+`.
    is_vcs_url: bool,
    // The link starts with `./`, `../`, `/`, `~` or `file:`.
    is_local_path: bool,
    // Always `false` when produced by `parse_link_flags`.
    is_name_at_url: bool,
    // `Some(true)` for a known archive/wheel suffix, `Some(false)` for other
    // URLs or local paths, `None` when neither applies.
    is_archive: Option<bool>,
    // The link ends with `.whl`.
    is_wheel: bool,
}
673
674fn parse_link_flags(link: &str) -> LinkFlags {
675 let trimmed = link.trim();
676 let is_vcs_url = trimmed.starts_with("git+")
677 || trimmed.starts_with("hg+")
678 || trimmed.starts_with("svn+")
679 || trimmed.starts_with("bzr+");
680 let has_scheme = trimmed.contains("://") || trimmed.starts_with("file:");
681 let is_local_path = trimmed.starts_with("./")
682 || trimmed.starts_with("../")
683 || trimmed.starts_with('/')
684 || trimmed.starts_with('~')
685 || trimmed.starts_with("file:");
686
687 let is_wheel = trimmed.ends_with(".whl");
688 let is_archive = if is_wheel
689 || trimmed.ends_with(".zip")
690 || trimmed.ends_with(".tar.gz")
691 || trimmed.ends_with(".tgz")
692 || trimmed.ends_with(".tar.bz2")
693 || trimmed.ends_with(".tar")
694 {
695 Some(true)
696 } else if has_scheme || is_local_path {
697 Some(false)
698 } else {
699 None
700 };
701
702 LinkFlags {
703 is_url: has_scheme || is_vcs_url,
704 is_vcs_url,
705 is_local_path,
706 is_name_at_url: false,
707 is_archive,
708 is_wheel,
709 }
710}
711
/// Returns `true` when the trimmed `input` looks like a URL or filesystem
/// path rather than a package name: it contains `://` or starts with a VCS
/// prefix, `file:`, `./`, `../`, `/` or `~`.
fn is_link_like(input: &str) -> bool {
    const LINK_PREFIXES: [&str; 9] = [
        "git+", "hg+", "svn+", "bzr+", "file:", "./", "../", "/", "~",
    ];

    let candidate = input.trim();
    candidate.contains("://")
        || LINK_PREFIXES
            .iter()
            .any(|prefix| candidate.starts_with(*prefix))
}
725
/// Returns the exact version from a single `==` or `===` specifier.
///
/// Returns `None` when `specifiers` holds several comma-separated clauses,
/// uses any other operator, or has no version after the operator.
fn extract_pinned_version(specifiers: &str) -> Option<String> {
    let spec = specifiers.trim();
    if spec.contains(',') {
        return None;
    }

    // `===` must be tried first: `==` is a prefix of it, so testing `==` first
    // would leave a stray `=` at the front of the version.
    let version = spec
        .strip_prefix("===")
        .or_else(|| spec.strip_prefix("=="))?
        .trim();

    if version.is_empty() {
        None
    } else {
        Some(version.to_string())
    }
}
747
748fn create_pypi_purl(name: &str, version: Option<&str>) -> Option<String> {
749 PackageUrl::new(RequirementsTxtParser::PACKAGE_TYPE.as_str(), name)
750 .ok()
751 .map(|_| match version {
752 Some(version) => format!("pkg:pypi/{name}@{}", encode_pypi_purl_version(version)),
753 None => format!("pkg:pypi/{name}"),
754 })
755}
756
/// Percent-encodes every `*` in `version` as `%2A`; all other characters are
/// left unchanged.
fn encode_pypi_purl_version(version: &str) -> String {
    let pieces: Vec<&str> = version.split('*').collect();
    pieces.join("%2A")
}
760
/// Normalizes a package name: surrounding whitespace is removed, ASCII
/// letters are lower-cased, and every run of `-`, `_` and `.` collapses into
/// a single `-`.
fn normalize_pypi_name(name: &str) -> String {
    let mut normalized = String::new();

    for ch in name.trim().chars().map(|c| c.to_ascii_lowercase()) {
        let mapped = if matches!(ch, '-' | '_' | '.') { '-' } else { ch };
        // The output ends with '-' only right after a separator, so this
        // skips the rest of a separator run.
        if mapped == '-' && normalized.ends_with('-') {
            continue;
        }
        normalized.push(mapped);
    }

    normalized
}
779
// Registers this parser with a description, the path patterns it is meant to
// cover, its package type, its primary language and a documentation URL.
// NOTE(review): `RequirementsTxtParser::is_match` also accepts `.txt` / `.in`
// files below directories whose name merely ENDS with "requirements"
// (e.g. `dev-requirements/base.txt`); none of the patterns below covers that
// case — confirm whether this list is expected to mirror `is_match`.
crate::register_parser!(
    "pip requirements file",
    &[
        "**/requirements*.txt",
        "**/*requirements.txt",
        "**/reqs.txt",
        "**/*-reqs.txt",
        "**/*_reqs.txt",
        "**/*.reqs.txt",
        "**/requirements*.in",
        "**/*requirements.in",
        "**/requires.txt",
        "**/requirements/*.txt",
        "**/requirements/*.in",
        "**/requirements/**/*.txt",
        "**/requirements/**/*.in",
        "**/requirements*/*.txt",
        "**/requirements*/*.in",
        "**/requirements*/**/*.txt",
        "**/requirements*/**/*.in"
    ],
    "pypi",
    "Python",
    Some("https://pip.pypa.io/en/latest/reference/requirements-file-format/"),
);