1use std::collections::HashMap;
29use std::path::{Path, PathBuf};
30
31use crate::parser_warn as warn;
32use packageurl::PackageUrl;
33use serde_json::Value as JsonValue;
34
35use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
36use crate::parsers::pep508::{Pep508Requirement, parse_pep508_requirement};
37use crate::parsers::utils::{
38 MAX_ITERATION_COUNT, MAX_RECURSION_DEPTH, RecursionGuard, read_file_to_string, truncate_field,
39};
40
41use super::PackageParser;
42
43pub struct RequirementsTxtParser;
48
49impl PackageParser for RequirementsTxtParser {
50 const PACKAGE_TYPE: PackageType = PackageType::Pypi;
51
52 fn extract_packages(path: &Path) -> Vec<PackageData> {
53 vec![extract_from_requirements_txt(path)]
54 }
55
56 fn is_match(path: &Path) -> bool {
57 let filename = path.file_name().and_then(|name| name.to_str());
58 let Some(name) = filename else {
59 return false;
60 };
61
62 is_requirements_txt_filename(name)
63 || (is_requirements_like_extension(name) && has_requirements_like_ancestor(path))
64 }
65}
66
67fn is_requirements_txt_filename(name: &str) -> bool {
68 if name == "requirements.txt" || name == "requires.txt" {
69 return true;
70 }
71
72 let (stem, extension) = if let Some(stem) = name.strip_suffix(".txt") {
73 (stem, "txt")
74 } else if let Some(stem) = name.strip_suffix(".in") {
75 (stem, "in")
76 } else {
77 return false;
78 };
79
80 stem == "requirements"
84 || stem.starts_with("requirements")
85 || stem.ends_with("requirements")
86 || (extension == "txt" && is_reqs_alias_stem(stem))
87}
88
89fn is_reqs_alias_stem(stem: &str) -> bool {
90 matches_requirement_alias_stem(stem, "reqs") || matches_requirement_alias_stem(stem, "minreqs")
91}
92
/// Returns `true` when `stem` equals `alias`, or ends with `alias` directly preceded
/// by one of the separators `-`, `_` or `.` (e.g. `dev-reqs` for alias `reqs`).
fn matches_requirement_alias_stem(stem: &str, alias: &str) -> bool {
    match stem.strip_suffix(alias) {
        // Nothing left in front of the alias: the stem is the alias itself.
        Some("") => true,
        // Otherwise the character right before the alias must be a separator.
        Some(prefix) => prefix.ends_with(['-', '_', '.']),
        None => false,
    }
}
99
/// Returns `true` when `name` ends with the `.txt` or `.in` extension.
fn is_requirements_like_extension(name: &str) -> bool {
    // The text after the last dot decides; names without a dot never match.
    matches!(name.rsplit_once('.'), Some((_, "txt" | "in")))
}
103
104fn has_requirements_like_ancestor(path: &Path) -> bool {
105 path.parent()
106 .into_iter()
107 .flat_map(Path::ancestors)
108 .filter_map(|ancestor| ancestor.file_name())
109 .filter_map(|name| name.to_str())
110 .any(is_requirements_like_dir_name)
111}
112
/// Returns `true` when a directory name starts or ends with `requirements`
/// (which includes the exact name `requirements`).
fn is_requirements_like_dir_name(name: &str) -> bool {
    const MARKER: &str = "requirements";
    name.starts_with(MARKER) || name.ends_with(MARKER)
}
116
117struct ParseState {
118 dependencies: Vec<Dependency>,
119 extra_index_urls: Vec<String>,
120 index_url: Option<String>,
121 includes: Vec<String>,
122 constraints: Vec<String>,
123 guard: RecursionGuard<PathBuf>,
124}
125
126fn extract_from_requirements_txt(path: &Path) -> PackageData {
127 let mut state = ParseState {
128 dependencies: Vec::new(),
129 extra_index_urls: Vec::new(),
130 index_url: None,
131 includes: Vec::new(),
132 constraints: Vec::new(),
133 guard: RecursionGuard::new(),
134 };
135
136 let (scope, is_runtime) = scope_from_filename(path);
137
138 parse_requirements_with_includes(path, &mut state, &scope, is_runtime);
139
140 let mut extra_data = HashMap::new();
141 if let Some(url) = state.index_url {
142 extra_data.insert(
143 "index_url".to_string(),
144 JsonValue::String(truncate_field(url)),
145 );
146 }
147 if !state.extra_index_urls.is_empty() {
148 extra_data.insert(
149 "extra_index_urls".to_string(),
150 JsonValue::Array(
151 state
152 .extra_index_urls
153 .into_iter()
154 .map(|u| JsonValue::String(truncate_field(u)))
155 .collect(),
156 ),
157 );
158 }
159 if !state.includes.is_empty() {
160 extra_data.insert(
161 "requirements_includes".to_string(),
162 JsonValue::Array(
163 state
164 .includes
165 .into_iter()
166 .map(|i| JsonValue::String(truncate_field(i)))
167 .collect(),
168 ),
169 );
170 }
171 if !state.constraints.is_empty() {
172 extra_data.insert(
173 "constraints".to_string(),
174 JsonValue::Array(
175 state
176 .constraints
177 .into_iter()
178 .map(|c| JsonValue::String(truncate_field(c)))
179 .collect(),
180 ),
181 );
182 }
183
184 let extra_data = if extra_data.is_empty() {
185 None
186 } else {
187 Some(extra_data)
188 };
189
190 default_package_data(state.dependencies, extra_data)
191}
192
193fn parse_requirements_with_includes(
194 path: &Path,
195 state: &mut ParseState,
196 scope: &str,
197 is_runtime: bool,
198) {
199 if state.guard.exceeded() {
200 warn!(
201 "Maximum recursion depth ({}) exceeded for include: {:?}",
202 MAX_RECURSION_DEPTH, path
203 );
204 return;
205 }
206
207 let abs_path = match path.canonicalize() {
208 Ok(p) => p,
209 Err(_) => {
210 warn!("Cannot resolve path: {:?}", path);
211 return;
212 }
213 };
214
215 if state.guard.enter(abs_path.clone()) {
216 warn!("Circular include detected: {:?}", path);
217 return;
218 }
219
220 let content = match read_file_to_string(&abs_path, None) {
221 Ok(c) => c,
222 Err(e) => {
223 warn!("Cannot read file {:?}: {}", abs_path, e);
224 return;
225 }
226 };
227
228 for line in collect_logical_lines(&content)
229 .into_iter()
230 .take(MAX_ITERATION_COUNT)
231 {
232 let cleaned = strip_inline_comment(&line);
233 let trimmed = cleaned.trim();
234 if trimmed.is_empty() || trimmed.starts_with('#') {
235 continue;
236 }
237
238 if let Some(url) = parse_option_value(trimmed, "--extra-index-url") {
239 state.extra_index_urls.push(truncate_field(url));
240 continue;
241 }
242
243 if let Some(url) = parse_option_value(trimmed, "--index-url") {
244 state.index_url = Some(truncate_field(url));
245 continue;
246 }
247
248 if let Some(path_value) = parse_option_value(trimmed, "-r")
249 .or_else(|| parse_option_value(trimmed, "--requirement"))
250 {
251 state.includes.push(truncate_field(path_value.clone()));
252 let included_path = abs_path
253 .parent()
254 .unwrap_or_else(|| Path::new("."))
255 .join(&path_value);
256
257 if included_path.exists() {
258 parse_requirements_with_includes(&included_path, state, scope, is_runtime);
259 } else {
260 warn!("Included file not found: {:?}", included_path);
261 }
262 continue;
263 }
264
265 if let Some(path_value) = parse_option_value(trimmed, "-c")
266 .or_else(|| parse_option_value(trimmed, "--constraint"))
267 {
268 state.constraints.push(truncate_field(path_value.clone()));
269 let constraint_path = abs_path
270 .parent()
271 .unwrap_or_else(|| Path::new("."))
272 .join(&path_value);
273
274 if constraint_path.exists() {
275 parse_requirements_with_includes(&constraint_path, state, scope, is_runtime);
276 } else {
277 warn!("Constraint file not found: {:?}", constraint_path);
278 }
279 continue;
280 }
281
282 if trimmed.starts_with('-')
283 && !trimmed.starts_with("-e")
284 && !trimmed.starts_with("--editable")
285 {
286 continue;
287 }
288
289 if let Some(dependency) = build_dependency(trimmed, scope, is_runtime) {
290 if state.dependencies.len() >= MAX_ITERATION_COUNT {
291 warn!(
292 "Reached maximum dependency count ({}) in {:?}",
293 MAX_ITERATION_COUNT, abs_path
294 );
295 break;
296 }
297 state.dependencies.push(dependency);
298 }
299 }
300
301 state.guard.leave(abs_path);
302}
303
304fn default_package_data(
305 dependencies: Vec<Dependency>,
306 extra_data: Option<HashMap<String, JsonValue>>,
307) -> PackageData {
308 PackageData {
309 package_type: Some(RequirementsTxtParser::PACKAGE_TYPE),
310 primary_language: Some("Python".to_string()),
311 extra_data,
312 dependencies,
313 datasource_id: Some(DatasourceId::PipRequirements),
314 ..Default::default()
315 }
316}
317
318fn collect_logical_lines(content: &str) -> Vec<String> {
319 let mut lines = Vec::new();
320 let mut current = String::new();
321
322 for raw_line in content.lines().take(MAX_ITERATION_COUNT) {
323 let line = raw_line.trim_end_matches('\r');
324 let trimmed = line.trim_end();
325 let is_continuation = trimmed.ends_with('\\');
326 let line_without = if is_continuation {
327 trimmed.trim_end_matches('\\')
328 } else {
329 line
330 };
331
332 if !line_without.trim().is_empty() {
333 if !current.is_empty() {
334 current.push(' ');
335 }
336 current.push_str(line_without.trim());
337 }
338
339 if !is_continuation && !current.is_empty() {
340 lines.push(current.trim().to_string());
341 current.clear();
342 }
343 }
344
345 if !current.is_empty() {
346 lines.push(current.trim().to_string());
347 }
348
349 lines
350}
351
/// Removes a trailing `# comment` from `line`.
///
/// A `#` starts a comment only when it is outside single/double quotes and is either
/// the first character or preceded by whitespace (so URL fragments such as
/// `#egg=name` are kept). The returned text before the comment is right-trimmed;
/// a line without a comment is returned unchanged.
fn strip_inline_comment(line: &str) -> String {
    // The quote character currently open, if any.
    let mut open_quote: Option<char> = None;

    for (pos, ch) in line.char_indices() {
        match (ch, open_quote) {
            ('\'' | '"', None) => open_quote = Some(ch),
            (quote, Some(open)) if quote == open => open_quote = None,
            ('#', None) => {
                let before = &line[..pos];
                if before.chars().next_back().map_or(true, char::is_whitespace) {
                    return before.trim_end().to_string();
                }
            }
            _ => {}
        }
    }

    line.to_string()
}
370
/// Extracts the value of `option` from `line`.
///
/// `line` must start with `option`; the value is whatever follows, with surrounding
/// whitespace and one optional leading `=` removed. Returns `None` when the line does
/// not start with `option` or the value is empty.
fn parse_option_value(line: &str, option: &str) -> Option<String> {
    let after_option = line.strip_prefix(option)?.trim();
    let value = match after_option.strip_prefix('=') {
        Some(after_equals) => after_equals.trim(),
        None => after_option,
    };

    (!value.is_empty()).then(|| value.to_string())
}
383
/// Derives the dependency scope and runtime flag from the file name of `path`.
///
/// The lowercased file name is searched for `dev`, `test` and `doc` (in that order),
/// yielding the non-runtime scopes `develop`, `test` and `docs`. Any other name,
/// including a missing or non-UTF-8 one, yields the runtime scope `install`.
fn scope_from_filename(path: &Path) -> (String, bool) {
    // (substring to look for, scope it selects), in priority order.
    const NON_RUNTIME_SCOPES: [(&str, &str); 3] =
        [("dev", "develop"), ("test", "test"), ("doc", "docs")];

    let lowered = path
        .file_name()
        .and_then(|os_name| os_name.to_str())
        .map(str::to_ascii_lowercase)
        .unwrap_or_default();

    for (needle, scope) in NON_RUNTIME_SCOPES.iter() {
        if lowered.contains(*needle) {
            return (scope.to_string(), false);
        }
    }

    ("install".to_string(), true)
}
403
404fn build_dependency(line: &str, scope: &str, is_runtime: bool) -> Option<Dependency> {
405 let trimmed = line.trim();
406 if trimmed.is_empty() {
407 return None;
408 }
409
410 let mut is_editable = false;
411 let mut requirement = truncate_field(trimmed.to_string());
412 let mut extracted_requirement = truncate_field(trimmed.to_string());
413
414 if let Some(rest) = trimmed.strip_prefix("-e") {
415 is_editable = true;
416 requirement = truncate_field(rest.trim().to_string());
417 extracted_requirement = truncate_field(format!("--editable {}", requirement));
418 } else if let Some(rest) = trimmed.strip_prefix("--editable") {
419 is_editable = true;
420 requirement = truncate_field(rest.trim().to_string());
421 extracted_requirement = truncate_field(format!("--editable {}", requirement));
422 }
423
424 let (requirement, hash_options) = split_hash_options(&requirement);
425 let requirement = requirement.trim();
426 if requirement.is_empty() {
427 return None;
428 }
429
430 if looks_like_hash_only_requirement(requirement) {
431 return None;
432 }
433
434 let parsed = parse_requirement(requirement);
435
436 let pinned_version = parsed
437 .specifiers
438 .as_deref()
439 .and_then(extract_pinned_version);
440 let is_pinned = pinned_version.is_some();
441
442 let purl = parsed
443 .name
444 .as_ref()
445 .and_then(|name| create_pypi_purl(name, pinned_version.as_deref()));
446
447 let mut extra_data = HashMap::new();
448 extra_data.insert("is_editable".to_string(), JsonValue::Bool(is_editable));
449 extra_data.insert(
450 "link".to_string(),
451 parsed
452 .link
453 .clone()
454 .map(|l| JsonValue::String(truncate_field(l)))
455 .unwrap_or(JsonValue::Null),
456 );
457 extra_data.insert(
458 "hash_options".to_string(),
459 JsonValue::Array(
460 hash_options
461 .into_iter()
462 .map(|h| JsonValue::String(truncate_field(h)))
463 .collect(),
464 ),
465 );
466 extra_data.insert("is_constraint".to_string(), JsonValue::Bool(false));
467 extra_data.insert(
468 "is_archive".to_string(),
469 parsed
470 .is_archive
471 .map(JsonValue::Bool)
472 .unwrap_or(JsonValue::Null),
473 );
474 extra_data.insert("is_wheel".to_string(), JsonValue::Bool(parsed.is_wheel));
475 extra_data.insert(
476 "is_url".to_string(),
477 parsed
478 .is_url
479 .map(JsonValue::Bool)
480 .unwrap_or(JsonValue::Null),
481 );
482 extra_data.insert(
483 "is_vcs_url".to_string(),
484 parsed
485 .is_vcs_url
486 .map(JsonValue::Bool)
487 .unwrap_or(JsonValue::Null),
488 );
489 extra_data.insert(
490 "is_name_at_url".to_string(),
491 JsonValue::Bool(parsed.is_name_at_url),
492 );
493 extra_data.insert(
494 "is_local_path".to_string(),
495 parsed
496 .is_local_path
497 .map(|value| value || is_editable)
498 .map(JsonValue::Bool)
499 .unwrap_or(JsonValue::Null),
500 );
501
502 if let Some(marker) = parsed.marker {
503 extra_data.insert(
504 "markers".to_string(),
505 JsonValue::String(truncate_field(marker)),
506 );
507 }
508
509 Some(Dependency {
510 purl,
511 extracted_requirement: Some(truncate_field(extracted_requirement)),
512 scope: Some(scope.to_string()),
513 is_runtime: Some(is_runtime),
514 is_optional: Some(false),
515 is_pinned: Some(is_pinned),
516 is_direct: Some(true),
517 resolved_package: None,
518 extra_data: Some(extra_data),
519 })
520}
521
/// Returns `true` when the trimmed `requirement` is nothing but a hex string of
/// length 32, 40, 64, 96 or 128.
///
/// NOTE(review): these lengths presumably correspond to hex digests of common hash
/// algorithms, i.e. a stray hash value rather than a package name — confirm.
fn looks_like_hash_only_requirement(requirement: &str) -> bool {
    const HEX_DIGEST_LENGTHS: [usize; 5] = [32, 40, 64, 96, 128];

    let candidate = requirement.trim();

    // An all-hex string cannot contain whitespace, brackets, `@`, `;`, slashes,
    // `==`, `://` or `git+`, so the hex test alone rules those out.
    HEX_DIGEST_LENGTHS.contains(&candidate.len())
        && candidate.bytes().all(|byte| byte.is_ascii_hexdigit())
}
539
/// Separates `--hash=<value>` tokens from the rest of a requirement.
///
/// Returns the remaining whitespace-separated tokens re-joined with single spaces,
/// plus the hash values in order of appearance. `--hash=` tokens with an empty
/// value are dropped from both outputs.
fn split_hash_options(input: &str) -> (String, Vec<String>) {
    let (hash_tokens, kept_tokens): (Vec<&str>, Vec<&str>) = input
        .split_whitespace()
        .partition(|token| token.starts_with("--hash="));

    let hashes = hash_tokens
        .into_iter()
        .filter_map(|token| token.strip_prefix("--hash="))
        .filter(|value| !value.is_empty())
        .map(String::from)
        .collect();

    (kept_tokens.join(" "), hashes)
}
556
557struct ParsedRequirement {
558 name: Option<String>,
559 specifiers: Option<String>,
560 marker: Option<String>,
561 link: Option<String>,
562 is_url: Option<bool>,
563 is_vcs_url: Option<bool>,
564 is_local_path: Option<bool>,
565 is_name_at_url: bool,
566 is_archive: Option<bool>,
567 is_wheel: bool,
568}
569
570fn parse_requirement(input: &str) -> ParsedRequirement {
571 if let Some(parsed) = parse_pep508_requirement(input) {
572 if let Some(url) = parsed.url.clone() {
573 return parsed_with_link(parsed, &url);
574 }
575
576 if !is_link_like(input) {
577 let name = Some(normalize_pypi_name(&parsed.name));
578 return ParsedRequirement {
579 name,
580 specifiers: parsed.specifiers.map(truncate_field),
581 marker: parsed.marker.map(truncate_field),
582 link: None,
583 is_url: None,
584 is_vcs_url: None,
585 is_local_path: None,
586 is_name_at_url: false,
587 is_archive: None,
588 is_wheel: false,
589 };
590 }
591 }
592
593 if let Some((name, link)) = parse_link_with_name(input) {
594 let normalized_name = normalize_pypi_name(&name);
595 let link_info = parse_link_flags(&link);
596 return ParsedRequirement {
597 name: Some(normalized_name),
598 specifiers: None,
599 marker: None,
600 link: Some(truncate_field(link)),
601 is_url: Some(link_info.is_url),
602 is_vcs_url: Some(link_info.is_vcs_url),
603 is_local_path: Some(link_info.is_local_path),
604 is_name_at_url: link_info.is_name_at_url,
605 is_archive: link_info.is_archive,
606 is_wheel: link_info.is_wheel,
607 };
608 }
609
610 let link_info = parse_link_flags(input);
611 ParsedRequirement {
612 name: None,
613 specifiers: None,
614 marker: None,
615 link: Some(truncate_field(input.to_string())),
616 is_url: Some(link_info.is_url),
617 is_vcs_url: Some(link_info.is_vcs_url),
618 is_local_path: Some(link_info.is_local_path),
619 is_name_at_url: link_info.is_name_at_url,
620 is_archive: link_info.is_archive,
621 is_wheel: link_info.is_wheel,
622 }
623}
624
625fn parsed_with_link(parsed: Pep508Requirement, link: &str) -> ParsedRequirement {
626 let name = normalize_pypi_name(&parsed.name);
627 let link_info = parse_link_flags(link);
628 ParsedRequirement {
629 name: Some(name),
630 specifiers: parsed.specifiers.map(truncate_field),
631 marker: parsed.marker.map(truncate_field),
632 link: Some(truncate_field(link.to_string())),
633 is_url: Some(link_info.is_url),
634 is_vcs_url: Some(link_info.is_vcs_url),
635 is_local_path: Some(link_info.is_local_path),
636 is_name_at_url: parsed.is_name_at_url,
637 is_archive: link_info.is_archive,
638 is_wheel: link_info.is_wheel,
639 }
640}
641
642fn parse_link_with_name(input: &str) -> Option<(String, String)> {
643 if let Some(egg) = extract_egg_name(input) {
644 return Some((egg, input.to_string()));
645 }
646 None
647}
648
649fn extract_egg_name(input: &str) -> Option<String> {
650 let fragment = input.split('#').nth(1)?;
651 let egg_part = fragment.strip_prefix("egg=")?;
652 let name_part = egg_part.split('&').next()?.trim();
653 if name_part.is_empty() {
654 return None;
655 }
656 let (name, _extras, _) = parse_pep508_requirement(name_part)
657 .map(|parsed| (parsed.name, parsed.extras, parsed.specifiers))
658 .unwrap_or_else(|| (name_part.to_string(), Vec::new(), None));
659 Some(name)
660}
661
/// Classification of a link-style requirement.
struct LinkFlags {
    /// The link has a URL scheme (`://` or `file:`) or a VCS prefix.
    is_url: bool,
    /// The link starts with `git+`, `hg+`, `svn+` or `bzr+`.
    is_vcs_url: bool,
    /// The link starts with `./`, `../`, `/`, `~` or `file:`.
    is_local_path: bool,
    /// Always `false` here; callers that know the `name @ url` form set it themselves.
    is_name_at_url: bool,
    /// `Some(true)` for archive/wheel file names, `Some(false)` for other URLs and
    /// local paths, `None` when the text is neither.
    is_archive: Option<bool>,
    /// The link target ends in `.whl`.
    is_wheel: bool,
}

/// Classifies `link` by its prefix/scheme and by the file extension of its target.
///
/// The extension is checked after removing the `#fragment` (and, for links with a
/// URL scheme, the `?query`), so links such as `https://host/pkg-1.0.tar.gz#sha256=...`
/// or `https://host/pkg.whl#egg=pkg` are still recognised as archives/wheels.
fn parse_link_flags(link: &str) -> LinkFlags {
    const VCS_PREFIXES: [&str; 4] = ["git+", "hg+", "svn+", "bzr+"];
    const ARCHIVE_SUFFIXES: [&str; 5] = [".zip", ".tar.gz", ".tgz", ".tar.bz2", ".tar"];

    let trimmed = link.trim();
    let is_vcs_url = VCS_PREFIXES
        .iter()
        .any(|prefix| trimmed.starts_with(*prefix));
    let has_scheme = trimmed.contains("://") || trimmed.starts_with("file:");
    let is_local_path = trimmed.starts_with("./")
        || trimmed.starts_with("../")
        || trimmed.starts_with('/')
        || trimmed.starts_with('~')
        || trimmed.starts_with("file:");

    // Part of the link that names the target file: no fragment, and no query
    // string when the link is a URL.
    let without_fragment = trimmed.split('#').next().unwrap_or(trimmed);
    let target = if has_scheme {
        without_fragment.split('?').next().unwrap_or(without_fragment)
    } else {
        without_fragment
    };

    let is_wheel = target.ends_with(".whl");
    let has_archive_suffix = ARCHIVE_SUFFIXES
        .iter()
        .any(|suffix| target.ends_with(*suffix));
    let is_archive = if is_wheel || has_archive_suffix {
        Some(true)
    } else if has_scheme || is_local_path {
        Some(false)
    } else {
        None
    };

    LinkFlags {
        is_url: has_scheme || is_vcs_url,
        is_vcs_url,
        is_local_path,
        is_name_at_url: false,
        is_archive,
        is_wheel,
    }
}
708
/// Returns `true` when the trimmed `input` looks like a link rather than a package
/// name: it contains `://`, or starts with a VCS prefix, `file:`, or a path prefix
/// (`./`, `../`, `/`, `~`).
fn is_link_like(input: &str) -> bool {
    const LINK_PREFIXES: [&str; 9] = [
        "git+", "hg+", "svn+", "bzr+", "file:", "./", "../", "/", "~",
    ];

    let candidate = input.trim();
    candidate.contains("://")
        || LINK_PREFIXES
            .iter()
            .any(|prefix| candidate.starts_with(*prefix))
}
722
/// Returns the version when `specifiers` is a single `==` or `===` clause.
///
/// Specifier lists containing a comma, other operators, and empty versions all
/// yield `None`. The version is returned trimmed and otherwise verbatim.
fn extract_pinned_version(specifiers: &str) -> Option<String> {
    let spec = specifiers.trim();
    if spec.contains(',') {
        return None;
    }

    // Try the longer operator first so `===` is not mistaken for `==` plus `=`.
    let version = spec
        .strip_prefix("===")
        .or_else(|| spec.strip_prefix("=="))?
        .trim();

    (!version.is_empty()).then(|| version.to_string())
}
742
743fn create_pypi_purl(name: &str, version: Option<&str>) -> Option<String> {
744 PackageUrl::new(RequirementsTxtParser::PACKAGE_TYPE.as_str(), name)
745 .ok()
746 .map(|_| match version {
747 Some(version) => format!("pkg:pypi/{name}@{}", encode_pypi_purl_version(version)),
748 None => format!("pkg:pypi/{name}"),
749 })
750}
751
/// Percent-encodes every `*` in `version` as `%2A`; all other characters are kept.
fn encode_pypi_purl_version(version: &str) -> String {
    let mut encoded = String::with_capacity(version.len());
    for ch in version.chars() {
        match ch {
            '*' => encoded.push_str("%2A"),
            other => encoded.push(other),
        }
    }
    encoded
}
755
/// Normalizes a package name: trims it, ASCII-lowercases it, and collapses every run
/// of `-`, `_` and `.` into a single `-`.
fn normalize_pypi_name(name: &str) -> String {
    name.trim()
        .chars()
        .fold(String::with_capacity(name.len()), |mut normalized, ch| {
            match ch {
                // Only separators ever produce '-', so a trailing '-' means the
                // previous character was a separator too.
                '-' | '_' | '.' => {
                    if !normalized.ends_with('-') {
                        normalized.push('-');
                    }
                }
                other => normalized.push(other.to_ascii_lowercase()),
            }
            normalized
        })
}
774
775crate::register_parser!(
776 "pip requirements file",
777 &[
778 "**/requirements*.txt",
779 "**/*requirements.txt",
780 "**/reqs.txt",
781 "**/minreqs.txt",
782 "**/*-reqs.txt",
783 "**/*_reqs.txt",
784 "**/*.reqs.txt",
785 "**/*-minreqs.txt",
786 "**/*_minreqs.txt",
787 "**/*.minreqs.txt",
788 "**/requirements*.in",
789 "**/*requirements.in",
790 "**/requires.txt",
791 "**/requirements/*.txt",
792 "**/requirements/*.in",
793 "**/requirements/**/*.txt",
794 "**/requirements/**/*.in",
795 "**/requirements*/*.txt",
796 "**/requirements*/*.in",
797 "**/requirements*/**/*.txt",
798 "**/requirements*/**/*.in"
799 ],
800 "pypi",
801 "Python",
802 Some("https://pip.pypa.io/en/latest/reference/requirements-file-format/"),
803);