1use std::collections::HashMap;
26use std::path::{Path, PathBuf};
27
28use crate::parser_warn as warn;
29use packageurl::PackageUrl;
30use serde_json::Value as JsonValue;
31
32use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
33use crate::parsers::pep508::{Pep508Requirement, parse_pep508_requirement};
34use crate::parsers::utils::{
35 MAX_ITERATION_COUNT, MAX_RECURSION_DEPTH, RecursionGuard, read_file_to_string, truncate_field,
36};
37
38use super::PackageParser;
39
40pub struct RequirementsTxtParser;
45
46impl PackageParser for RequirementsTxtParser {
47 const PACKAGE_TYPE: PackageType = PackageType::Pypi;
48
49 fn extract_packages(path: &Path) -> Vec<PackageData> {
50 vec![extract_from_requirements_txt(path)]
51 }
52
53 fn is_match(path: &Path) -> bool {
54 let filename = path.file_name().and_then(|name| name.to_str());
55 let Some(name) = filename else {
56 return false;
57 };
58
59 is_requirements_txt_filename(name)
60 || (is_requirements_like_extension(name) && has_requirements_like_ancestor(path))
61 }
62}
63
64fn is_requirements_txt_filename(name: &str) -> bool {
65 if name == "requirements.txt" || name == "requires.txt" {
66 return true;
67 }
68
69 let (stem, extension) = if let Some(stem) = name.strip_suffix(".txt") {
70 (stem, "txt")
71 } else if let Some(stem) = name.strip_suffix(".in") {
72 (stem, "in")
73 } else {
74 return false;
75 };
76
77 stem == "requirements"
81 || stem.starts_with("requirements")
82 || stem.ends_with("requirements")
83 || (extension == "txt" && is_reqs_alias_stem(stem))
84}
85
/// Returns `true` when `stem` is `reqs` itself or ends with `-reqs`, `_reqs` or `.reqs`.
fn is_reqs_alias_stem(stem: &str) -> bool {
    match stem.strip_suffix("reqs") {
        // Either nothing precedes `reqs`, or it is set off by a separator.
        Some(head) => head.is_empty() || head.ends_with(['-', '_', '.']),
        None => false,
    }
}
89
/// Returns `true` when `name` ends with `.txt` or `.in`.
fn is_requirements_like_extension(name: &str) -> bool {
    [".txt", ".in"].iter().any(|suffix| name.ends_with(*suffix))
}
93
94fn has_requirements_like_ancestor(path: &Path) -> bool {
95 path.parent()
96 .into_iter()
97 .flat_map(Path::ancestors)
98 .filter_map(|ancestor| ancestor.file_name())
99 .filter_map(|name| name.to_str())
100 .any(is_requirements_like_dir_name)
101}
102
/// Returns `true` when a directory name starts or ends with `requirements`.
fn is_requirements_like_dir_name(name: &str) -> bool {
    const MARKER: &str = "requirements";
    // An exact match is covered by both checks below.
    name.starts_with(MARKER) || name.ends_with(MARKER)
}
106
107struct ParseState {
108 dependencies: Vec<Dependency>,
109 extra_index_urls: Vec<String>,
110 index_url: Option<String>,
111 includes: Vec<String>,
112 constraints: Vec<String>,
113 guard: RecursionGuard<PathBuf>,
114}
115
116fn extract_from_requirements_txt(path: &Path) -> PackageData {
117 let mut state = ParseState {
118 dependencies: Vec::new(),
119 extra_index_urls: Vec::new(),
120 index_url: None,
121 includes: Vec::new(),
122 constraints: Vec::new(),
123 guard: RecursionGuard::new(),
124 };
125
126 let (scope, is_runtime) = scope_from_filename(path);
127
128 parse_requirements_with_includes(path, &mut state, &scope, is_runtime);
129
130 let mut extra_data = HashMap::new();
131 if let Some(url) = state.index_url {
132 extra_data.insert(
133 "index_url".to_string(),
134 JsonValue::String(truncate_field(url)),
135 );
136 }
137 if !state.extra_index_urls.is_empty() {
138 extra_data.insert(
139 "extra_index_urls".to_string(),
140 JsonValue::Array(
141 state
142 .extra_index_urls
143 .into_iter()
144 .map(|u| JsonValue::String(truncate_field(u)))
145 .collect(),
146 ),
147 );
148 }
149 if !state.includes.is_empty() {
150 extra_data.insert(
151 "requirements_includes".to_string(),
152 JsonValue::Array(
153 state
154 .includes
155 .into_iter()
156 .map(|i| JsonValue::String(truncate_field(i)))
157 .collect(),
158 ),
159 );
160 }
161 if !state.constraints.is_empty() {
162 extra_data.insert(
163 "constraints".to_string(),
164 JsonValue::Array(
165 state
166 .constraints
167 .into_iter()
168 .map(|c| JsonValue::String(truncate_field(c)))
169 .collect(),
170 ),
171 );
172 }
173
174 let extra_data = if extra_data.is_empty() {
175 None
176 } else {
177 Some(extra_data)
178 };
179
180 default_package_data(state.dependencies, extra_data)
181}
182
183fn parse_requirements_with_includes(
184 path: &Path,
185 state: &mut ParseState,
186 scope: &str,
187 is_runtime: bool,
188) {
189 if state.guard.exceeded() {
190 warn!(
191 "Maximum recursion depth ({}) exceeded for include: {:?}",
192 MAX_RECURSION_DEPTH, path
193 );
194 return;
195 }
196
197 let abs_path = match path.canonicalize() {
198 Ok(p) => p,
199 Err(_) => {
200 warn!("Cannot resolve path: {:?}", path);
201 return;
202 }
203 };
204
205 if state.guard.enter(abs_path.clone()) {
206 warn!("Circular include detected: {:?}", path);
207 return;
208 }
209
210 let content = match read_file_to_string(&abs_path, None) {
211 Ok(c) => c,
212 Err(e) => {
213 warn!("Cannot read file {:?}: {}", abs_path, e);
214 return;
215 }
216 };
217
218 for line in collect_logical_lines(&content)
219 .into_iter()
220 .take(MAX_ITERATION_COUNT)
221 {
222 let cleaned = strip_inline_comment(&line);
223 let trimmed = cleaned.trim();
224 if trimmed.is_empty() || trimmed.starts_with('#') {
225 continue;
226 }
227
228 if let Some(url) = parse_option_value(trimmed, "--extra-index-url") {
229 state.extra_index_urls.push(truncate_field(url));
230 continue;
231 }
232
233 if let Some(url) = parse_option_value(trimmed, "--index-url") {
234 state.index_url = Some(truncate_field(url));
235 continue;
236 }
237
238 if let Some(path_value) = parse_option_value(trimmed, "-r")
239 .or_else(|| parse_option_value(trimmed, "--requirement"))
240 {
241 state.includes.push(truncate_field(path_value.clone()));
242 let included_path = abs_path
243 .parent()
244 .unwrap_or_else(|| Path::new("."))
245 .join(&path_value);
246
247 if included_path.exists() {
248 parse_requirements_with_includes(&included_path, state, scope, is_runtime);
249 } else {
250 warn!("Included file not found: {:?}", included_path);
251 }
252 continue;
253 }
254
255 if let Some(path_value) = parse_option_value(trimmed, "-c")
256 .or_else(|| parse_option_value(trimmed, "--constraint"))
257 {
258 state.constraints.push(truncate_field(path_value.clone()));
259 let constraint_path = abs_path
260 .parent()
261 .unwrap_or_else(|| Path::new("."))
262 .join(&path_value);
263
264 if constraint_path.exists() {
265 parse_requirements_with_includes(&constraint_path, state, scope, is_runtime);
266 } else {
267 warn!("Constraint file not found: {:?}", constraint_path);
268 }
269 continue;
270 }
271
272 if trimmed.starts_with('-')
273 && !trimmed.starts_with("-e")
274 && !trimmed.starts_with("--editable")
275 {
276 continue;
277 }
278
279 if let Some(dependency) = build_dependency(trimmed, scope, is_runtime) {
280 if state.dependencies.len() >= MAX_ITERATION_COUNT {
281 warn!(
282 "Reached maximum dependency count ({}) in {:?}",
283 MAX_ITERATION_COUNT, abs_path
284 );
285 break;
286 }
287 state.dependencies.push(dependency);
288 }
289 }
290
291 state.guard.leave(abs_path);
292}
293
294fn default_package_data(
295 dependencies: Vec<Dependency>,
296 extra_data: Option<HashMap<String, JsonValue>>,
297) -> PackageData {
298 PackageData {
299 package_type: Some(RequirementsTxtParser::PACKAGE_TYPE),
300 primary_language: Some("Python".to_string()),
301 extra_data,
302 dependencies,
303 datasource_id: Some(DatasourceId::PipRequirements),
304 ..Default::default()
305 }
306}
307
308fn collect_logical_lines(content: &str) -> Vec<String> {
309 let mut lines = Vec::new();
310 let mut current = String::new();
311
312 for raw_line in content.lines().take(MAX_ITERATION_COUNT) {
313 let line = raw_line.trim_end_matches('\r');
314 let trimmed = line.trim_end();
315 let is_continuation = trimmed.ends_with('\\');
316 let line_without = if is_continuation {
317 trimmed.trim_end_matches('\\')
318 } else {
319 line
320 };
321
322 if !line_without.trim().is_empty() {
323 if !current.is_empty() {
324 current.push(' ');
325 }
326 current.push_str(line_without.trim());
327 }
328
329 if !is_continuation && !current.is_empty() {
330 lines.push(current.trim().to_string());
331 current.clear();
332 }
333 }
334
335 if !current.is_empty() {
336 lines.push(current.trim().to_string());
337 }
338
339 lines
340}
341
/// Removes a trailing `# comment` from `line`.
///
/// A `#` starts a comment only when it is outside single/double quotes and is
/// either the first character or preceded by whitespace, so fragments such as
/// `url#egg=name` are left intact. The returned text has trailing whitespace
/// removed when a comment was stripped; otherwise `line` is returned as is.
fn strip_inline_comment(line: &str) -> String {
    // `Some(q)` while inside a quoted section opened by `q`.
    let mut open_quote: Option<char> = None;

    for (idx, ch) in line.char_indices() {
        match (open_quote, ch) {
            (None, '\'' | '"') => open_quote = Some(ch),
            (Some(quote), _) if quote == ch => open_quote = None,
            (None, '#') => {
                let before = &line[..idx];
                if before.is_empty() || before.ends_with(char::is_whitespace) {
                    return before.trim_end().to_string();
                }
            }
            _ => {}
        }
    }

    line.to_string()
}
360
/// Extracts the value of `option` from `line`, if `line` starts with it.
///
/// Both `OPTION value` and `OPTION=value` forms are accepted (whitespace
/// around `=` is ignored). Returns `None` when `line` does not start with
/// `option` or when no value follows it.
fn parse_option_value(line: &str, option: &str) -> Option<String> {
    let after_option = line.strip_prefix(option)?.trim();
    let value = match after_option.strip_prefix('=') {
        Some(after_equals) => after_equals.trim(),
        None => after_option,
    };

    if value.is_empty() {
        return None;
    }
    Some(value.to_string())
}
373
/// Derives the dependency scope and runtime flag from the file name of `path`.
///
/// The lower-cased file name is searched for `dev`, `test` and `doc`, in that
/// order; the first hit yields `("develop" | "test" | "docs", false)`.
/// Anything else is treated as `("install", true)`.
fn scope_from_filename(path: &Path) -> (String, bool) {
    // (substring searched for, scope reported) in priority order.
    const SCOPE_MARKERS: [(&str, &str); 3] =
        [("dev", "develop"), ("test", "test"), ("doc", "docs")];

    let filename = path
        .file_name()
        .and_then(|name| name.to_str())
        .map(str::to_ascii_lowercase)
        .unwrap_or_default();

    for (marker, scope) in SCOPE_MARKERS {
        if filename.contains(marker) {
            return (scope.to_string(), false);
        }
    }

    ("install".to_string(), true)
}
393
394fn build_dependency(line: &str, scope: &str, is_runtime: bool) -> Option<Dependency> {
395 let trimmed = line.trim();
396 if trimmed.is_empty() {
397 return None;
398 }
399
400 let mut is_editable = false;
401 let mut requirement = truncate_field(trimmed.to_string());
402 let mut extracted_requirement = truncate_field(trimmed.to_string());
403
404 if let Some(rest) = trimmed.strip_prefix("-e") {
405 is_editable = true;
406 requirement = truncate_field(rest.trim().to_string());
407 extracted_requirement = truncate_field(format!("--editable {}", requirement));
408 } else if let Some(rest) = trimmed.strip_prefix("--editable") {
409 is_editable = true;
410 requirement = truncate_field(rest.trim().to_string());
411 extracted_requirement = truncate_field(format!("--editable {}", requirement));
412 }
413
414 let (requirement, hash_options) = split_hash_options(&requirement);
415 let requirement = requirement.trim();
416 if requirement.is_empty() {
417 return None;
418 }
419
420 if looks_like_hash_only_requirement(requirement) {
421 return None;
422 }
423
424 let parsed = parse_requirement(requirement);
425
426 let pinned_version = parsed
427 .specifiers
428 .as_deref()
429 .and_then(extract_pinned_version);
430 let is_pinned = pinned_version.is_some();
431
432 let purl = parsed
433 .name
434 .as_ref()
435 .and_then(|name| create_pypi_purl(name, pinned_version.as_deref()));
436
437 let mut extra_data = HashMap::new();
438 extra_data.insert("is_editable".to_string(), JsonValue::Bool(is_editable));
439 extra_data.insert(
440 "link".to_string(),
441 parsed
442 .link
443 .clone()
444 .map(|l| JsonValue::String(truncate_field(l)))
445 .unwrap_or(JsonValue::Null),
446 );
447 extra_data.insert(
448 "hash_options".to_string(),
449 JsonValue::Array(
450 hash_options
451 .into_iter()
452 .map(|h| JsonValue::String(truncate_field(h)))
453 .collect(),
454 ),
455 );
456 extra_data.insert("is_constraint".to_string(), JsonValue::Bool(false));
457 extra_data.insert(
458 "is_archive".to_string(),
459 parsed
460 .is_archive
461 .map(JsonValue::Bool)
462 .unwrap_or(JsonValue::Null),
463 );
464 extra_data.insert("is_wheel".to_string(), JsonValue::Bool(parsed.is_wheel));
465 extra_data.insert(
466 "is_url".to_string(),
467 parsed
468 .is_url
469 .map(JsonValue::Bool)
470 .unwrap_or(JsonValue::Null),
471 );
472 extra_data.insert(
473 "is_vcs_url".to_string(),
474 parsed
475 .is_vcs_url
476 .map(JsonValue::Bool)
477 .unwrap_or(JsonValue::Null),
478 );
479 extra_data.insert(
480 "is_name_at_url".to_string(),
481 JsonValue::Bool(parsed.is_name_at_url),
482 );
483 extra_data.insert(
484 "is_local_path".to_string(),
485 parsed
486 .is_local_path
487 .map(|value| value || is_editable)
488 .map(JsonValue::Bool)
489 .unwrap_or(JsonValue::Null),
490 );
491
492 if let Some(marker) = parsed.marker {
493 extra_data.insert(
494 "markers".to_string(),
495 JsonValue::String(truncate_field(marker)),
496 );
497 }
498
499 Some(Dependency {
500 purl,
501 extracted_requirement: Some(truncate_field(extracted_requirement)),
502 scope: Some(scope.to_string()),
503 is_runtime: Some(is_runtime),
504 is_optional: Some(false),
505 is_pinned: Some(is_pinned),
506 is_direct: Some(true),
507 resolved_package: None,
508 extra_data: Some(extra_data),
509 })
510}
511
/// Returns `true` when `requirement` is nothing but a hex digest.
///
/// After trimming, the text must be 32, 40, 64, 96 or 128 bytes long and
/// consist solely of ASCII hex digits. Requiring hex digits everywhere already
/// rules out whitespace, brackets, `@`, `;`, slashes, `==` and URL schemes.
fn looks_like_hash_only_requirement(requirement: &str) -> bool {
    const DIGEST_HEX_LENGTHS: [usize; 5] = [32, 40, 64, 96, 128];

    let candidate = requirement.trim();
    DIGEST_HEX_LENGTHS.contains(&candidate.len())
        && candidate.bytes().all(|byte| byte.is_ascii_hexdigit())
}
529
/// Separates `--hash=VALUE` tokens from the rest of a requirement.
///
/// Returns the remaining whitespace-separated tokens re-joined with single
/// spaces, together with the hash values in order of appearance. A bare
/// `--hash=` with no value is dropped from both outputs.
fn split_hash_options(input: &str) -> (String, Vec<String>) {
    let mut kept_tokens: Vec<&str> = Vec::new();
    let mut hash_values = Vec::new();

    for token in input.split_whitespace() {
        match token.strip_prefix("--hash=") {
            Some("") => {}
            Some(value) => hash_values.push(value.to_string()),
            None => kept_tokens.push(token),
        }
    }

    (kept_tokens.join(" "), hash_values)
}
546
547struct ParsedRequirement {
548 name: Option<String>,
549 specifiers: Option<String>,
550 marker: Option<String>,
551 link: Option<String>,
552 is_url: Option<bool>,
553 is_vcs_url: Option<bool>,
554 is_local_path: Option<bool>,
555 is_name_at_url: bool,
556 is_archive: Option<bool>,
557 is_wheel: bool,
558}
559
560fn parse_requirement(input: &str) -> ParsedRequirement {
561 if let Some(parsed) = parse_pep508_requirement(input) {
562 if let Some(url) = parsed.url.clone() {
563 return parsed_with_link(parsed, &url);
564 }
565
566 if !is_link_like(input) {
567 let name = Some(normalize_pypi_name(&parsed.name));
568 return ParsedRequirement {
569 name,
570 specifiers: parsed.specifiers.map(truncate_field),
571 marker: parsed.marker.map(truncate_field),
572 link: None,
573 is_url: None,
574 is_vcs_url: None,
575 is_local_path: None,
576 is_name_at_url: false,
577 is_archive: None,
578 is_wheel: false,
579 };
580 }
581 }
582
583 if let Some((name, link)) = parse_link_with_name(input) {
584 let normalized_name = normalize_pypi_name(&name);
585 let link_info = parse_link_flags(&link);
586 return ParsedRequirement {
587 name: Some(normalized_name),
588 specifiers: None,
589 marker: None,
590 link: Some(truncate_field(link)),
591 is_url: Some(link_info.is_url),
592 is_vcs_url: Some(link_info.is_vcs_url),
593 is_local_path: Some(link_info.is_local_path),
594 is_name_at_url: link_info.is_name_at_url,
595 is_archive: link_info.is_archive,
596 is_wheel: link_info.is_wheel,
597 };
598 }
599
600 let link_info = parse_link_flags(input);
601 ParsedRequirement {
602 name: None,
603 specifiers: None,
604 marker: None,
605 link: Some(truncate_field(input.to_string())),
606 is_url: Some(link_info.is_url),
607 is_vcs_url: Some(link_info.is_vcs_url),
608 is_local_path: Some(link_info.is_local_path),
609 is_name_at_url: link_info.is_name_at_url,
610 is_archive: link_info.is_archive,
611 is_wheel: link_info.is_wheel,
612 }
613}
614
615fn parsed_with_link(parsed: Pep508Requirement, link: &str) -> ParsedRequirement {
616 let name = normalize_pypi_name(&parsed.name);
617 let link_info = parse_link_flags(link);
618 ParsedRequirement {
619 name: Some(name),
620 specifiers: parsed.specifiers.map(truncate_field),
621 marker: parsed.marker.map(truncate_field),
622 link: Some(truncate_field(link.to_string())),
623 is_url: Some(link_info.is_url),
624 is_vcs_url: Some(link_info.is_vcs_url),
625 is_local_path: Some(link_info.is_local_path),
626 is_name_at_url: parsed.is_name_at_url,
627 is_archive: link_info.is_archive,
628 is_wheel: link_info.is_wheel,
629 }
630}
631
632fn parse_link_with_name(input: &str) -> Option<(String, String)> {
633 if let Some(egg) = extract_egg_name(input) {
634 return Some((egg, input.to_string()));
635 }
636 None
637}
638
639fn extract_egg_name(input: &str) -> Option<String> {
640 let fragment = input.split('#').nth(1)?;
641 let egg_part = fragment.strip_prefix("egg=")?;
642 let name_part = egg_part.split('&').next()?.trim();
643 if name_part.is_empty() {
644 return None;
645 }
646 let (name, _extras, _) = parse_pep508_requirement(name_part)
647 .map(|parsed| (parsed.name, parsed.extras, parsed.specifiers))
648 .unwrap_or_else(|| (name_part.to_string(), Vec::new(), None));
649 Some(name)
650}
651
/// Classification of a requirement link (URL or path).
struct LinkFlags {
    /// The link has a URL scheme (`://` or `file:`) or a VCS prefix.
    is_url: bool,
    /// The link starts with `git+`, `hg+`, `svn+` or `bzr+`.
    is_vcs_url: bool,
    /// The link starts with `./`, `../`, `/`, `~` or `file:`.
    is_local_path: bool,
    /// Always `false` when produced by `parse_link_flags`.
    is_name_at_url: bool,
    /// `Some(true)` for known archive suffixes, `Some(false)` for other
    /// URLs/paths, `None` when the text is neither.
    is_archive: Option<bool>,
    /// The link ends with `.whl`.
    is_wheel: bool,
}

/// Derives [`LinkFlags`] from the textual form of `link`.
///
/// Classification is purely prefix/suffix based on the trimmed text; nothing
/// is resolved or accessed.
fn parse_link_flags(link: &str) -> LinkFlags {
    const VCS_PREFIXES: [&str; 4] = ["git+", "hg+", "svn+", "bzr+"];
    const LOCAL_PREFIXES: [&str; 5] = ["./", "../", "/", "~", "file:"];
    // `.whl` is listed here because a wheel is also reported as an archive.
    const ARCHIVE_SUFFIXES: [&str; 6] = [".whl", ".zip", ".tar.gz", ".tgz", ".tar.bz2", ".tar"];

    let link = link.trim();
    let starts_with_any =
        |prefixes: &[&str]| prefixes.iter().any(|prefix| link.starts_with(*prefix));

    let is_vcs_url = starts_with_any(&VCS_PREFIXES);
    let is_local_path = starts_with_any(&LOCAL_PREFIXES);
    let has_scheme = link.contains("://") || link.starts_with("file:");
    let is_wheel = link.ends_with(".whl");

    let has_archive_suffix = ARCHIVE_SUFFIXES
        .iter()
        .any(|suffix| link.ends_with(*suffix));
    let is_archive = match (has_archive_suffix, has_scheme || is_local_path) {
        (true, _) => Some(true),
        (false, true) => Some(false),
        (false, false) => None,
    };

    LinkFlags {
        is_url: has_scheme || is_vcs_url,
        is_vcs_url,
        is_local_path,
        is_name_at_url: false,
        is_archive,
        is_wheel,
    }
}
698
/// Returns `true` when `input` looks like a URL, VCS reference or filesystem
/// path rather than a plain package name.
fn is_link_like(input: &str) -> bool {
    const LINK_PREFIXES: [&str; 9] = [
        "git+", "hg+", "svn+", "bzr+", "file:", "./", "../", "/", "~",
    ];

    let candidate = input.trim();
    candidate.contains("://")
        || LINK_PREFIXES
            .iter()
            .any(|prefix| candidate.starts_with(*prefix))
}
712
/// Returns the exact version when `specifiers` pins a single version.
///
/// A pin is a lone `==VERSION` or `===VERSION` clause. Anything containing a
/// comma (multiple clauses), any other operator, or an empty version yields
/// `None`. The returned version has surrounding whitespace removed.
fn extract_pinned_version(specifiers: &str) -> Option<String> {
    let trimmed = specifiers.trim();
    if trimmed.contains(',') {
        return None;
    }

    // `===` has to be tried first: `==` is a prefix of it, so testing `==`
    // first would leave a stray `=` at the front of the version.
    let version = trimmed
        .strip_prefix("===")
        .or_else(|| trimmed.strip_prefix("=="))?
        .trim();

    if version.is_empty() {
        None
    } else {
        Some(version.to_string())
    }
}
734
735fn create_pypi_purl(name: &str, version: Option<&str>) -> Option<String> {
736 PackageUrl::new(RequirementsTxtParser::PACKAGE_TYPE.as_str(), name)
737 .ok()
738 .map(|_| match version {
739 Some(version) => format!("pkg:pypi/{name}@{}", encode_pypi_purl_version(version)),
740 None => format!("pkg:pypi/{name}"),
741 })
742}
743
/// Percent-encodes every `*` in `version` as `%2A`; all other characters are kept.
fn encode_pypi_purl_version(version: &str) -> String {
    let mut encoded = String::with_capacity(version.len());
    for ch in version.chars() {
        if ch == '*' {
            encoded.push_str("%2A");
        } else {
            encoded.push(ch);
        }
    }
    encoded
}
747
/// Normalizes a project name: trims it, lower-cases ASCII letters, and
/// collapses every run of `-`, `_` and `.` into a single `-`.
fn normalize_pypi_name(name: &str) -> String {
    let mut normalized = String::with_capacity(name.len());

    for ch in name.trim().chars() {
        if matches!(ch, '-' | '_' | '.') {
            // Only the first separator of a run is emitted.
            if !normalized.ends_with('-') {
                normalized.push('-');
            }
        } else {
            normalized.push(ch.to_ascii_lowercase());
        }
    }

    normalized
}
766
767crate::register_parser!(
768 "pip requirements file",
769 &[
770 "**/requirements*.txt",
771 "**/*requirements.txt",
772 "**/reqs.txt",
773 "**/*-reqs.txt",
774 "**/*_reqs.txt",
775 "**/*.reqs.txt",
776 "**/requirements*.in",
777 "**/*requirements.in",
778 "**/requires.txt",
779 "**/requirements/*.txt",
780 "**/requirements/*.in",
781 "**/requirements/**/*.txt",
782 "**/requirements/**/*.in",
783 "**/requirements*/*.txt",
784 "**/requirements*/*.in",
785 "**/requirements*/**/*.txt",
786 "**/requirements*/**/*.in"
787 ],
788 "pypi",
789 "Python",
790 Some("https://pip.pypa.io/en/latest/reference/requirements-file-format/"),
791);