1use std::collections::HashMap;
29use std::path::{Path, PathBuf};
30
31use crate::parser_warn as warn;
32use packageurl::PackageUrl;
33use serde_json::Value as JsonValue;
34
35use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
36use crate::parsers::pep508::{Pep508Requirement, parse_pep508_requirement};
37use crate::parsers::utils::{
38 MAX_ITERATION_COUNT, MAX_RECURSION_DEPTH, RecursionGuard, read_file_to_string, truncate_field,
39};
40
41use super::PackageParser;
42
/// Parser for pip requirements files (`requirements.txt`, `*.in`, `requires.txt`
/// and the other file name patterns listed in its parser metadata).
pub struct RequirementsTxtParser;
48
49impl PackageParser for RequirementsTxtParser {
50 const PACKAGE_TYPE: PackageType = PackageType::Pypi;
51
52 fn extract_packages(path: &Path) -> Vec<PackageData> {
53 vec![extract_from_requirements_txt(path)]
54 }
55
56 fn is_match(path: &Path) -> bool {
57 let filename = path.file_name().and_then(|name| name.to_str());
58 let Some(name) = filename else {
59 return false;
60 };
61
62 is_requirements_txt_filename(name)
63 || (is_requirements_like_extension(name) && has_requirements_like_ancestor(path))
64 }
65
66 fn metadata() -> Vec<super::metadata::ParserMetadata> {
67 vec![super::metadata::ParserMetadata {
68 description: "pip requirements file",
69 file_patterns: &[
70 "**/requirements*.txt",
71 "**/*requirements.txt",
72 "**/reqs.txt",
73 "**/minreqs.txt",
74 "**/*-reqs.txt",
75 "**/*_reqs.txt",
76 "**/*.reqs.txt",
77 "**/*-minreqs.txt",
78 "**/*_minreqs.txt",
79 "**/*.minreqs.txt",
80 "**/requirements*.in",
81 "**/*requirements.in",
82 "**/requires.txt",
83 "**/requirements/*.txt",
84 "**/requirements/*.in",
85 "**/requirements/**/*.txt",
86 "**/requirements/**/*.in",
87 "**/requirements*/*.txt",
88 "**/requirements*/*.in",
89 "**/requirements*/**/*.txt",
90 "**/requirements*/**/*.in",
91 ],
92 package_type: "pypi",
93 primary_language: "Python",
94 documentation_url: Some(
95 "https://pip.pypa.io/en/latest/reference/requirements-file-format/",
96 ),
97 }]
98 }
99}
100
101fn is_requirements_txt_filename(name: &str) -> bool {
102 if name == "requirements.txt" || name == "requires.txt" {
103 return true;
104 }
105
106 let (stem, extension) = if let Some(stem) = name.strip_suffix(".txt") {
107 (stem, "txt")
108 } else if let Some(stem) = name.strip_suffix(".in") {
109 (stem, "in")
110 } else {
111 return false;
112 };
113
114 stem == "requirements"
118 || stem.starts_with("requirements")
119 || stem.ends_with("requirements")
120 || (extension == "txt" && is_reqs_alias_stem(stem))
121}
122
123fn is_reqs_alias_stem(stem: &str) -> bool {
124 matches_requirement_alias_stem(stem, "reqs") || matches_requirement_alias_stem(stem, "minreqs")
125}
126
/// Returns `true` when `stem` equals `alias`, or ends with `alias` directly
/// preceded by one of the separators `-`, `_` or `.`.
fn matches_requirement_alias_stem(stem: &str, alias: &str) -> bool {
    match stem.strip_suffix(alias) {
        // Nothing left in front of the alias: the stem is the alias itself.
        Some("") => true,
        Some(prefix) => prefix.ends_with(['-', '_', '.']),
        None => false,
    }
}
133
/// Returns `true` when `name` ends with `.txt` or `.in`.
fn is_requirements_like_extension(name: &str) -> bool {
    [".txt", ".in"].iter().any(|&suffix| name.ends_with(suffix))
}
137
138fn has_requirements_like_ancestor(path: &Path) -> bool {
139 path.parent()
140 .into_iter()
141 .flat_map(Path::ancestors)
142 .filter_map(|ancestor| ancestor.file_name())
143 .filter_map(|name| name.to_str())
144 .any(is_requirements_like_dir_name)
145}
146
/// Returns `true` when a directory name starts or ends with `requirements`
/// (which includes the exact name `requirements`).
fn is_requirements_like_dir_name(name: &str) -> bool {
    const MARKER: &str = "requirements";
    name.starts_with(MARKER) || name.ends_with(MARKER)
}
150
/// Accumulator shared across a requirements file and every file it pulls in
/// through `-r` / `-c` references.
struct ParseState {
    /// Dependencies collected so far, in the order they were encountered.
    dependencies: Vec<Dependency>,
    /// Values of every `--extra-index-url` option seen.
    extra_index_urls: Vec<String>,
    /// Value of the most recent `--index-url` option seen, if any.
    index_url: Option<String>,
    /// Raw path values of `-r` / `--requirement` options, as written in the files.
    includes: Vec<String>,
    /// Raw path values of `-c` / `--constraint` options, as written in the files.
    constraints: Vec<String>,
    /// Tracks the canonical paths currently being parsed; consulted to stop
    /// circular includes and overly deep nesting.
    guard: RecursionGuard<PathBuf>,
}
159
160fn extract_from_requirements_txt(path: &Path) -> PackageData {
161 let mut state = ParseState {
162 dependencies: Vec::new(),
163 extra_index_urls: Vec::new(),
164 index_url: None,
165 includes: Vec::new(),
166 constraints: Vec::new(),
167 guard: RecursionGuard::new(),
168 };
169
170 let (scope, is_runtime) = scope_from_filename(path);
171
172 parse_requirements_with_includes(path, &mut state, &scope, is_runtime);
173
174 let mut extra_data = HashMap::new();
175 if let Some(url) = state.index_url {
176 extra_data.insert(
177 "index_url".to_string(),
178 JsonValue::String(truncate_field(url)),
179 );
180 }
181 if !state.extra_index_urls.is_empty() {
182 extra_data.insert(
183 "extra_index_urls".to_string(),
184 JsonValue::Array(
185 state
186 .extra_index_urls
187 .into_iter()
188 .map(|u| JsonValue::String(truncate_field(u)))
189 .collect(),
190 ),
191 );
192 }
193 if !state.includes.is_empty() {
194 extra_data.insert(
195 "requirements_includes".to_string(),
196 JsonValue::Array(
197 state
198 .includes
199 .into_iter()
200 .map(|i| JsonValue::String(truncate_field(i)))
201 .collect(),
202 ),
203 );
204 }
205 if !state.constraints.is_empty() {
206 extra_data.insert(
207 "constraints".to_string(),
208 JsonValue::Array(
209 state
210 .constraints
211 .into_iter()
212 .map(|c| JsonValue::String(truncate_field(c)))
213 .collect(),
214 ),
215 );
216 }
217
218 let extra_data = if extra_data.is_empty() {
219 None
220 } else {
221 Some(extra_data)
222 };
223
224 default_package_data(state.dependencies, extra_data)
225}
226
227fn parse_requirements_with_includes(
228 path: &Path,
229 state: &mut ParseState,
230 scope: &str,
231 is_runtime: bool,
232) {
233 if state.guard.exceeded() {
234 warn!(
235 "Maximum recursion depth ({}) exceeded for include: {:?}",
236 MAX_RECURSION_DEPTH, path
237 );
238 return;
239 }
240
241 let abs_path = match path.canonicalize() {
242 Ok(p) => p,
243 Err(_) => {
244 warn!("Cannot resolve path: {:?}", path);
245 return;
246 }
247 };
248
249 if state.guard.enter(abs_path.clone()) {
250 warn!("Circular include detected: {:?}", path);
251 return;
252 }
253
254 let content = match read_file_to_string(&abs_path, None) {
255 Ok(c) => c,
256 Err(e) => {
257 warn!("Cannot read file {:?}: {}", abs_path, e);
258 return;
259 }
260 };
261
262 for line in collect_logical_lines(&content)
263 .into_iter()
264 .take(MAX_ITERATION_COUNT)
265 {
266 let cleaned = strip_inline_comment(&line);
267 let trimmed = cleaned.trim();
268 if trimmed.is_empty() || trimmed.starts_with('#') {
269 continue;
270 }
271
272 if let Some(url) = parse_option_value(trimmed, "--extra-index-url") {
273 state.extra_index_urls.push(truncate_field(url));
274 continue;
275 }
276
277 if let Some(url) = parse_option_value(trimmed, "--index-url") {
278 state.index_url = Some(truncate_field(url));
279 continue;
280 }
281
282 if let Some(path_value) = parse_option_value(trimmed, "-r")
283 .or_else(|| parse_option_value(trimmed, "--requirement"))
284 {
285 state.includes.push(truncate_field(path_value.clone()));
286 let included_path = abs_path
287 .parent()
288 .unwrap_or_else(|| Path::new("."))
289 .join(&path_value);
290
291 if included_path.exists() {
292 parse_requirements_with_includes(&included_path, state, scope, is_runtime);
293 } else {
294 warn!("Included file not found: {:?}", included_path);
295 }
296 continue;
297 }
298
299 if let Some(path_value) = parse_option_value(trimmed, "-c")
300 .or_else(|| parse_option_value(trimmed, "--constraint"))
301 {
302 state.constraints.push(truncate_field(path_value.clone()));
303 let constraint_path = abs_path
304 .parent()
305 .unwrap_or_else(|| Path::new("."))
306 .join(&path_value);
307
308 if constraint_path.exists() {
309 parse_requirements_with_includes(&constraint_path, state, scope, is_runtime);
310 } else {
311 warn!("Constraint file not found: {:?}", constraint_path);
312 }
313 continue;
314 }
315
316 if trimmed.starts_with('-')
317 && !trimmed.starts_with("-e")
318 && !trimmed.starts_with("--editable")
319 {
320 continue;
321 }
322
323 if let Some(dependency) = build_dependency(trimmed, scope, is_runtime) {
324 if state.dependencies.len() >= MAX_ITERATION_COUNT {
325 warn!(
326 "Reached maximum dependency count ({}) in {:?}",
327 MAX_ITERATION_COUNT, abs_path
328 );
329 break;
330 }
331 state.dependencies.push(dependency);
332 }
333 }
334
335 state.guard.leave(abs_path);
336}
337
338fn default_package_data(
339 dependencies: Vec<Dependency>,
340 extra_data: Option<HashMap<String, JsonValue>>,
341) -> PackageData {
342 PackageData {
343 package_type: Some(RequirementsTxtParser::PACKAGE_TYPE),
344 primary_language: Some("Python".to_string()),
345 extra_data,
346 dependencies,
347 datasource_id: Some(DatasourceId::PipRequirements),
348 ..Default::default()
349 }
350}
351
352fn collect_logical_lines(content: &str) -> Vec<String> {
353 let mut lines = Vec::new();
354 let mut current = String::new();
355
356 for raw_line in content.lines().take(MAX_ITERATION_COUNT) {
357 let line = raw_line.trim_end_matches('\r');
358 let trimmed = line.trim_end();
359 let is_continuation = trimmed.ends_with('\\');
360 let line_without = if is_continuation {
361 trimmed.trim_end_matches('\\')
362 } else {
363 line
364 };
365
366 if !line_without.trim().is_empty() {
367 if !current.is_empty() {
368 current.push(' ');
369 }
370 current.push_str(line_without.trim());
371 }
372
373 if !is_continuation && !current.is_empty() {
374 lines.push(current.trim().to_string());
375 current.clear();
376 }
377 }
378
379 if !current.is_empty() {
380 lines.push(current.trim().to_string());
381 }
382
383 lines
384}
385
/// Removes a trailing `# comment` from `line`.
///
/// A `#` starts a comment only when it is outside single/double quotes and is
/// either the first character or preceded by whitespace; the text before it is
/// returned with trailing whitespace removed. Otherwise the line is returned
/// unchanged, so URL fragments such as `...#egg=name` survive.
fn strip_inline_comment(line: &str) -> String {
    // `Some(q)` while inside a quoted span opened by `q`.
    let mut open_quote: Option<char> = None;

    for (pos, current) in line.char_indices() {
        match open_quote {
            Some(quote) => {
                if current == quote {
                    open_quote = None;
                }
            }
            None => match current {
                '\'' | '"' => open_quote = Some(current),
                '#' => {
                    let before = &line[..pos];
                    if before.is_empty() || before.ends_with(char::is_whitespace) {
                        return before.trim_end().to_string();
                    }
                }
                _ => {}
            },
        }
    }

    line.to_string()
}
404
/// Returns the value that follows `option` at the start of `line`.
///
/// The value may be separated from the option by whitespace and/or a single
/// `=`. Returns `None` when `line` does not start with `option` or when no
/// value remains.
fn parse_option_value(line: &str, option: &str) -> Option<String> {
    let after_option = line.strip_prefix(option)?.trim();
    let value = match after_option.strip_prefix('=') {
        Some(after_equals) => after_equals.trim(),
        None => after_option,
    };
    (!value.is_empty()).then(|| value.to_string())
}
417
/// Derives the dependency scope and runtime flag from the file name of `path`.
///
/// The lower-cased file name is searched for `dev`, `test` and `doc`, in that
/// order; the first hit gives `develop`, `test` or `docs` with a `false`
/// runtime flag. Any other name gives `("install", true)`.
fn scope_from_filename(path: &Path) -> (String, bool) {
    // Ordered: earlier entries win when several markers occur in one name.
    const NON_RUNTIME_SCOPES: [(&str, &str); 3] =
        [("dev", "develop"), ("test", "test"), ("doc", "docs")];

    let lowered = path
        .file_name()
        .and_then(|raw| raw.to_str())
        .map(str::to_ascii_lowercase)
        .unwrap_or_default();

    NON_RUNTIME_SCOPES
        .iter()
        .find(|&&(marker, _)| lowered.contains(marker))
        .map_or_else(
            || ("install".to_string(), true),
            |&(_, scope)| (scope.to_string(), false),
        )
}
437
438fn build_dependency(line: &str, scope: &str, is_runtime: bool) -> Option<Dependency> {
439 let trimmed = line.trim();
440 if trimmed.is_empty() {
441 return None;
442 }
443
444 let mut is_editable = false;
445 let mut requirement = truncate_field(trimmed.to_string());
446 let mut extracted_requirement = truncate_field(trimmed.to_string());
447
448 if let Some(rest) = trimmed.strip_prefix("-e") {
449 is_editable = true;
450 requirement = truncate_field(rest.trim().to_string());
451 extracted_requirement = truncate_field(format!("--editable {}", requirement));
452 } else if let Some(rest) = trimmed.strip_prefix("--editable") {
453 is_editable = true;
454 requirement = truncate_field(rest.trim().to_string());
455 extracted_requirement = truncate_field(format!("--editable {}", requirement));
456 }
457
458 let (requirement, hash_options) = split_hash_options(&requirement);
459 let requirement = requirement.trim();
460 if requirement.is_empty() {
461 return None;
462 }
463
464 if looks_like_hash_only_requirement(requirement) {
465 return None;
466 }
467
468 let parsed = parse_requirement(requirement);
469
470 let pinned_version = parsed
471 .specifiers
472 .as_deref()
473 .and_then(extract_pinned_version);
474 let is_pinned = pinned_version.is_some();
475
476 let purl = parsed
477 .name
478 .as_ref()
479 .and_then(|name| create_pypi_purl(name, pinned_version.as_deref()));
480
481 let mut extra_data = HashMap::new();
482 extra_data.insert("is_editable".to_string(), JsonValue::Bool(is_editable));
483 extra_data.insert(
484 "link".to_string(),
485 parsed
486 .link
487 .clone()
488 .map(|l| JsonValue::String(truncate_field(l)))
489 .unwrap_or(JsonValue::Null),
490 );
491 extra_data.insert(
492 "hash_options".to_string(),
493 JsonValue::Array(
494 hash_options
495 .into_iter()
496 .map(|h| JsonValue::String(truncate_field(h)))
497 .collect(),
498 ),
499 );
500 extra_data.insert("is_constraint".to_string(), JsonValue::Bool(false));
501 extra_data.insert(
502 "is_archive".to_string(),
503 parsed
504 .is_archive
505 .map(JsonValue::Bool)
506 .unwrap_or(JsonValue::Null),
507 );
508 extra_data.insert("is_wheel".to_string(), JsonValue::Bool(parsed.is_wheel));
509 extra_data.insert(
510 "is_url".to_string(),
511 parsed
512 .is_url
513 .map(JsonValue::Bool)
514 .unwrap_or(JsonValue::Null),
515 );
516 extra_data.insert(
517 "is_vcs_url".to_string(),
518 parsed
519 .is_vcs_url
520 .map(JsonValue::Bool)
521 .unwrap_or(JsonValue::Null),
522 );
523 extra_data.insert(
524 "is_name_at_url".to_string(),
525 JsonValue::Bool(parsed.is_name_at_url),
526 );
527 extra_data.insert(
528 "is_local_path".to_string(),
529 parsed
530 .is_local_path
531 .map(|value| value || is_editable)
532 .map(JsonValue::Bool)
533 .unwrap_or(JsonValue::Null),
534 );
535
536 if let Some(marker) = parsed.marker {
537 extra_data.insert(
538 "markers".to_string(),
539 JsonValue::String(truncate_field(marker)),
540 );
541 }
542
543 Some(Dependency {
544 purl,
545 extracted_requirement: Some(truncate_field(extracted_requirement)),
546 scope: Some(scope.to_string()),
547 is_runtime: Some(is_runtime),
548 is_optional: Some(false),
549 is_pinned: Some(is_pinned),
550 is_direct: Some(true),
551 resolved_package: None,
552 extra_data: Some(extra_data),
553 })
554}
555
/// Returns `true` when `requirement` is nothing but a bare hex digest.
///
/// The trimmed text must be 32, 40, 64, 96 or 128 characters long and consist
/// solely of ASCII hex digits. Because every character must be a hex digit,
/// text containing whitespace, brackets, `@`, `;`, slashes, `==`, `://` or
/// `git+` is rejected by that one test and needs no separate checks.
fn looks_like_hash_only_requirement(requirement: &str) -> bool {
    let candidate = requirement.trim();
    matches!(candidate.len(), 32 | 40 | 64 | 96 | 128)
        && candidate.bytes().all(|byte| byte.is_ascii_hexdigit())
}
573
/// Separates `--hash=<value>` tokens from the rest of a requirement line.
///
/// Returns the remaining whitespace-separated tokens re-joined with single
/// spaces, and the hash values in order of appearance. A `--hash=` token with
/// an empty value is removed from the line without producing a hash.
fn split_hash_options(input: &str) -> (String, Vec<String>) {
    const HASH_PREFIX: &str = "--hash=";

    let (hash_tokens, other_tokens): (Vec<&str>, Vec<&str>) = input
        .split_whitespace()
        .partition(|token| token.starts_with(HASH_PREFIX));

    let hashes = hash_tokens
        .into_iter()
        .filter_map(|token| token.strip_prefix(HASH_PREFIX))
        .filter(|value| !value.is_empty())
        .map(str::to_string)
        .collect();

    (other_tokens.join(" "), hashes)
}
590
/// Result of interpreting one requirement string, either as a named
/// requirement or as a link (URL, VCS URL or local path).
struct ParsedRequirement {
    /// Normalized project name; `None` when no name could be determined.
    name: Option<String>,
    /// Version specifiers as reported by the PEP 508 parser, if any.
    specifiers: Option<String>,
    /// Environment marker as reported by the PEP 508 parser, if any.
    marker: Option<String>,
    /// Link text when the requirement points at a URL or path.
    link: Option<String>,
    /// Link classification; `None` for plain named requirements without a link.
    is_url: Option<bool>,
    /// Link classification; `None` for plain named requirements without a link.
    is_vcs_url: Option<bool>,
    /// Link classification; `None` for plain named requirements without a link.
    is_local_path: Option<bool>,
    /// Whether the requirement used the `name @ url` form.
    is_name_at_url: bool,
    /// Whether the link ends in a known archive extension; `None` when unknown.
    is_archive: Option<bool>,
    /// Whether the link ends in `.whl`.
    is_wheel: bool,
}
603
604fn parse_requirement(input: &str) -> ParsedRequirement {
605 if let Some(parsed) = parse_pep508_requirement(input) {
606 if let Some(url) = parsed.url.clone() {
607 return parsed_with_link(parsed, &url);
608 }
609
610 if !is_link_like(input) {
611 let name = Some(normalize_pypi_name(&parsed.name));
612 return ParsedRequirement {
613 name,
614 specifiers: parsed.specifiers.map(truncate_field),
615 marker: parsed.marker.map(truncate_field),
616 link: None,
617 is_url: None,
618 is_vcs_url: None,
619 is_local_path: None,
620 is_name_at_url: false,
621 is_archive: None,
622 is_wheel: false,
623 };
624 }
625 }
626
627 if let Some((name, link)) = parse_link_with_name(input) {
628 let normalized_name = normalize_pypi_name(&name);
629 let link_info = parse_link_flags(&link);
630 return ParsedRequirement {
631 name: Some(normalized_name),
632 specifiers: None,
633 marker: None,
634 link: Some(truncate_field(link)),
635 is_url: Some(link_info.is_url),
636 is_vcs_url: Some(link_info.is_vcs_url),
637 is_local_path: Some(link_info.is_local_path),
638 is_name_at_url: link_info.is_name_at_url,
639 is_archive: link_info.is_archive,
640 is_wheel: link_info.is_wheel,
641 };
642 }
643
644 let link_info = parse_link_flags(input);
645 ParsedRequirement {
646 name: None,
647 specifiers: None,
648 marker: None,
649 link: Some(truncate_field(input.to_string())),
650 is_url: Some(link_info.is_url),
651 is_vcs_url: Some(link_info.is_vcs_url),
652 is_local_path: Some(link_info.is_local_path),
653 is_name_at_url: link_info.is_name_at_url,
654 is_archive: link_info.is_archive,
655 is_wheel: link_info.is_wheel,
656 }
657}
658
659fn parsed_with_link(parsed: Pep508Requirement, link: &str) -> ParsedRequirement {
660 let name = normalize_pypi_name(&parsed.name);
661 let link_info = parse_link_flags(link);
662 ParsedRequirement {
663 name: Some(name),
664 specifiers: parsed.specifiers.map(truncate_field),
665 marker: parsed.marker.map(truncate_field),
666 link: Some(truncate_field(link.to_string())),
667 is_url: Some(link_info.is_url),
668 is_vcs_url: Some(link_info.is_vcs_url),
669 is_local_path: Some(link_info.is_local_path),
670 is_name_at_url: parsed.is_name_at_url,
671 is_archive: link_info.is_archive,
672 is_wheel: link_info.is_wheel,
673 }
674}
675
676fn parse_link_with_name(input: &str) -> Option<(String, String)> {
677 if let Some(egg) = extract_egg_name(input) {
678 return Some((egg, input.to_string()));
679 }
680 None
681}
682
683fn extract_egg_name(input: &str) -> Option<String> {
684 let fragment = input.split('#').nth(1)?;
685 let egg_part = fragment.strip_prefix("egg=")?;
686 let name_part = egg_part.split('&').next()?.trim();
687 if name_part.is_empty() {
688 return None;
689 }
690 let (name, _extras, _) = parse_pep508_requirement(name_part)
691 .map(|parsed| (parsed.name, parsed.extras, parsed.specifiers))
692 .unwrap_or_else(|| (name_part.to_string(), Vec::new(), None));
693 Some(name)
694}
695
/// Classification of a requirement link, derived purely from its text.
struct LinkFlags {
    /// The link has a URL scheme (`://` or `file:`) or a VCS prefix.
    is_url: bool,
    /// The link starts with `git+`, `hg+`, `svn+` or `bzr+`.
    is_vcs_url: bool,
    /// The link starts with `./`, `../`, `/`, `~` or `file:`.
    is_local_path: bool,
    /// Always `false` here; the `name @ url` form is detected by the caller.
    is_name_at_url: bool,
    /// `Some(true)` for a known archive extension, `Some(false)` for other
    /// URLs and local paths, `None` when the text is neither.
    is_archive: Option<bool>,
    /// The link points at a `.whl` file.
    is_wheel: bool,
}

/// Derives [`LinkFlags`] from the text of `link` (surrounding whitespace is
/// ignored). No file system or network access is performed.
///
/// Extension checks look at the link as written and also at the part before
/// any `#fragment` or `?query`, so links such as `pkg-1.0.tar.gz#sha256=...`
/// or `pkg.whl#egg=pkg` are still recognised as archives / wheels.
fn parse_link_flags(link: &str) -> LinkFlags {
    let trimmed = link.trim();
    let is_vcs_url = trimmed.starts_with("git+")
        || trimmed.starts_with("hg+")
        || trimmed.starts_with("svn+")
        || trimmed.starts_with("bzr+");
    let has_scheme = trimmed.contains("://") || trimmed.starts_with("file:");
    let is_local_path = trimmed.starts_with("./")
        || trimmed.starts_with("../")
        || trimmed.starts_with('/')
        || trimmed.starts_with('~')
        || trimmed.starts_with("file:");

    // pip links routinely carry `#egg=` / `#sha256=` fragments (and sometimes
    // query strings) after the file name; the extension belongs to the path.
    let path_part = trimmed.split(['#', '?']).next().unwrap_or(trimmed);
    let has_extension = |ext: &str| trimmed.ends_with(ext) || path_part.ends_with(ext);

    let is_wheel = has_extension(".whl");
    let is_archive = if is_wheel
        || [".zip", ".tar.gz", ".tgz", ".tar.bz2", ".tar"]
            .iter()
            .any(|&ext| has_extension(ext))
    {
        Some(true)
    } else if has_scheme || is_local_path {
        Some(false)
    } else {
        None
    };

    LinkFlags {
        is_url: has_scheme || is_vcs_url,
        is_vcs_url,
        is_local_path,
        is_name_at_url: false,
        is_archive,
        is_wheel,
    }
}
742
/// Returns `true` when `input` (ignoring surrounding whitespace) looks like a
/// link rather than a named requirement: it contains `://` or starts with a
/// VCS prefix, `file:`, or a relative / absolute / home-relative path marker.
fn is_link_like(input: &str) -> bool {
    const LINK_PREFIXES: [&str; 9] = [
        "git+", "hg+", "svn+", "bzr+", "file:", "./", "../", "/", "~",
    ];

    let candidate = input.trim();
    candidate.contains("://")
        || LINK_PREFIXES
            .iter()
            .any(|&prefix| candidate.starts_with(prefix))
}
756
/// Returns the version when `specifiers` is a single `==` or `===` clause.
///
/// Specifier lists (anything containing a comma), other operators and an
/// empty version all give `None`.
fn extract_pinned_version(specifiers: &str) -> Option<String> {
    let spec = specifiers.trim();
    if spec.contains(',') {
        return None;
    }

    // `===` must be tried first, otherwise `==` would leave a stray `=` behind.
    let version = spec
        .strip_prefix("===")
        .or_else(|| spec.strip_prefix("=="))?
        .trim();

    (!version.is_empty()).then(|| version.to_string())
}
776
777fn create_pypi_purl(name: &str, version: Option<&str>) -> Option<String> {
778 PackageUrl::new(RequirementsTxtParser::PACKAGE_TYPE.as_str(), name)
779 .ok()
780 .map(|_| match version {
781 Some(version) => format!("pkg:pypi/{name}@{}", encode_pypi_purl_version(version)),
782 None => format!("pkg:pypi/{name}"),
783 })
784}
785
/// Percent-encodes every `*` in `version` as `%2A`; all other characters are
/// copied unchanged.
fn encode_pypi_purl_version(version: &str) -> String {
    let mut encoded = String::with_capacity(version.len());
    for ch in version.chars() {
        if ch == '*' {
            encoded.push_str("%2A");
        } else {
            encoded.push(ch);
        }
    }
    encoded
}
789
/// Normalizes a project name: surrounding whitespace is trimmed, ASCII
/// letters are lower-cased, and every run of `-`, `_` and `.` collapses to a
/// single `-`.
fn normalize_pypi_name(name: &str) -> String {
    let mut normalized = String::new();

    for ch in name.trim().chars() {
        if matches!(ch, '-' | '_' | '.') {
            // A `-` at the end of the output can only come from a separator,
            // so it marks a run that has already been emitted.
            if !normalized.ends_with('-') {
                normalized.push('-');
            }
        } else {
            normalized.push(ch.to_ascii_lowercase());
        }
    }

    normalized
}