1use std::collections::{HashMap, HashSet};
26use std::fs;
27use std::path::{Path, PathBuf};
28
29use crate::parser_warn as warn;
30use packageurl::PackageUrl;
31use serde_json::Value as JsonValue;
32
33use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
34use crate::parsers::pep508::{Pep508Requirement, parse_pep508_requirement};
35
36use super::PackageParser;
37
38pub struct RequirementsTxtParser;
43
44impl PackageParser for RequirementsTxtParser {
45 const PACKAGE_TYPE: PackageType = PackageType::Pypi;
46
47 fn extract_packages(path: &Path) -> Vec<PackageData> {
48 vec![extract_from_requirements_txt(path)]
49 }
50
51 fn is_match(path: &Path) -> bool {
52 let filename = path.file_name().and_then(|name| name.to_str());
53 let Some(name) = filename else {
54 return false;
55 };
56
57 is_requirements_txt_filename(name)
58 || (is_requirements_like_extension(name) && has_requirements_like_ancestor(path))
59 }
60}
61
62fn is_requirements_txt_filename(name: &str) -> bool {
63 if name == "requirements.txt" || name == "requires.txt" {
64 return true;
65 }
66
67 let (stem, extension) = if let Some(stem) = name.strip_suffix(".txt") {
68 (stem, "txt")
69 } else if let Some(stem) = name.strip_suffix(".in") {
70 (stem, "in")
71 } else {
72 return false;
73 };
74
75 stem == "requirements"
79 || stem.starts_with("requirements")
80 || stem.ends_with("requirements")
81 || (extension == "txt" && is_reqs_alias_stem(stem))
82}
83
/// Returns `true` when `stem` is `reqs` or ends with `-reqs`, `_reqs`, or `.reqs`.
fn is_reqs_alias_stem(stem: &str) -> bool {
    const ALIAS_SUFFIXES: [&str; 3] = ["-reqs", "_reqs", ".reqs"];

    stem == "reqs" || ALIAS_SUFFIXES.iter().any(|&suffix| stem.ends_with(suffix))
}
87
/// Returns `true` when `name` ends with `.txt` or `.in`.
fn is_requirements_like_extension(name: &str) -> bool {
    [".txt", ".in"]
        .iter()
        .any(|&extension| name.ends_with(extension))
}
91
92fn has_requirements_like_ancestor(path: &Path) -> bool {
93 path.parent()
94 .into_iter()
95 .flat_map(Path::ancestors)
96 .filter_map(|ancestor| ancestor.file_name())
97 .filter_map(|name| name.to_str())
98 .any(is_requirements_like_dir_name)
99}
100
/// Returns `true` when the directory name starts or ends with `requirements`.
fn is_requirements_like_dir_name(name: &str) -> bool {
    const MARKER: &str = "requirements";

    // A name equal to the marker is covered by the prefix test.
    name.starts_with(MARKER) || name.ends_with(MARKER)
}
104
105struct ParseState {
106 dependencies: Vec<Dependency>,
107 extra_index_urls: Vec<String>,
108 index_url: Option<String>,
109 includes: Vec<String>,
110 constraints: Vec<String>,
111 visited: HashSet<PathBuf>,
112}
113
114fn extract_from_requirements_txt(path: &Path) -> PackageData {
115 let mut state = ParseState {
116 dependencies: Vec::new(),
117 extra_index_urls: Vec::new(),
118 index_url: None,
119 includes: Vec::new(),
120 constraints: Vec::new(),
121 visited: HashSet::new(),
122 };
123
124 let (scope, is_runtime) = scope_from_filename(path);
125
126 parse_requirements_with_includes(path, &mut state, &scope, is_runtime);
127
128 let mut extra_data = HashMap::new();
129 if let Some(url) = state.index_url {
130 extra_data.insert("index_url".to_string(), JsonValue::String(url));
131 }
132 if !state.extra_index_urls.is_empty() {
133 extra_data.insert(
134 "extra_index_urls".to_string(),
135 JsonValue::Array(
136 state
137 .extra_index_urls
138 .into_iter()
139 .map(JsonValue::String)
140 .collect(),
141 ),
142 );
143 }
144 if !state.includes.is_empty() {
145 extra_data.insert(
146 "requirements_includes".to_string(),
147 JsonValue::Array(state.includes.into_iter().map(JsonValue::String).collect()),
148 );
149 }
150 if !state.constraints.is_empty() {
151 extra_data.insert(
152 "constraints".to_string(),
153 JsonValue::Array(
154 state
155 .constraints
156 .into_iter()
157 .map(JsonValue::String)
158 .collect(),
159 ),
160 );
161 }
162
163 let extra_data = if extra_data.is_empty() {
164 None
165 } else {
166 Some(extra_data)
167 };
168
169 default_package_data(state.dependencies, extra_data)
170}
171
172fn parse_requirements_with_includes(
173 path: &Path,
174 state: &mut ParseState,
175 scope: &str,
176 is_runtime: bool,
177) {
178 let abs_path = match path.canonicalize() {
179 Ok(p) => p,
180 Err(_) => {
181 warn!("Cannot resolve path: {:?}", path);
182 return;
183 }
184 };
185
186 if state.visited.contains(&abs_path) {
187 warn!("Circular include detected: {:?}", path);
188 return;
189 }
190
191 state.visited.insert(abs_path.clone());
192
193 let content = match fs::read_to_string(&abs_path) {
194 Ok(c) => c,
195 Err(e) => {
196 warn!("Cannot read file {:?}: {}", abs_path, e);
197 return;
198 }
199 };
200
201 for line in collect_logical_lines(&content) {
202 let cleaned = strip_inline_comment(&line);
203 let trimmed = cleaned.trim();
204 if trimmed.is_empty() || trimmed.starts_with('#') {
205 continue;
206 }
207
208 if let Some(url) = parse_option_value(trimmed, "--extra-index-url") {
209 state.extra_index_urls.push(url);
210 continue;
211 }
212
213 if let Some(url) = parse_option_value(trimmed, "--index-url") {
214 state.index_url = Some(url);
215 continue;
216 }
217
218 if let Some(path_value) = parse_option_value(trimmed, "-r")
219 .or_else(|| parse_option_value(trimmed, "--requirement"))
220 {
221 state.includes.push(path_value.clone());
222 let included_path = abs_path
223 .parent()
224 .unwrap_or_else(|| Path::new("."))
225 .join(&path_value);
226
227 if included_path.exists() {
228 parse_requirements_with_includes(&included_path, state, scope, is_runtime);
229 } else {
230 warn!("Included file not found: {:?}", included_path);
231 }
232 continue;
233 }
234
235 if let Some(path_value) = parse_option_value(trimmed, "-c")
236 .or_else(|| parse_option_value(trimmed, "--constraint"))
237 {
238 state.constraints.push(path_value.clone());
239 let constraint_path = abs_path
240 .parent()
241 .unwrap_or_else(|| Path::new("."))
242 .join(&path_value);
243
244 if constraint_path.exists() {
245 parse_requirements_with_includes(&constraint_path, state, scope, is_runtime);
246 } else {
247 warn!("Constraint file not found: {:?}", constraint_path);
248 }
249 continue;
250 }
251
252 if trimmed.starts_with('-')
253 && !trimmed.starts_with("-e")
254 && !trimmed.starts_with("--editable")
255 {
256 continue;
257 }
258
259 if let Some(dependency) = build_dependency(trimmed, scope, is_runtime) {
260 state.dependencies.push(dependency);
261 }
262 }
263}
264
265fn default_package_data(
266 dependencies: Vec<Dependency>,
267 extra_data: Option<HashMap<String, JsonValue>>,
268) -> PackageData {
269 PackageData {
270 package_type: Some(RequirementsTxtParser::PACKAGE_TYPE),
271 primary_language: Some("Python".to_string()),
272 extra_data,
273 dependencies,
274 datasource_id: Some(DatasourceId::PipRequirements),
275 ..Default::default()
276 }
277}
278
/// Joins backslash-continued physical lines of `content` into logical lines.
///
/// A physical line whose last non-whitespace character is `\` continues onto the next
/// line; all of its trailing backslashes are removed. Fragments are trimmed and joined
/// with a single space. Blank lines produce no output, and a continuation left open at
/// the end of the input is still emitted.
fn collect_logical_lines(content: &str) -> Vec<String> {
    let mut logical_lines = Vec::new();
    let mut pending = String::new();

    for physical in content.lines() {
        let stripped = physical.trim_end_matches('\r').trim_end();
        let (fragment, continues) = match stripped.strip_suffix('\\') {
            Some(rest) => (rest.trim_end_matches('\\').trim(), true),
            None => (stripped.trim_start(), false),
        };

        if !fragment.is_empty() {
            if !pending.is_empty() {
                pending.push(' ');
            }
            pending.push_str(fragment);
        }

        // `pending` only ever holds trimmed fragments joined by single spaces, so it
        // needs no further trimming before being emitted.
        if !continues && !pending.is_empty() {
            logical_lines.push(std::mem::take(&mut pending));
        }
    }

    if !pending.is_empty() {
        logical_lines.push(pending);
    }

    logical_lines
}
312
/// Removes a trailing `#` comment from `line`.
///
/// A `#` starts a comment only when it is outside single and double quotes and is either
/// the first character or preceded by whitespace. The text before the comment is returned
/// with trailing whitespace removed; a line without a comment is returned unchanged.
fn strip_inline_comment(line: &str) -> String {
    // The quote character currently open, if any.
    let mut open_quote: Option<char> = None;

    for (pos, ch) in line.char_indices() {
        match (open_quote, ch) {
            (Some(quote), _) => {
                if ch == quote {
                    open_quote = None;
                }
            }
            (None, '\'') | (None, '"') => open_quote = Some(ch),
            (None, '#') => {
                let prefix = &line[..pos];
                let at_boundary = prefix
                    .chars()
                    .next_back()
                    .map_or(true, char::is_whitespace);
                if at_boundary {
                    return prefix.trim_end().to_string();
                }
            }
            (None, _) => {}
        }
    }

    line.to_string()
}
331
/// Extracts the value of `option` from `line`.
///
/// `line` must start with `option`. The remainder is trimmed, one leading `=` (if present)
/// is dropped, and the result is trimmed again. Returns `None` when `line` does not start
/// with `option` or when no value remains.
fn parse_option_value(line: &str, option: &str) -> Option<String> {
    let after_option = line.strip_prefix(option)?.trim();
    let value = after_option
        .strip_prefix('=')
        .map_or(after_option, str::trim);

    (!value.is_empty()).then(|| value.to_string())
}
344
/// Derives the dependency scope and runtime flag from the file name of `path`.
///
/// The lower-cased file name is searched for `dev`, `test` and `doc`, in that order; the
/// first hit selects the `develop`, `test` or `docs` scope with `is_runtime == false`.
/// Any other name (including a missing or non-UTF-8 one) yields `("install", true)`.
fn scope_from_filename(path: &Path) -> (String, bool) {
    // (substring to look for, scope it selects), in priority order.
    const NON_RUNTIME_SCOPES: [(&str, &str); 3] =
        [("dev", "develop"), ("test", "test"), ("doc", "docs")];

    let lowered = path
        .file_name()
        .and_then(|name| name.to_str())
        .map(str::to_ascii_lowercase)
        .unwrap_or_default();

    for (needle, scope) in NON_RUNTIME_SCOPES.iter() {
        if lowered.contains(*needle) {
            return ((*scope).to_string(), false);
        }
    }

    ("install".to_string(), true)
}
364
365fn build_dependency(line: &str, scope: &str, is_runtime: bool) -> Option<Dependency> {
366 let trimmed = line.trim();
367 if trimmed.is_empty() {
368 return None;
369 }
370
371 let mut is_editable = false;
372 let mut requirement = trimmed.to_string();
373 let mut extracted_requirement = trimmed.to_string();
374
375 if let Some(rest) = trimmed.strip_prefix("-e") {
376 is_editable = true;
377 requirement = rest.trim().to_string();
378 extracted_requirement = format!("--editable {}", requirement);
379 } else if let Some(rest) = trimmed.strip_prefix("--editable") {
380 is_editable = true;
381 requirement = rest.trim().to_string();
382 extracted_requirement = format!("--editable {}", requirement);
383 }
384
385 let (requirement, hash_options) = split_hash_options(&requirement);
386 let requirement = requirement.trim();
387 if requirement.is_empty() {
388 return None;
389 }
390
391 if looks_like_hash_only_requirement(requirement) {
392 return None;
393 }
394
395 let parsed = parse_requirement(requirement);
396
397 let pinned_version = parsed
398 .specifiers
399 .as_deref()
400 .and_then(extract_pinned_version);
401 let is_pinned = pinned_version.is_some();
402
403 let purl = parsed
404 .name
405 .as_ref()
406 .and_then(|name| create_pypi_purl(name, pinned_version.as_deref()));
407
408 let mut extra_data = HashMap::new();
409 extra_data.insert("is_editable".to_string(), JsonValue::Bool(is_editable));
410 extra_data.insert(
411 "link".to_string(),
412 parsed
413 .link
414 .clone()
415 .map(JsonValue::String)
416 .unwrap_or(JsonValue::Null),
417 );
418 extra_data.insert(
419 "hash_options".to_string(),
420 JsonValue::Array(hash_options.into_iter().map(JsonValue::String).collect()),
421 );
422 extra_data.insert("is_constraint".to_string(), JsonValue::Bool(false));
423 extra_data.insert(
424 "is_archive".to_string(),
425 parsed
426 .is_archive
427 .map(JsonValue::Bool)
428 .unwrap_or(JsonValue::Null),
429 );
430 extra_data.insert("is_wheel".to_string(), JsonValue::Bool(parsed.is_wheel));
431 extra_data.insert(
432 "is_url".to_string(),
433 parsed
434 .is_url
435 .map(JsonValue::Bool)
436 .unwrap_or(JsonValue::Null),
437 );
438 extra_data.insert(
439 "is_vcs_url".to_string(),
440 parsed
441 .is_vcs_url
442 .map(JsonValue::Bool)
443 .unwrap_or(JsonValue::Null),
444 );
445 extra_data.insert(
446 "is_name_at_url".to_string(),
447 JsonValue::Bool(parsed.is_name_at_url),
448 );
449 extra_data.insert(
450 "is_local_path".to_string(),
451 parsed
452 .is_local_path
453 .map(|value| value || is_editable)
454 .map(JsonValue::Bool)
455 .unwrap_or(JsonValue::Null),
456 );
457
458 if let Some(marker) = parsed.marker {
459 extra_data.insert("markers".to_string(), JsonValue::String(marker));
460 }
461
462 Some(Dependency {
463 purl,
464 extracted_requirement: Some(extracted_requirement),
465 scope: Some(scope.to_string()),
466 is_runtime: Some(is_runtime),
467 is_optional: Some(false),
468 is_pinned: Some(is_pinned),
469 is_direct: Some(true),
470 resolved_package: None,
471 extra_data: Some(extra_data),
472 })
473}
474
/// Returns `true` when `requirement` is nothing but a hex digest.
///
/// After trimming, the text must be 32, 40, 64, 96 or 128 bytes long and consist solely
/// of ASCII hex digits. That also rules out whitespace, brackets, `@`, `;`, slashes,
/// `==`, `://` and `git+`, since none of those are hex digits.
fn looks_like_hash_only_requirement(requirement: &str) -> bool {
    let candidate = requirement.trim();
    let has_digest_length = matches!(candidate.len(), 32 | 40 | 64 | 96 | 128);

    has_digest_length && candidate.bytes().all(|byte| byte.is_ascii_hexdigit())
}
492
/// Separates `--hash=<value>` tokens from the rest of a requirement line.
///
/// Returns the remaining whitespace-separated tokens re-joined with single spaces, and
/// the non-empty hash values in their original order. A bare `--hash=` token is dropped
/// from both results.
fn split_hash_options(input: &str) -> (String, Vec<String>) {
    const HASH_PREFIX: &str = "--hash=";

    let (hash_tokens, other_tokens): (Vec<&str>, Vec<&str>) = input
        .split_whitespace()
        .partition(|token| token.starts_with(HASH_PREFIX));

    let hashes = hash_tokens
        .iter()
        .filter_map(|token| token.strip_prefix(HASH_PREFIX))
        .filter(|value| !value.is_empty())
        .map(str::to_string)
        .collect();

    (other_tokens.join(" "), hashes)
}
509
/// Result of interpreting one requirement string, either as a named requirement or as a
/// link (URL, VCS URL or local path).
struct ParsedRequirement {
    /// Normalized package name; `None` when no name could be determined from a link.
    name: Option<String>,
    /// Version specifiers as returned by the PEP 508 parser, if any.
    specifiers: Option<String>,
    /// Environment marker as returned by the PEP 508 parser, if any.
    marker: Option<String>,
    /// The link text; `None` for a plain named requirement.
    link: Option<String>,
    /// Link classification flags; `None` for a plain named requirement.
    is_url: Option<bool>,
    is_vcs_url: Option<bool>,
    is_local_path: Option<bool>,
    /// Archive flag; `None` for a plain named requirement or when it cannot be determined.
    is_archive: Option<bool>,
    /// Whether the link ends in `.whl`.
    is_wheel: bool,
    /// Whether the requirement used the `name @ url` form, as reported by the PEP 508
    /// parser.
    is_name_at_url: bool,
}
522
523fn parse_requirement(input: &str) -> ParsedRequirement {
524 if let Some(parsed) = parse_pep508_requirement(input) {
525 if let Some(url) = parsed.url.clone() {
526 return parsed_with_link(parsed, &url);
527 }
528
529 if !is_link_like(input) {
530 let name = Some(normalize_pypi_name(&parsed.name));
531 return ParsedRequirement {
532 name,
533 specifiers: parsed.specifiers,
534 marker: parsed.marker,
535 link: None,
536 is_url: None,
537 is_vcs_url: None,
538 is_local_path: None,
539 is_name_at_url: false,
540 is_archive: None,
541 is_wheel: false,
542 };
543 }
544 }
545
546 if let Some((name, link)) = parse_link_with_name(input) {
547 let normalized_name = normalize_pypi_name(&name);
548 let link_info = parse_link_flags(&link);
549 return ParsedRequirement {
550 name: Some(normalized_name),
551 specifiers: None,
552 marker: None,
553 link: Some(link),
554 is_url: Some(link_info.is_url),
555 is_vcs_url: Some(link_info.is_vcs_url),
556 is_local_path: Some(link_info.is_local_path),
557 is_name_at_url: link_info.is_name_at_url,
558 is_archive: link_info.is_archive,
559 is_wheel: link_info.is_wheel,
560 };
561 }
562
563 let link_info = parse_link_flags(input);
564 ParsedRequirement {
565 name: None,
566 specifiers: None,
567 marker: None,
568 link: Some(input.to_string()),
569 is_url: Some(link_info.is_url),
570 is_vcs_url: Some(link_info.is_vcs_url),
571 is_local_path: Some(link_info.is_local_path),
572 is_name_at_url: link_info.is_name_at_url,
573 is_archive: link_info.is_archive,
574 is_wheel: link_info.is_wheel,
575 }
576}
577
578fn parsed_with_link(parsed: Pep508Requirement, link: &str) -> ParsedRequirement {
579 let name = normalize_pypi_name(&parsed.name);
580 let link_info = parse_link_flags(link);
581 ParsedRequirement {
582 name: Some(name),
583 specifiers: parsed.specifiers,
584 marker: parsed.marker,
585 link: Some(link.to_string()),
586 is_url: Some(link_info.is_url),
587 is_vcs_url: Some(link_info.is_vcs_url),
588 is_local_path: Some(link_info.is_local_path),
589 is_name_at_url: parsed.is_name_at_url,
590 is_archive: link_info.is_archive,
591 is_wheel: link_info.is_wheel,
592 }
593}
594
595fn parse_link_with_name(input: &str) -> Option<(String, String)> {
596 if let Some(egg) = extract_egg_name(input) {
597 return Some((egg, input.to_string()));
598 }
599 None
600}
601
602fn extract_egg_name(input: &str) -> Option<String> {
603 let fragment = input.split('#').nth(1)?;
604 let egg_part = fragment.strip_prefix("egg=")?;
605 let name_part = egg_part.split('&').next()?.trim();
606 if name_part.is_empty() {
607 return None;
608 }
609 let (name, _extras, _) = parse_pep508_requirement(name_part)
610 .map(|parsed| (parsed.name, parsed.extras, parsed.specifiers))
611 .unwrap_or_else(|| (name_part.to_string(), Vec::new(), None));
612 Some(name)
613}
614
/// Classification of a link string.
struct LinkFlags {
    /// The link contains `://`, starts with `file:`, or is a VCS URL.
    is_url: bool,
    /// The link starts with `git+`, `hg+`, `svn+` or `bzr+`.
    is_vcs_url: bool,
    /// The link starts with `./`, `../`, `/`, `~` or `file:`.
    is_local_path: bool,
    /// Always `false` when produced by `parse_link_flags`.
    is_name_at_url: bool,
    /// `Some(true)` for a recognised archive or wheel suffix, `Some(false)` for any other
    /// URL or local path, `None` otherwise.
    is_archive: Option<bool>,
    /// The link ends with `.whl`.
    is_wheel: bool,
}

/// Classifies `link` (after trimming) by its prefix, scheme and file suffix.
fn parse_link_flags(link: &str) -> LinkFlags {
    const VCS_PREFIXES: [&str; 4] = ["git+", "hg+", "svn+", "bzr+"];
    const LOCAL_PREFIXES: [&str; 5] = ["./", "../", "/", "~", "file:"];
    const ARCHIVE_SUFFIXES: [&str; 5] = [".zip", ".tar.gz", ".tgz", ".tar.bz2", ".tar"];

    let link = link.trim();

    let is_vcs_url = VCS_PREFIXES.iter().any(|&prefix| link.starts_with(prefix));
    let is_local_path = LOCAL_PREFIXES
        .iter()
        .any(|&prefix| link.starts_with(prefix));
    let has_scheme = link.contains("://") || link.starts_with("file:");

    let is_wheel = link.ends_with(".whl");
    let has_archive_suffix = is_wheel
        || ARCHIVE_SUFFIXES
            .iter()
            .any(|&suffix| link.ends_with(suffix));

    let is_archive = match (has_archive_suffix, has_scheme || is_local_path) {
        (true, _) => Some(true),
        (false, true) => Some(false),
        (false, false) => None,
    };

    LinkFlags {
        is_url: has_scheme || is_vcs_url,
        is_vcs_url,
        is_local_path,
        is_name_at_url: false,
        is_archive,
        is_wheel,
    }
}
661
/// Returns `true` when `input` (after trimming) looks like a URL, VCS URL or local path
/// rather than a package name: it contains `://` or starts with a VCS, `file:` or path
/// prefix.
fn is_link_like(input: &str) -> bool {
    const LINK_PREFIXES: [&str; 9] = [
        "git+", "hg+", "svn+", "bzr+", "file:", "./", "../", "/", "~",
    ];

    let candidate = input.trim();
    candidate.contains("://")
        || LINK_PREFIXES
            .iter()
            .any(|&prefix| candidate.starts_with(prefix))
}
675
/// Returns the exact version pinned by `specifiers`, if any.
///
/// Only a single `==` or `===` clause counts as a pin; anything containing a comma
/// (multiple clauses) or using another operator yields `None`, as does an empty version.
fn extract_pinned_version(specifiers: &str) -> Option<String> {
    let specifier = specifiers.trim();
    if specifier.contains(',') {
        return None;
    }

    // `===` must be tried first: `==` is a prefix of it, so testing `==` first would
    // leave a stray `=` at the front of the version.
    let version = specifier
        .strip_prefix("===")
        .or_else(|| specifier.strip_prefix("=="))?
        .trim();

    (!version.is_empty()).then(|| version.to_string())
}
697
698fn create_pypi_purl(name: &str, version: Option<&str>) -> Option<String> {
699 PackageUrl::new(RequirementsTxtParser::PACKAGE_TYPE.as_str(), name)
700 .ok()
701 .map(|_| match version {
702 Some(version) => format!("pkg:pypi/{name}@{}", encode_pypi_purl_version(version)),
703 None => format!("pkg:pypi/{name}"),
704 })
705}
706
/// Percent-encodes every `*` in `version` as `%2A`; all other characters are kept as-is.
fn encode_pypi_purl_version(version: &str) -> String {
    let mut encoded = String::with_capacity(version.len());
    for ch in version.chars() {
        if ch == '*' {
            encoded.push_str("%2A");
        } else {
            encoded.push(ch);
        }
    }
    encoded
}
710
/// Normalizes a package name: trims it, lower-cases ASCII letters, and collapses every
/// run of `-`, `_` and `.` into a single `-`.
fn normalize_pypi_name(name: &str) -> String {
    let trimmed = name.trim();
    let mut normalized = String::with_capacity(trimmed.len());

    for ch in trimmed.chars().map(|c| c.to_ascii_lowercase()) {
        match ch {
            '-' | '_' | '.' => {
                // A `-` is only ever pushed for a separator, so a trailing `-` means the
                // previous character was a separator too.
                if !normalized.ends_with('-') {
                    normalized.push('-');
                }
            }
            other => normalized.push(other),
        }
    }

    normalized
}
729
// Registers this parser via the crate's `register_parser!` macro.
// NOTE(review): the arguments look like, in order, a human-readable description, the path
// globs the parser applies to, the package type, the primary language, and a link to the
// format's documentation — confirm against the macro definition.
// The globs mirror the file-name and ancestor-directory rules used by `is_match`:
// requirements-style file names, `reqs` aliases, `requires.txt`, and any `.txt`/`.in`
// file below a directory whose name starts with `requirements`.
crate::register_parser!(
    "pip requirements file",
    &[
        "**/requirements*.txt",
        "**/*requirements.txt",
        "**/reqs.txt",
        "**/*-reqs.txt",
        "**/*_reqs.txt",
        "**/*.reqs.txt",
        "**/requirements*.in",
        "**/*requirements.in",
        "**/requires.txt",
        "**/requirements/*.txt",
        "**/requirements/*.in",
        "**/requirements/**/*.txt",
        "**/requirements/**/*.in",
        "**/requirements*/*.txt",
        "**/requirements*/*.in",
        "**/requirements*/**/*.txt",
        "**/requirements*/**/*.in"
    ],
    "pypi",
    "Python",
    Some("https://pip.pypa.io/en/latest/reference/requirements-file-format/"),
);