1use std::collections::{HashMap, HashSet};
26use std::fs;
27use std::path::{Path, PathBuf};
28
29use crate::parser_warn as warn;
30use packageurl::PackageUrl;
31use serde_json::Value as JsonValue;
32
33use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
34use crate::parsers::pep508::{Pep508Requirement, parse_pep508_requirement};
35
36use super::PackageParser;
37
/// Parser for pip requirements files (`requirements.txt`, `requires.txt`,
/// `*.in` and similarly named files); see the `PackageParser` impl for the
/// exact matching rules.
pub struct RequirementsTxtParser;
43
44impl PackageParser for RequirementsTxtParser {
45 const PACKAGE_TYPE: PackageType = PackageType::Pypi;
46
47 fn extract_packages(path: &Path) -> Vec<PackageData> {
48 vec![extract_from_requirements_txt(path)]
49 }
50
51 fn is_match(path: &Path) -> bool {
52 let filename = path.file_name().and_then(|name| name.to_str());
53 let Some(name) = filename else {
54 return false;
55 };
56
57 is_requirements_txt_filename(name)
58 || (is_requirements_like_extension(name) && has_requirements_like_ancestor(path))
59 }
60}
61
/// Returns `true` when `name` is a requirements-style file name.
///
/// Accepted names are `requires.txt` and any `.txt` / `.in` file whose stem
/// starts or ends with `requirements` (which includes `requirements.txt`).
fn is_requirements_txt_filename(name: &str) -> bool {
    const KEYWORD: &str = "requirements";

    if name == "requires.txt" {
        return true;
    }

    // `.txt` is tried before `.in`; the first suffix that matches wins.
    let stem = [".txt", ".in"]
        .iter()
        .find_map(|ext| name.strip_suffix(*ext));

    match stem {
        Some(stem) => stem.starts_with(KEYWORD) || stem.ends_with(KEYWORD),
        None => false,
    }
}
76
/// Returns `true` when `name` ends with one of the extensions used by
/// requirements files (`.txt` or `.in`).
fn is_requirements_like_extension(name: &str) -> bool {
    const EXTENSIONS: [&str; 2] = [".txt", ".in"];
    EXTENSIONS.iter().any(|ext| name.ends_with(*ext))
}
80
81fn has_requirements_like_ancestor(path: &Path) -> bool {
82 path.parent()
83 .into_iter()
84 .flat_map(Path::ancestors)
85 .filter_map(|ancestor| ancestor.file_name())
86 .filter_map(|name| name.to_str())
87 .any(is_requirements_like_dir_name)
88}
89
/// Returns `true` when a directory name starts or ends with `requirements`
/// (the bare name `requirements` satisfies both).
fn is_requirements_like_dir_name(name: &str) -> bool {
    const KEYWORD: &str = "requirements";
    name.starts_with(KEYWORD) || name.ends_with(KEYWORD)
}
93
/// Mutable state accumulated while parsing a requirements file together with
/// the files it references through `-r` / `-c`.
struct ParseState {
    /// Dependencies collected so far, in the order they were encountered.
    dependencies: Vec<Dependency>,
    /// Values of every `--extra-index-url` option seen.
    extra_index_urls: Vec<String>,
    /// Value of the most recently seen `--index-url` option, if any.
    index_url: Option<String>,
    /// Paths given to `-r` / `--requirement`, exactly as written in the file.
    includes: Vec<String>,
    /// Paths given to `-c` / `--constraint`, exactly as written in the file.
    constraints: Vec<String>,
    /// Canonicalized paths of files already entered; prevents include cycles.
    visited: HashSet<PathBuf>,
}
102
103fn extract_from_requirements_txt(path: &Path) -> PackageData {
104 let mut state = ParseState {
105 dependencies: Vec::new(),
106 extra_index_urls: Vec::new(),
107 index_url: None,
108 includes: Vec::new(),
109 constraints: Vec::new(),
110 visited: HashSet::new(),
111 };
112
113 let (scope, is_runtime) = scope_from_filename(path);
114
115 parse_requirements_with_includes(path, &mut state, &scope, is_runtime);
116
117 let mut extra_data = HashMap::new();
118 if let Some(url) = state.index_url {
119 extra_data.insert("index_url".to_string(), JsonValue::String(url));
120 }
121 if !state.extra_index_urls.is_empty() {
122 extra_data.insert(
123 "extra_index_urls".to_string(),
124 JsonValue::Array(
125 state
126 .extra_index_urls
127 .into_iter()
128 .map(JsonValue::String)
129 .collect(),
130 ),
131 );
132 }
133 if !state.includes.is_empty() {
134 extra_data.insert(
135 "requirements_includes".to_string(),
136 JsonValue::Array(state.includes.into_iter().map(JsonValue::String).collect()),
137 );
138 }
139 if !state.constraints.is_empty() {
140 extra_data.insert(
141 "constraints".to_string(),
142 JsonValue::Array(
143 state
144 .constraints
145 .into_iter()
146 .map(JsonValue::String)
147 .collect(),
148 ),
149 );
150 }
151
152 let extra_data = if extra_data.is_empty() {
153 None
154 } else {
155 Some(extra_data)
156 };
157
158 default_package_data(state.dependencies, extra_data)
159}
160
161fn parse_requirements_with_includes(
162 path: &Path,
163 state: &mut ParseState,
164 scope: &str,
165 is_runtime: bool,
166) {
167 let abs_path = match path.canonicalize() {
168 Ok(p) => p,
169 Err(_) => {
170 warn!("Cannot resolve path: {:?}", path);
171 return;
172 }
173 };
174
175 if state.visited.contains(&abs_path) {
176 warn!("Circular include detected: {:?}", path);
177 return;
178 }
179
180 state.visited.insert(abs_path.clone());
181
182 let content = match fs::read_to_string(&abs_path) {
183 Ok(c) => c,
184 Err(e) => {
185 warn!("Cannot read file {:?}: {}", abs_path, e);
186 return;
187 }
188 };
189
190 for line in collect_logical_lines(&content) {
191 let cleaned = strip_inline_comment(&line);
192 let trimmed = cleaned.trim();
193 if trimmed.is_empty() || trimmed.starts_with('#') {
194 continue;
195 }
196
197 if let Some(url) = parse_option_value(trimmed, "--extra-index-url") {
198 state.extra_index_urls.push(url);
199 continue;
200 }
201
202 if let Some(url) = parse_option_value(trimmed, "--index-url") {
203 state.index_url = Some(url);
204 continue;
205 }
206
207 if let Some(path_value) = parse_option_value(trimmed, "-r")
208 .or_else(|| parse_option_value(trimmed, "--requirement"))
209 {
210 state.includes.push(path_value.clone());
211 let included_path = abs_path
212 .parent()
213 .unwrap_or_else(|| Path::new("."))
214 .join(&path_value);
215
216 if included_path.exists() {
217 parse_requirements_with_includes(&included_path, state, scope, is_runtime);
218 } else {
219 warn!("Included file not found: {:?}", included_path);
220 }
221 continue;
222 }
223
224 if let Some(path_value) = parse_option_value(trimmed, "-c")
225 .or_else(|| parse_option_value(trimmed, "--constraint"))
226 {
227 state.constraints.push(path_value.clone());
228 let constraint_path = abs_path
229 .parent()
230 .unwrap_or_else(|| Path::new("."))
231 .join(&path_value);
232
233 if constraint_path.exists() {
234 parse_requirements_with_includes(&constraint_path, state, scope, is_runtime);
235 } else {
236 warn!("Constraint file not found: {:?}", constraint_path);
237 }
238 continue;
239 }
240
241 if trimmed.starts_with('-')
242 && !trimmed.starts_with("-e")
243 && !trimmed.starts_with("--editable")
244 {
245 continue;
246 }
247
248 if let Some(dependency) = build_dependency(trimmed, scope, is_runtime) {
249 state.dependencies.push(dependency);
250 }
251 }
252}
253
254fn default_package_data(
255 dependencies: Vec<Dependency>,
256 extra_data: Option<HashMap<String, JsonValue>>,
257) -> PackageData {
258 PackageData {
259 package_type: Some(RequirementsTxtParser::PACKAGE_TYPE),
260 primary_language: Some("Python".to_string()),
261 extra_data,
262 dependencies,
263 datasource_id: Some(DatasourceId::PipRequirements),
264 ..Default::default()
265 }
266}
267
/// Joins physical lines into logical lines by resolving trailing-backslash
/// continuations.
///
/// Every physical line is trimmed; continued pieces are joined with a single
/// space. Blank lines produce no output. A trailing continuation at the end of
/// the input still yields the pending line.
///
/// A line that starts with `#` never continues onto the next line, even when
/// it ends with a backslash. This mirrors pip's line joining, and prevents the
/// requirement after such a comment from being absorbed into the comment.
fn collect_logical_lines(content: &str) -> Vec<String> {
    let mut lines = Vec::new();
    let mut current = String::new();

    for raw_line in content.lines() {
        // `trim` also removes a stray `\r` left over from CRLF line endings.
        let line = raw_line.trim();
        let is_comment = line.starts_with('#');
        let is_continuation = !is_comment && line.ends_with('\\');

        let fragment = if is_continuation {
            line.trim_end_matches('\\').trim_end()
        } else {
            line
        };

        if !fragment.is_empty() {
            if !current.is_empty() {
                current.push(' ');
            }
            current.push_str(fragment);
        }

        if !is_continuation && !current.is_empty() {
            lines.push(std::mem::take(&mut current));
        }
    }

    if !current.is_empty() {
        lines.push(current);
    }

    lines
}
301
/// Removes a trailing `# comment` from `line`.
///
/// A `#` starts a comment only when it is outside single/double quotes and is
/// either the first character or preceded by whitespace, so URL fragments such
/// as `...#egg=name` are left alone. When a comment is found, the text before
/// it is returned with trailing whitespace removed; otherwise `line` is
/// returned unchanged.
fn strip_inline_comment(line: &str) -> String {
    // The quote character currently open, if any.
    let mut open_quote: Option<char> = None;

    for (pos, ch) in line.char_indices() {
        match (ch, open_quote) {
            ('\'' | '"', None) => open_quote = Some(ch),
            ('\'' | '"', Some(quote)) if quote == ch => open_quote = None,
            ('#', None) => {
                let before = &line[..pos];
                if before.is_empty() || before.ends_with(char::is_whitespace) {
                    return before.trim_end().to_string();
                }
            }
            _ => {}
        }
    }

    line.to_string()
}
320
/// Extracts the value of `option` from `line`.
///
/// `line` must start with `option`; the value is whatever follows, with
/// surrounding whitespace and one optional leading `=` removed. Returns `None`
/// when the line does not start with the option or the value is empty.
fn parse_option_value(line: &str, option: &str) -> Option<String> {
    let after_option = line.strip_prefix(option)?.trim();
    let value = match after_option.strip_prefix('=') {
        Some(after_equals) => after_equals.trim(),
        None => after_option,
    };

    (!value.is_empty()).then(|| value.to_string())
}
333
/// Derives the dependency scope and runtime flag from the file name of `path`.
///
/// The lower-cased file name is searched for `dev`, `test` and `doc`, in that
/// order; the first hit selects `develop`, `test` or `docs` with
/// `is_runtime = false`. Anything else is the runtime `install` scope.
/// Directory components of `path` are not considered.
fn scope_from_filename(path: &Path) -> (String, bool) {
    // (substring to look for, resulting scope), in priority order.
    const KEYWORD_SCOPES: [(&str, &str); 3] =
        [("dev", "develop"), ("test", "test"), ("doc", "docs")];

    let lowered = path
        .file_name()
        .and_then(|os_name| os_name.to_str())
        .map(str::to_ascii_lowercase)
        .unwrap_or_default();

    let hit = KEYWORD_SCOPES
        .iter()
        .find(|(keyword, _)| lowered.contains(*keyword));

    match hit {
        Some((_, scope)) => ((*scope).to_string(), false),
        None => ("install".to_string(), true),
    }
}
353
354fn build_dependency(line: &str, scope: &str, is_runtime: bool) -> Option<Dependency> {
355 let trimmed = line.trim();
356 if trimmed.is_empty() {
357 return None;
358 }
359
360 let mut is_editable = false;
361 let mut requirement = trimmed.to_string();
362 let mut extracted_requirement = trimmed.to_string();
363
364 if let Some(rest) = trimmed.strip_prefix("-e") {
365 is_editable = true;
366 requirement = rest.trim().to_string();
367 extracted_requirement = format!("--editable {}", requirement);
368 } else if let Some(rest) = trimmed.strip_prefix("--editable") {
369 is_editable = true;
370 requirement = rest.trim().to_string();
371 extracted_requirement = format!("--editable {}", requirement);
372 }
373
374 let (requirement, hash_options) = split_hash_options(&requirement);
375 let requirement = requirement.trim();
376 if requirement.is_empty() {
377 return None;
378 }
379
380 if looks_like_hash_only_requirement(requirement) {
381 return None;
382 }
383
384 let parsed = parse_requirement(requirement);
385
386 let pinned_version = parsed
387 .specifiers
388 .as_deref()
389 .and_then(extract_pinned_version);
390 let is_pinned = pinned_version.is_some();
391
392 let purl = parsed
393 .name
394 .as_ref()
395 .and_then(|name| create_pypi_purl(name, pinned_version.as_deref()));
396
397 let mut extra_data = HashMap::new();
398 extra_data.insert("is_editable".to_string(), JsonValue::Bool(is_editable));
399 extra_data.insert(
400 "link".to_string(),
401 parsed
402 .link
403 .clone()
404 .map(JsonValue::String)
405 .unwrap_or(JsonValue::Null),
406 );
407 extra_data.insert(
408 "hash_options".to_string(),
409 JsonValue::Array(hash_options.into_iter().map(JsonValue::String).collect()),
410 );
411 extra_data.insert("is_constraint".to_string(), JsonValue::Bool(false));
412 extra_data.insert(
413 "is_archive".to_string(),
414 parsed
415 .is_archive
416 .map(JsonValue::Bool)
417 .unwrap_or(JsonValue::Null),
418 );
419 extra_data.insert("is_wheel".to_string(), JsonValue::Bool(parsed.is_wheel));
420 extra_data.insert(
421 "is_url".to_string(),
422 parsed
423 .is_url
424 .map(JsonValue::Bool)
425 .unwrap_or(JsonValue::Null),
426 );
427 extra_data.insert(
428 "is_vcs_url".to_string(),
429 parsed
430 .is_vcs_url
431 .map(JsonValue::Bool)
432 .unwrap_or(JsonValue::Null),
433 );
434 extra_data.insert(
435 "is_name_at_url".to_string(),
436 JsonValue::Bool(parsed.is_name_at_url),
437 );
438 extra_data.insert(
439 "is_local_path".to_string(),
440 parsed
441 .is_local_path
442 .map(|value| value || is_editable)
443 .map(JsonValue::Bool)
444 .unwrap_or(JsonValue::Null),
445 );
446
447 if let Some(marker) = parsed.marker {
448 extra_data.insert("markers".to_string(), JsonValue::String(marker));
449 }
450
451 Some(Dependency {
452 purl,
453 extracted_requirement: Some(extracted_requirement),
454 scope: Some(scope.to_string()),
455 is_runtime: Some(is_runtime),
456 is_optional: Some(false),
457 is_pinned: Some(is_pinned),
458 is_direct: Some(true),
459 resolved_package: None,
460 extra_data: Some(extra_data),
461 })
462}
463
/// Returns `true` when `requirement` is nothing but a bare hex digest.
///
/// After trimming, the text must have one of the lengths 32, 40, 64, 96 or
/// 128 and consist solely of ASCII hex digits. That rule alone rejects
/// anything containing whitespace, brackets, `@`, `;`, slashes, `==`, `://`
/// or `git+`, since none of those are hex digits.
fn looks_like_hash_only_requirement(requirement: &str) -> bool {
    const DIGEST_HEX_LENGTHS: [usize; 5] = [32, 40, 64, 96, 128];

    let candidate = requirement.trim();
    DIGEST_HEX_LENGTHS.contains(&candidate.len())
        && candidate.bytes().all(|byte| byte.is_ascii_hexdigit())
}
481
/// Separates `--hash=<value>` options from the rest of a requirement.
///
/// Returns the remaining whitespace-separated tokens re-joined with single
/// spaces, plus the hash values in their original order. `--hash=` tokens
/// with an empty value are dropped from both outputs.
fn split_hash_options(input: &str) -> (String, Vec<String>) {
    const HASH_FLAG: &str = "--hash=";

    let (hash_tokens, kept): (Vec<&str>, Vec<&str>) = input
        .split_whitespace()
        .partition(|token| token.starts_with(HASH_FLAG));

    let hashes = hash_tokens
        .into_iter()
        .filter_map(|token| token.strip_prefix(HASH_FLAG))
        .filter(|value| !value.is_empty())
        .map(str::to_string)
        .collect();

    (kept.join(" "), hashes)
}
498
/// Result of interpreting a single requirement string, produced by
/// `parse_requirement`.
///
/// The `Option<bool>` link flags are `None` for plain name-based requirements
/// (no link involved) and `Some(..)` whenever a link was analysed.
struct ParsedRequirement {
    /// Normalized package name, when one could be determined.
    name: Option<String>,
    /// Version specifiers as reported by the PEP 508 parser, if any.
    specifiers: Option<String>,
    /// Environment marker as reported by the PEP 508 parser, if any.
    marker: Option<String>,
    /// URL or path the requirement points at, if any.
    link: Option<String>,
    /// Whether the link has a URL scheme or is a VCS URL.
    is_url: Option<bool>,
    /// Whether the link starts with a VCS prefix such as `git+`.
    is_vcs_url: Option<bool>,
    /// Whether the link looks like a local path or `file:` URL.
    is_local_path: Option<bool>,
    /// Copied from the PEP 508 parser when it reported a URL; `false` otherwise.
    is_name_at_url: bool,
    /// Whether the link ends with a known archive extension; `None` when
    /// that cannot be judged.
    is_archive: Option<bool>,
    /// Whether the link ends with `.whl`.
    is_wheel: bool,
}
511
512fn parse_requirement(input: &str) -> ParsedRequirement {
513 if let Some(parsed) = parse_pep508_requirement(input) {
514 if let Some(url) = parsed.url.clone() {
515 return parsed_with_link(parsed, &url);
516 }
517
518 if !is_link_like(input) {
519 let name = Some(normalize_pypi_name(&parsed.name));
520 return ParsedRequirement {
521 name,
522 specifiers: parsed.specifiers,
523 marker: parsed.marker,
524 link: None,
525 is_url: None,
526 is_vcs_url: None,
527 is_local_path: None,
528 is_name_at_url: false,
529 is_archive: None,
530 is_wheel: false,
531 };
532 }
533 }
534
535 if let Some((name, link)) = parse_link_with_name(input) {
536 let normalized_name = normalize_pypi_name(&name);
537 let link_info = parse_link_flags(&link);
538 return ParsedRequirement {
539 name: Some(normalized_name),
540 specifiers: None,
541 marker: None,
542 link: Some(link),
543 is_url: Some(link_info.is_url),
544 is_vcs_url: Some(link_info.is_vcs_url),
545 is_local_path: Some(link_info.is_local_path),
546 is_name_at_url: link_info.is_name_at_url,
547 is_archive: link_info.is_archive,
548 is_wheel: link_info.is_wheel,
549 };
550 }
551
552 let link_info = parse_link_flags(input);
553 ParsedRequirement {
554 name: None,
555 specifiers: None,
556 marker: None,
557 link: Some(input.to_string()),
558 is_url: Some(link_info.is_url),
559 is_vcs_url: Some(link_info.is_vcs_url),
560 is_local_path: Some(link_info.is_local_path),
561 is_name_at_url: link_info.is_name_at_url,
562 is_archive: link_info.is_archive,
563 is_wheel: link_info.is_wheel,
564 }
565}
566
567fn parsed_with_link(parsed: Pep508Requirement, link: &str) -> ParsedRequirement {
568 let name = normalize_pypi_name(&parsed.name);
569 let link_info = parse_link_flags(link);
570 ParsedRequirement {
571 name: Some(name),
572 specifiers: parsed.specifiers,
573 marker: parsed.marker,
574 link: Some(link.to_string()),
575 is_url: Some(link_info.is_url),
576 is_vcs_url: Some(link_info.is_vcs_url),
577 is_local_path: Some(link_info.is_local_path),
578 is_name_at_url: parsed.is_name_at_url,
579 is_archive: link_info.is_archive,
580 is_wheel: link_info.is_wheel,
581 }
582}
583
584fn parse_link_with_name(input: &str) -> Option<(String, String)> {
585 if let Some(egg) = extract_egg_name(input) {
586 return Some((egg, input.to_string()));
587 }
588 None
589}
590
591fn extract_egg_name(input: &str) -> Option<String> {
592 let fragment = input.split('#').nth(1)?;
593 let egg_part = fragment.strip_prefix("egg=")?;
594 let name_part = egg_part.split('&').next()?.trim();
595 if name_part.is_empty() {
596 return None;
597 }
598 let (name, _extras, _) = parse_pep508_requirement(name_part)
599 .map(|parsed| (parsed.name, parsed.extras, parsed.specifiers))
600 .unwrap_or_else(|| (name_part.to_string(), Vec::new(), None));
601 Some(name)
602}
603
/// Classification of a link (URL or path), as computed by `parse_link_flags`.
struct LinkFlags {
    /// The link contains `://`, starts with `file:`, or is a VCS URL.
    is_url: bool,
    /// The link starts with `git+`, `hg+`, `svn+` or `bzr+`.
    is_vcs_url: bool,
    /// The link starts with `./`, `../`, `/`, `~` or `file:`.
    is_local_path: bool,
    /// Always `false` when produced by `parse_link_flags`.
    is_name_at_url: bool,
    /// `Some(true)` for known archive/wheel extensions, `Some(false)` for
    /// other URLs and local paths, `None` otherwise.
    is_archive: Option<bool>,
    /// The link ends with `.whl`.
    is_wheel: bool,
}
612
613fn parse_link_flags(link: &str) -> LinkFlags {
614 let trimmed = link.trim();
615 let is_vcs_url = trimmed.starts_with("git+")
616 || trimmed.starts_with("hg+")
617 || trimmed.starts_with("svn+")
618 || trimmed.starts_with("bzr+");
619 let has_scheme = trimmed.contains("://") || trimmed.starts_with("file:");
620 let is_local_path = trimmed.starts_with("./")
621 || trimmed.starts_with("../")
622 || trimmed.starts_with('/')
623 || trimmed.starts_with('~')
624 || trimmed.starts_with("file:");
625
626 let is_wheel = trimmed.ends_with(".whl");
627 let is_archive = if is_wheel
628 || trimmed.ends_with(".zip")
629 || trimmed.ends_with(".tar.gz")
630 || trimmed.ends_with(".tgz")
631 || trimmed.ends_with(".tar.bz2")
632 || trimmed.ends_with(".tar")
633 {
634 Some(true)
635 } else if has_scheme || is_local_path {
636 Some(false)
637 } else {
638 None
639 };
640
641 LinkFlags {
642 is_url: has_scheme || is_vcs_url,
643 is_vcs_url,
644 is_local_path,
645 is_name_at_url: false,
646 is_archive,
647 is_wheel,
648 }
649}
650
/// Returns `true` when `input` looks like a URL, VCS reference or filesystem
/// path rather than a package name.
///
/// The trimmed text qualifies if it contains `://` or starts with a VCS
/// prefix (`git+`, `hg+`, `svn+`, `bzr+`), `file:`, `./`, `../`, `/` or `~`.
fn is_link_like(input: &str) -> bool {
    const LINK_PREFIXES: [&str; 9] = [
        "git+", "hg+", "svn+", "bzr+", "file:", "./", "../", "/", "~",
    ];

    let candidate = input.trim();
    candidate.contains("://")
        || LINK_PREFIXES
            .iter()
            .any(|prefix| candidate.starts_with(*prefix))
}
664
/// Returns the exact version when `specifiers` pins a single version.
///
/// Only a lone `==<version>` or `===<version>` clause counts as pinned;
/// specifier lists (anything containing a comma), other operators and empty
/// versions yield `None`. Whitespace around the operator and the version is
/// ignored.
fn extract_pinned_version(specifiers: &str) -> Option<String> {
    let specifier = specifiers.trim();
    if specifier.contains(',') {
        return None;
    }

    // `===` has to be tried first: every `===` specifier also starts with
    // `==`, and stripping the shorter operator would leave a stray `=` in
    // front of the version.
    let version = specifier
        .strip_prefix("===")
        .or_else(|| specifier.strip_prefix("=="))?
        .trim();

    if version.is_empty() {
        None
    } else {
        Some(version.to_string())
    }
}
686
687fn create_pypi_purl(name: &str, version: Option<&str>) -> Option<String> {
688 PackageUrl::new(RequirementsTxtParser::PACKAGE_TYPE.as_str(), name)
689 .ok()
690 .map(|_| match version {
691 Some(version) => format!("pkg:pypi/{name}@{}", encode_pypi_purl_version(version)),
692 None => format!("pkg:pypi/{name}"),
693 })
694}
695
/// Percent-encodes every `*` in `version` as `%2A`, leaving all other
/// characters untouched.
fn encode_pypi_purl_version(version: &str) -> String {
    let mut encoded = String::with_capacity(version.len());
    for ch in version.chars() {
        if ch == '*' {
            encoded.push_str("%2A");
        } else {
            encoded.push(ch);
        }
    }
    encoded
}
699
/// Normalizes a PyPI project name.
///
/// The name is trimmed, ASCII-lowercased, and every run of `-`, `_` and `.`
/// is collapsed into a single `-`.
fn normalize_pypi_name(name: &str) -> String {
    let trimmed = name.trim();
    let mut normalized = String::with_capacity(trimmed.len());

    for ch in trimmed.chars().map(|c| c.to_ascii_lowercase()) {
        match ch {
            '-' | '_' | '.' => {
                // The output only ever gains a `-` from a separator, so a
                // trailing `-` means the current run was already emitted.
                if !normalized.ends_with('-') {
                    normalized.push('-');
                }
            }
            other => normalized.push(other),
        }
    }

    normalized
}
718
// Registers this parser's metadata: a human-readable description, the glob
// patterns of files it handles, the package type, the primary language and a
// link to the format documentation.
// NOTE(review): argument meanings are inferred from the values passed here;
// confirm against the definition of `register_parser!`.
crate::register_parser!(
    "pip requirements file",
    &[
        "**/requirements*.txt",
        "**/*requirements.txt",
        "**/requirements*.in",
        "**/*requirements.in",
        "**/requires.txt",
        "**/requirements/*.txt",
        "**/requirements/*.in",
        "**/requirements/**/*.txt",
        "**/requirements/**/*.in",
        "**/requirements*/*.txt",
        "**/requirements*/*.in",
        "**/requirements*/**/*.txt",
        "**/requirements*/**/*.in"
    ],
    "pypi",
    "Python",
    Some("https://pip.pypa.io/en/latest/reference/requirements-file-format/"),
);