1use std::collections::{HashMap, HashSet};
26use std::fs;
27use std::path::{Path, PathBuf};
28
29use crate::parser_warn as warn;
30use packageurl::PackageUrl;
31use serde_json::Value as JsonValue;
32
33use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
34use crate::parsers::pep508::{Pep508Requirement, parse_pep508_requirement};
35
36use super::PackageParser;
37
/// Parser for pip requirements files.
///
/// The type carries no state; all behaviour lives in its `PackageParser`
/// implementation and the private helper functions of this file.
pub struct RequirementsTxtParser;
43
44impl PackageParser for RequirementsTxtParser {
45 const PACKAGE_TYPE: PackageType = PackageType::Pypi;
46
47 fn extract_packages(path: &Path) -> Vec<PackageData> {
48 vec![extract_from_requirements_txt(path)]
49 }
50
51 fn is_match(path: &Path) -> bool {
52 let filename = path.file_name().and_then(|name| name.to_str());
53 let parent_name = path
54 .parent()
55 .and_then(|parent| parent.file_name())
56 .and_then(|name| name.to_str());
57
58 if let Some(name) = filename
59 && (is_requirements_txt_filename(name)
60 || (parent_name == Some("requirements") && name.ends_with(".txt")))
61 {
62 return true;
63 }
64
65 false
66 }
67}
68
/// Returns `true` for `requirements.txt` and for names of the form
/// `requirements-<anything>.txt` or `requirements_<anything>.txt`.
fn is_requirements_txt_filename(name: &str) -> bool {
    let middle = name
        .strip_prefix("requirements")
        .and_then(|rest| rest.strip_suffix(".txt"));

    match middle {
        // Nothing between prefix and suffix is the plain `requirements.txt`;
        // otherwise the qualifier must start with `-` or `_`.
        Some(qualifier) => matches!(qualifier.chars().next(), None | Some('-' | '_')),
        None => false,
    }
}
83
/// Accumulator threaded through `parse_requirements_with_includes` while a
/// requirements file and the files it references are parsed.
struct ParseState {
    /// Dependencies collected from every file parsed so far.
    dependencies: Vec<Dependency>,
    /// Values of all `--extra-index-url` options, in encounter order.
    extra_index_urls: Vec<String>,
    /// Value of the most recently seen `--index-url` option, if any.
    index_url: Option<String>,
    /// Raw path arguments of `-r` / `--requirement` options.
    includes: Vec<String>,
    /// Raw path arguments of `-c` / `--constraint` options.
    constraints: Vec<String>,
    /// Canonicalized paths already parsed; used to stop circular includes.
    visited: HashSet<PathBuf>,
}
92
93fn extract_from_requirements_txt(path: &Path) -> PackageData {
94 let mut state = ParseState {
95 dependencies: Vec::new(),
96 extra_index_urls: Vec::new(),
97 index_url: None,
98 includes: Vec::new(),
99 constraints: Vec::new(),
100 visited: HashSet::new(),
101 };
102
103 let (scope, is_runtime) = scope_from_filename(path);
104
105 parse_requirements_with_includes(path, &mut state, &scope, is_runtime);
106
107 let mut extra_data = HashMap::new();
108 if let Some(url) = state.index_url {
109 extra_data.insert("index_url".to_string(), JsonValue::String(url));
110 }
111 if !state.extra_index_urls.is_empty() {
112 extra_data.insert(
113 "extra_index_urls".to_string(),
114 JsonValue::Array(
115 state
116 .extra_index_urls
117 .into_iter()
118 .map(JsonValue::String)
119 .collect(),
120 ),
121 );
122 }
123 if !state.includes.is_empty() {
124 extra_data.insert(
125 "requirements_includes".to_string(),
126 JsonValue::Array(state.includes.into_iter().map(JsonValue::String).collect()),
127 );
128 }
129 if !state.constraints.is_empty() {
130 extra_data.insert(
131 "constraints".to_string(),
132 JsonValue::Array(
133 state
134 .constraints
135 .into_iter()
136 .map(JsonValue::String)
137 .collect(),
138 ),
139 );
140 }
141
142 let extra_data = if extra_data.is_empty() {
143 None
144 } else {
145 Some(extra_data)
146 };
147
148 default_package_data(state.dependencies, extra_data)
149}
150
151fn parse_requirements_with_includes(
152 path: &Path,
153 state: &mut ParseState,
154 scope: &str,
155 is_runtime: bool,
156) {
157 let abs_path = match path.canonicalize() {
158 Ok(p) => p,
159 Err(_) => {
160 warn!("Cannot resolve path: {:?}", path);
161 return;
162 }
163 };
164
165 if state.visited.contains(&abs_path) {
166 warn!("Circular include detected: {:?}", path);
167 return;
168 }
169
170 state.visited.insert(abs_path.clone());
171
172 let content = match fs::read_to_string(&abs_path) {
173 Ok(c) => c,
174 Err(e) => {
175 warn!("Cannot read file {:?}: {}", abs_path, e);
176 return;
177 }
178 };
179
180 for line in collect_logical_lines(&content) {
181 let cleaned = strip_inline_comment(&line);
182 let trimmed = cleaned.trim();
183 if trimmed.is_empty() || trimmed.starts_with('#') {
184 continue;
185 }
186
187 if let Some(url) = parse_option_value(trimmed, "--extra-index-url") {
188 state.extra_index_urls.push(url);
189 continue;
190 }
191
192 if let Some(url) = parse_option_value(trimmed, "--index-url") {
193 state.index_url = Some(url);
194 continue;
195 }
196
197 if let Some(path_value) = parse_option_value(trimmed, "-r")
198 .or_else(|| parse_option_value(trimmed, "--requirement"))
199 {
200 state.includes.push(path_value.clone());
201 let included_path = abs_path
202 .parent()
203 .unwrap_or_else(|| Path::new("."))
204 .join(&path_value);
205
206 if included_path.exists() {
207 parse_requirements_with_includes(&included_path, state, scope, is_runtime);
208 } else {
209 warn!("Included file not found: {:?}", included_path);
210 }
211 continue;
212 }
213
214 if let Some(path_value) = parse_option_value(trimmed, "-c")
215 .or_else(|| parse_option_value(trimmed, "--constraint"))
216 {
217 state.constraints.push(path_value.clone());
218 let constraint_path = abs_path
219 .parent()
220 .unwrap_or_else(|| Path::new("."))
221 .join(&path_value);
222
223 if constraint_path.exists() {
224 parse_requirements_with_includes(&constraint_path, state, scope, is_runtime);
225 } else {
226 warn!("Constraint file not found: {:?}", constraint_path);
227 }
228 continue;
229 }
230
231 if trimmed.starts_with('-')
232 && !trimmed.starts_with("-e")
233 && !trimmed.starts_with("--editable")
234 {
235 continue;
236 }
237
238 if let Some(dependency) = build_dependency(trimmed, scope, is_runtime) {
239 state.dependencies.push(dependency);
240 }
241 }
242}
243
244fn default_package_data(
245 dependencies: Vec<Dependency>,
246 extra_data: Option<HashMap<String, JsonValue>>,
247) -> PackageData {
248 PackageData {
249 package_type: Some(RequirementsTxtParser::PACKAGE_TYPE),
250 primary_language: Some("Python".to_string()),
251 extra_data,
252 dependencies,
253 datasource_id: Some(DatasourceId::PipRequirements),
254 ..Default::default()
255 }
256}
257
/// Joins physical lines that end in a backslash into logical lines.
///
/// Each physical line is trimmed, its trailing continuation backslashes are
/// removed, and continued pieces are joined with a single space. Lines that
/// are empty after trimming contribute nothing, so no empty logical line is
/// ever returned.
fn collect_logical_lines(content: &str) -> Vec<String> {
    let mut logical = Vec::new();
    let mut pending = String::new();

    for physical in content.lines() {
        // `trim_end` also removes a stray trailing `\r`.
        let stripped = physical.trim_end();
        let continues = stripped.ends_with('\\');
        // A no-op when the line does not end in a backslash.
        let segment = stripped.trim_end_matches('\\').trim();

        if !segment.is_empty() {
            if !pending.is_empty() {
                pending.push(' ');
            }
            pending.push_str(segment);
        }

        if !continues && !pending.is_empty() {
            // `pending` holds trimmed segments joined by single spaces, so it
            // needs no further trimming before being emitted.
            logical.push(std::mem::take(&mut pending));
        }
    }

    // A trailing continuation with no following line still yields its content.
    if !pending.is_empty() {
        logical.push(pending);
    }

    logical
}
291
/// Removes a trailing `# comment` from `line`.
///
/// A `#` starts a comment only when it is outside single and double quotes
/// and is either the first character or preceded by whitespace. This keeps
/// URL fragments such as `#egg=name` intact. The text before the comment is
/// returned with trailing whitespace removed; a line without a comment is
/// returned unchanged.
fn strip_inline_comment(line: &str) -> String {
    // The quote character currently open, if any.
    let mut open_quote: Option<char> = None;

    for (pos, c) in line.char_indices() {
        match (open_quote, c) {
            (Some(quote), _) if quote == c => open_quote = None,
            (None, '\'' | '"') => open_quote = Some(c),
            (None, '#') => {
                let head = &line[..pos];
                if head.is_empty() || head.ends_with(char::is_whitespace) {
                    return head.trim_end().to_string();
                }
            }
            _ => {}
        }
    }

    line.to_string()
}
310
/// Returns the value given to `option` when `line` starts with it.
///
/// Both `--opt value` and `--opt=value` are accepted, with optional
/// whitespace around the `=`. Returns `None` when the line does not start
/// with `option` or when no value follows it.
fn parse_option_value(line: &str, option: &str) -> Option<String> {
    let after_option = line.strip_prefix(option)?.trim();
    let value = after_option
        .strip_prefix('=')
        .map_or(after_option, str::trim);

    (!value.is_empty()).then(|| value.to_string())
}
323
/// Derives the dependency scope and runtime flag from the file name of `path`.
///
/// The lower-cased file name is checked for the substrings `dev`, `test` and
/// `doc`, in that order; the first hit gives `develop`, `test` or `docs`
/// with `is_runtime == false`. Any other name gives `("install", true)`.
fn scope_from_filename(path: &Path) -> (String, bool) {
    // (substring looked for, scope reported), in priority order.
    const NON_RUNTIME_SCOPES: [(&str, &str); 3] =
        [("dev", "develop"), ("test", "test"), ("doc", "docs")];

    let lowered = path
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or_default()
        .to_ascii_lowercase();

    NON_RUNTIME_SCOPES
        .iter()
        .find(|&&(needle, _)| lowered.contains(needle))
        .map_or_else(
            || ("install".to_string(), true),
            |&(_, scope)| (scope.to_string(), false),
        )
}
343
344fn build_dependency(line: &str, scope: &str, is_runtime: bool) -> Option<Dependency> {
345 let trimmed = line.trim();
346 if trimmed.is_empty() {
347 return None;
348 }
349
350 let mut is_editable = false;
351 let mut requirement = trimmed.to_string();
352 let mut extracted_requirement = trimmed.to_string();
353
354 if let Some(rest) = trimmed.strip_prefix("-e") {
355 is_editable = true;
356 requirement = rest.trim().to_string();
357 extracted_requirement = format!("--editable {}", requirement);
358 } else if let Some(rest) = trimmed.strip_prefix("--editable") {
359 is_editable = true;
360 requirement = rest.trim().to_string();
361 extracted_requirement = format!("--editable {}", requirement);
362 }
363
364 let (requirement, hash_options) = split_hash_options(&requirement);
365 let requirement = requirement.trim();
366 if requirement.is_empty() {
367 return None;
368 }
369
370 let parsed = parse_requirement(requirement);
371
372 let pinned_version = parsed
373 .specifiers
374 .as_deref()
375 .and_then(extract_pinned_version);
376 let is_pinned = pinned_version.is_some();
377
378 let purl = parsed
379 .name
380 .as_ref()
381 .and_then(|name| create_pypi_purl(name, pinned_version.as_deref()));
382
383 let mut extra_data = HashMap::new();
384 extra_data.insert("is_editable".to_string(), JsonValue::Bool(is_editable));
385 extra_data.insert(
386 "link".to_string(),
387 parsed
388 .link
389 .clone()
390 .map(JsonValue::String)
391 .unwrap_or(JsonValue::Null),
392 );
393 extra_data.insert(
394 "hash_options".to_string(),
395 JsonValue::Array(hash_options.into_iter().map(JsonValue::String).collect()),
396 );
397 extra_data.insert("is_constraint".to_string(), JsonValue::Bool(false));
398 extra_data.insert(
399 "is_archive".to_string(),
400 parsed
401 .is_archive
402 .map(JsonValue::Bool)
403 .unwrap_or(JsonValue::Null),
404 );
405 extra_data.insert("is_wheel".to_string(), JsonValue::Bool(parsed.is_wheel));
406 extra_data.insert(
407 "is_url".to_string(),
408 parsed
409 .is_url
410 .map(JsonValue::Bool)
411 .unwrap_or(JsonValue::Null),
412 );
413 extra_data.insert(
414 "is_vcs_url".to_string(),
415 parsed
416 .is_vcs_url
417 .map(JsonValue::Bool)
418 .unwrap_or(JsonValue::Null),
419 );
420 extra_data.insert(
421 "is_name_at_url".to_string(),
422 JsonValue::Bool(parsed.is_name_at_url),
423 );
424 extra_data.insert(
425 "is_local_path".to_string(),
426 parsed
427 .is_local_path
428 .map(|value| value || is_editable)
429 .map(JsonValue::Bool)
430 .unwrap_or(JsonValue::Null),
431 );
432
433 if let Some(marker) = parsed.marker {
434 extra_data.insert("markers".to_string(), JsonValue::String(marker));
435 }
436
437 Some(Dependency {
438 purl,
439 extracted_requirement: Some(extracted_requirement),
440 scope: Some(scope.to_string()),
441 is_runtime: Some(is_runtime),
442 is_optional: Some(false),
443 is_pinned: Some(is_pinned),
444 is_direct: Some(true),
445 resolved_package: None,
446 extra_data: Some(extra_data),
447 })
448}
449
/// Separates `--hash=<value>` tokens from the rest of a requirement.
///
/// Returns the remaining whitespace-separated tokens joined by single
/// spaces, plus the hash values in order of appearance. A `--hash=` token
/// with an empty value is dropped from both results.
fn split_hash_options(input: &str) -> (String, Vec<String>) {
    let (hash_tokens, kept): (Vec<&str>, Vec<&str>) = input
        .split_whitespace()
        .partition(|token| token.starts_with("--hash="));

    let hashes: Vec<String> = hash_tokens
        .into_iter()
        .filter_map(|token| token.strip_prefix("--hash="))
        .filter(|value| !value.is_empty())
        .map(str::to_string)
        .collect();

    (kept.join(" "), hashes)
}
466
/// Result of `parse_requirement`: the pieces of one requirement entry.
///
/// The `Option<bool>` flags are `None` for a plain named requirement with no
/// link, and `Some(..)` whenever the requirement carries a link.
struct ParsedRequirement {
    /// Normalized package name, when one could be determined.
    name: Option<String>,
    /// Version specifier text as reported by the PEP 508 parser, if any.
    specifiers: Option<String>,
    /// Environment marker text as reported by the PEP 508 parser, if any.
    marker: Option<String>,
    /// URL or path the requirement points at, if any.
    link: Option<String>,
    /// Whether the link has a URL scheme or a VCS prefix.
    is_url: Option<bool>,
    /// Whether the link starts with a VCS prefix (`git+`, `hg+`, `svn+`, `bzr+`).
    is_vcs_url: Option<bool>,
    /// Whether the link is a filesystem path or `file:` URL.
    is_local_path: Option<bool>,
    /// Copied from the PEP 508 parser for requirements with a URL; `false` otherwise.
    is_name_at_url: bool,
    /// Whether the link ends with a known archive extension.
    is_archive: Option<bool>,
    /// Whether the link ends with `.whl`.
    is_wheel: bool,
}
479
480fn parse_requirement(input: &str) -> ParsedRequirement {
481 if let Some(parsed) = parse_pep508_requirement(input) {
482 if let Some(url) = parsed.url.clone() {
483 return parsed_with_link(parsed, &url);
484 }
485
486 if !is_link_like(input) {
487 let name = Some(normalize_pypi_name(&parsed.name));
488 return ParsedRequirement {
489 name,
490 specifiers: parsed.specifiers,
491 marker: parsed.marker,
492 link: None,
493 is_url: None,
494 is_vcs_url: None,
495 is_local_path: None,
496 is_name_at_url: false,
497 is_archive: None,
498 is_wheel: false,
499 };
500 }
501 }
502
503 if let Some((name, link)) = parse_link_with_name(input) {
504 let normalized_name = normalize_pypi_name(&name);
505 let link_info = parse_link_flags(&link);
506 return ParsedRequirement {
507 name: Some(normalized_name),
508 specifiers: None,
509 marker: None,
510 link: Some(link),
511 is_url: Some(link_info.is_url),
512 is_vcs_url: Some(link_info.is_vcs_url),
513 is_local_path: Some(link_info.is_local_path),
514 is_name_at_url: link_info.is_name_at_url,
515 is_archive: link_info.is_archive,
516 is_wheel: link_info.is_wheel,
517 };
518 }
519
520 let link_info = parse_link_flags(input);
521 ParsedRequirement {
522 name: None,
523 specifiers: None,
524 marker: None,
525 link: Some(input.to_string()),
526 is_url: Some(link_info.is_url),
527 is_vcs_url: Some(link_info.is_vcs_url),
528 is_local_path: Some(link_info.is_local_path),
529 is_name_at_url: link_info.is_name_at_url,
530 is_archive: link_info.is_archive,
531 is_wheel: link_info.is_wheel,
532 }
533}
534
535fn parsed_with_link(parsed: Pep508Requirement, link: &str) -> ParsedRequirement {
536 let name = normalize_pypi_name(&parsed.name);
537 let link_info = parse_link_flags(link);
538 ParsedRequirement {
539 name: Some(name),
540 specifiers: parsed.specifiers,
541 marker: parsed.marker,
542 link: Some(link.to_string()),
543 is_url: Some(link_info.is_url),
544 is_vcs_url: Some(link_info.is_vcs_url),
545 is_local_path: Some(link_info.is_local_path),
546 is_name_at_url: parsed.is_name_at_url,
547 is_archive: link_info.is_archive,
548 is_wheel: link_info.is_wheel,
549 }
550}
551
552fn parse_link_with_name(input: &str) -> Option<(String, String)> {
553 if let Some(egg) = extract_egg_name(input) {
554 return Some((egg, input.to_string()));
555 }
556 None
557}
558
559fn extract_egg_name(input: &str) -> Option<String> {
560 let fragment = input.split('#').nth(1)?;
561 let egg_part = fragment.strip_prefix("egg=")?;
562 let name_part = egg_part.split('&').next()?.trim();
563 if name_part.is_empty() {
564 return None;
565 }
566 let (name, _extras, _) = parse_pep508_requirement(name_part)
567 .map(|parsed| (parsed.name, parsed.extras, parsed.specifiers))
568 .unwrap_or_else(|| (name_part.to_string(), Vec::new(), None));
569 Some(name)
570}
571
/// Classification of a link string, as computed by `parse_link_flags`.
struct LinkFlags {
    /// The link contains `://`, starts with `file:`, or has a VCS prefix.
    is_url: bool,
    /// The link starts with `git+`, `hg+`, `svn+` or `bzr+`.
    is_vcs_url: bool,
    /// The link starts with `./`, `../`, `/`, `~` or `file:`.
    is_local_path: bool,
    /// Always `false` when produced by `parse_link_flags`.
    is_name_at_url: bool,
    /// `Some(true)` for a known archive extension, `Some(false)` for another
    /// URL or local path, `None` when the link is neither.
    is_archive: Option<bool>,
    /// The link ends with `.whl`.
    is_wheel: bool,
}
580
581fn parse_link_flags(link: &str) -> LinkFlags {
582 let trimmed = link.trim();
583 let is_vcs_url = trimmed.starts_with("git+")
584 || trimmed.starts_with("hg+")
585 || trimmed.starts_with("svn+")
586 || trimmed.starts_with("bzr+");
587 let has_scheme = trimmed.contains("://") || trimmed.starts_with("file:");
588 let is_local_path = trimmed.starts_with("./")
589 || trimmed.starts_with("../")
590 || trimmed.starts_with('/')
591 || trimmed.starts_with('~')
592 || trimmed.starts_with("file:");
593
594 let is_wheel = trimmed.ends_with(".whl");
595 let is_archive = if is_wheel
596 || trimmed.ends_with(".zip")
597 || trimmed.ends_with(".tar.gz")
598 || trimmed.ends_with(".tgz")
599 || trimmed.ends_with(".tar.bz2")
600 || trimmed.ends_with(".tar")
601 {
602 Some(true)
603 } else if has_scheme || is_local_path {
604 Some(false)
605 } else {
606 None
607 };
608
609 LinkFlags {
610 is_url: has_scheme || is_vcs_url,
611 is_vcs_url,
612 is_local_path,
613 is_name_at_url: false,
614 is_archive,
615 is_wheel,
616 }
617}
618
/// Returns `true` when `input` looks like a URL, a VCS link or a filesystem
/// path rather than a plain package requirement.
///
/// The trimmed input qualifies when it contains `://` or starts with a VCS
/// prefix, `file:`, `./`, `../`, `/` or `~`.
fn is_link_like(input: &str) -> bool {
    const LINK_PREFIXES: [&str; 9] = [
        "git+", "hg+", "svn+", "bzr+", "file:", "./", "../", "/", "~",
    ];

    let candidate = input.trim();
    candidate.contains("://")
        || LINK_PREFIXES
            .iter()
            .any(|&prefix| candidate.starts_with(prefix))
}
632
/// Extracts the exact version from a specifier string that pins a single
/// version with `==` or `===`.
///
/// Returns `None` when:
/// - the string holds more than one clause (it contains a comma),
/// - the operator is neither `==` nor `===`,
/// - the version is empty, or
/// - the version contains a `*` wildcard, which matches a range rather
///   than one version.
fn extract_pinned_version(specifiers: &str) -> Option<String> {
    let trimmed = specifiers.trim();
    if trimmed.contains(',') {
        return None;
    }

    // `===` must be tried before `==`: every `===` specifier also starts
    // with `==`, so the shorter prefix would leave a stray `=` at the front
    // of the version (`===1.0` would become `=1.0`).
    let version = trimmed
        .strip_prefix("===")
        .or_else(|| trimmed.strip_prefix("=="))?
        .trim();

    if version.is_empty() || version.contains('*') {
        None
    } else {
        Some(version.to_string())
    }
}
654
655fn create_pypi_purl(name: &str, version: Option<&str>) -> Option<String> {
656 let mut purl = PackageUrl::new(RequirementsTxtParser::PACKAGE_TYPE.as_str(), name).ok()?;
657 if let Some(version) = version {
658 purl.with_version(version).ok()?;
659 }
660 Some(purl.to_string())
661}
662
/// Normalizes a package name: surrounding whitespace is trimmed, ASCII
/// letters are lower-cased, and every run of `-`, `_` and `.` collapses into
/// a single `-`.
fn normalize_pypi_name(name: &str) -> String {
    let source = name.trim();
    let mut out = String::with_capacity(source.len());

    for ch in source.chars() {
        if matches!(ch, '-' | '_' | '.') {
            // `-` is only ever pushed by this branch, so a trailing `-`
            // means the previous character was a separator too.
            if !out.ends_with('-') {
                out.push('-');
            }
        } else {
            out.push(ch.to_ascii_lowercase());
        }
    }

    out
}
681
// Registers this parser's metadata: description, path patterns, package
// type, primary language and a documentation URL (argument meanings are
// presumed from their values — confirm against the `register_parser!` macro).
// NOTE(review): `**/requirements*.in` is listed here, but
// `RequirementsTxtParser::is_match` in this file only accepts `.txt` names —
// confirm which of the two is intended.
crate::register_parser!(
    "pip requirements file",
    &[
        "**/requirements*.txt",
        "**/requirements*.in",
        "**/requirements/*.txt"
    ],
    "pypi",
    "Python",
    Some("https://pip.pypa.io/en/latest/reference/requirements-file-format/"),
);