1pub mod types;
20
21pub mod client;
23
24pub mod converters;
26
27pub mod frontmatter;
29
30pub mod detection;
32
33pub mod config;
35
36pub mod utils;
38
39use crate::client::HttpClient;
40use crate::converters::ConverterRegistry;
41use crate::detection::UrlDetector;
42use crate::types::{Markdown, MarkdownError, UrlType};
43use tracing::{debug, error, info, instrument, warn};
44
45pub struct MarkdownDown {
82 config: crate::config::Config,
83 detector: UrlDetector,
84 registry: ConverterRegistry,
85}
86
87impl MarkdownDown {
88 pub fn new() -> Self {
98 Self {
99 config: crate::config::Config::default(),
100 detector: UrlDetector::new(),
101 registry: ConverterRegistry::new(),
102 }
103 }
104
105 pub fn with_config(config: crate::config::Config) -> Self {
123 let http_client = HttpClient::with_config(&config.http, &config.auth);
125
126 let registry =
128 ConverterRegistry::with_config(http_client, config.html.clone(), &config.output);
129
130 Self {
131 config,
132 detector: UrlDetector::new(),
133 registry,
134 }
135 }
136
137 #[instrument(skip(self), fields(url_type))]
171 pub async fn convert_url(&self, url: &str) -> Result<Markdown, MarkdownError> {
172 info!("Starting URL conversion for: {}", url);
173
174 debug!("Normalizing URL");
176 let normalized_url = self.detector.normalize_url(url)?;
177 debug!("Normalized URL: {}", normalized_url);
178
179 debug!("Detecting URL type");
181 let url_type = self.detector.detect_type(&normalized_url)?;
182 tracing::Span::current().record("url_type", format!("{url_type}"));
183 info!("Detected URL type: {}", url_type);
184
185 debug!("Looking up converter for type: {}", url_type);
187 let converter = self.registry.get_converter(&url_type).ok_or_else(|| {
188 error!("No converter available for URL type: {}", url_type);
189 MarkdownError::LegacyConfigurationError {
190 message: format!("No converter available for URL type: {url_type}"),
191 }
192 })?;
193 debug!("Found converter for type: {}", url_type);
194
195 info!("Starting conversion with {} converter", url_type);
197 match converter.convert(&normalized_url).await {
198 Ok(result) => {
199 info!(
200 "Successfully converted URL to markdown ({} chars)",
201 result.as_str().len()
202 );
203 Ok(result)
204 }
205 Err(e) => {
206 error!("Primary converter failed: {}", e);
207
208 if e.is_recoverable() && url_type != UrlType::Html {
210 warn!("Attempting HTML fallback conversion for recoverable error");
211
212 if let Some(html_converter) = self.registry.get_converter(&UrlType::Html) {
214 match html_converter.convert(&normalized_url).await {
215 Ok(fallback_result) => {
216 warn!(
217 "Fallback HTML conversion succeeded ({} chars)",
218 fallback_result.as_str().len()
219 );
220 return Ok(fallback_result);
221 }
222 Err(fallback_error) => {
223 error!("Fallback HTML conversion also failed: {}", fallback_error);
224 }
225 }
226 }
227 }
228
229 Err(e)
230 }
231 }
232 }
233
234 pub fn config(&self) -> &crate::config::Config {
236 &self.config
237 }
238
239 pub fn detector(&self) -> &UrlDetector {
241 &self.detector
242 }
243
244 pub fn registry(&self) -> &ConverterRegistry {
246 &self.registry
247 }
248
249 pub fn supported_types(&self) -> Vec<crate::types::UrlType> {
251 self.registry.supported_types()
252 }
253}
254
255impl Default for MarkdownDown {
256 fn default() -> Self {
257 Self::new()
258 }
259}
260
261pub async fn convert_url(url: &str) -> Result<Markdown, MarkdownError> {
285 MarkdownDown::new().convert_url(url).await
286}
287
288pub async fn convert_url_with_config(
315 url: &str,
316 config: crate::config::Config,
317) -> Result<Markdown, MarkdownError> {
318 MarkdownDown::with_config(config).convert_url(url).await
319}
320
321pub fn detect_url_type(url: &str) -> Result<crate::types::UrlType, MarkdownError> {
343 let detector = UrlDetector::new();
344 detector.detect_type(url)
345}
346
347pub use config::Config;
349pub use converters::{Converter, HtmlConverter};
350pub use types::{Frontmatter, Url};
351
352pub const VERSION: &str = env!("CARGO_PKG_VERSION");
354
#[cfg(test)]
mod tests {
    use super::*;
    use crate::converters::GitHubConverter;
    use crate::detection::UrlDetector;
    use crate::types::UrlType;
    use std::time::Duration;

    // `VERSION` must look like a dotted version number.
    #[test]
    fn test_version_available() {
        assert!(VERSION.chars().any(|c| c.is_ascii_digit()));
        assert!(VERSION.contains('.'));
        assert!(
            VERSION.split('.').count() >= 2,
            "Version should have at least major.minor format"
        );
    }

    // `MarkdownDown::new()` must expose the default configuration values.
    #[test]
    fn test_markdowndown_with_default_config() {
        let md = MarkdownDown::new();
        let config = md.config();

        assert_eq!(config.http.timeout, Duration::from_secs(30));
        assert_eq!(config.http.max_retries, 3);
        assert_eq!(config.http.retry_delay, Duration::from_secs(1));
        assert_eq!(config.http.max_redirects, 10);
        assert!(config.auth.github_token.is_none());
        assert!(config.auth.office365_token.is_none());
        assert!(config.auth.google_api_key.is_none());
        assert!(config.output.include_frontmatter);
        assert_eq!(config.output.max_consecutive_blank_lines, 2);
    }

    // Values set through the builder must be readable back via `config()`.
    #[test]
    fn test_markdowndown_with_custom_config() {
        let config = Config::builder()
            .timeout_seconds(60)
            .user_agent("TestApp/1.0")
            .max_retries(5)
            .github_token("test_token")
            .include_frontmatter(false)
            .max_consecutive_blank_lines(1)
            .build();

        let md = MarkdownDown::with_config(config);
        let stored_config = md.config();

        assert_eq!(stored_config.http.timeout, Duration::from_secs(60));
        assert_eq!(stored_config.http.user_agent, "TestApp/1.0");
        assert_eq!(stored_config.http.max_retries, 5);
        assert_eq!(
            stored_config.auth.github_token,
            Some("test_token".to_string())
        );
        assert!(!stored_config.output.include_frontmatter);
        assert_eq!(stored_config.output.max_consecutive_blank_lines, 1);
    }

    // Every builder setter used here must land in the built `Config`.
    #[test]
    fn test_config_builder_fluent_interface() {
        let config = Config::builder()
            .github_token("ghp_test_token")
            .office365_token("office_token")
            .google_api_key("google_key")
            .timeout_seconds(45)
            .user_agent("IntegrationTest/2.0")
            .max_retries(3)
            .include_frontmatter(true)
            .custom_frontmatter_field("project", "markdowndown")
            .custom_frontmatter_field("version", "test")
            .normalize_whitespace(false)
            .max_consecutive_blank_lines(3)
            .build();

        assert_eq!(config.auth.github_token, Some("ghp_test_token".to_string()));
        assert_eq!(
            config.auth.office365_token,
            Some("office_token".to_string())
        );
        assert_eq!(config.auth.google_api_key, Some("google_key".to_string()));
        assert_eq!(config.http.timeout, Duration::from_secs(45));
        assert_eq!(config.http.user_agent, "IntegrationTest/2.0");
        assert_eq!(config.http.max_retries, 3);
        assert!(config.output.include_frontmatter);
        // Custom fields are expected in insertion order.
        assert_eq!(config.output.custom_frontmatter_fields.len(), 2);
        assert_eq!(
            config.output.custom_frontmatter_fields[0],
            ("project".to_string(), "markdowndown".to_string())
        );
        assert_eq!(
            config.output.custom_frontmatter_fields[1],
            ("version".to_string(), "test".to_string())
        );
        assert!(!config.output.normalize_whitespace);
        assert_eq!(config.output.max_consecutive_blank_lines, 3);
    }

    // Pins the values produced by `Config::default()`.
    #[test]
    fn test_config_from_default() {
        let config = Config::default();

        // HTTP settings
        assert_eq!(config.http.timeout, Duration::from_secs(30));
        assert!(config.http.user_agent.starts_with("markdowndown/"));
        assert_eq!(config.http.max_retries, 3);
        assert_eq!(config.http.retry_delay, Duration::from_secs(1));
        assert_eq!(config.http.max_redirects, 10);

        // Authentication settings
        assert!(config.auth.github_token.is_none());
        assert!(config.auth.office365_token.is_none());
        assert!(config.auth.google_api_key.is_none());

        // Output settings
        assert!(config.output.include_frontmatter);
        assert!(config.output.custom_frontmatter_fields.is_empty());
        assert!(config.output.normalize_whitespace);
        assert_eq!(config.output.max_consecutive_blank_lines, 2);
    }

    // The default registry must report exactly these four URL types.
    #[test]
    fn test_supported_url_types() {
        let md = MarkdownDown::new();
        let supported_types = md.supported_types();

        assert!(supported_types.contains(&UrlType::Html));
        assert!(supported_types.contains(&UrlType::GoogleDocs));
        assert!(supported_types.contains(&UrlType::GitHubIssue));
        assert!(supported_types.contains(&UrlType::LocalFile));

        assert_eq!(supported_types.len(), 4);
    }

    // The free `detect_url_type` function must classify representative URLs.
    #[test]
    fn test_detect_url_type_integration() {
        let html_type = detect_url_type("https://example.com/article.html")
            .expect("plain HTML URL should be detected");
        assert_eq!(html_type, UrlType::Html);

        let gdocs_type = detect_url_type("https://docs.google.com/document/d/abc123/edit")
            .expect("Google Docs URL should be detected");
        assert_eq!(gdocs_type, UrlType::GoogleDocs);

        let github_type = detect_url_type("https://github.com/owner/repo/issues/123")
            .expect("GitHub issue URL should be detected");
        assert_eq!(github_type, UrlType::GitHubIssue);

        assert!(detect_url_type("not-a-url").is_err());
    }

    // Issue and pull-request URLs must both be detected as `GitHubIssue` and
    // parsed into owner / repo / number.
    #[test]
    fn test_github_integration_issue_and_pr() {
        let detector = UrlDetector::new();
        let converter = GitHubConverter::new();

        let issue_url = "https://github.com/microsoft/vscode/issues/12345";
        let issue_type = detector.detect_type(issue_url).unwrap();
        assert_eq!(issue_type, UrlType::GitHubIssue);

        let parsed_issue = converter.parse_github_url(issue_url).unwrap();
        assert_eq!(parsed_issue.owner, "microsoft");
        assert_eq!(parsed_issue.repo, "vscode");
        assert_eq!(parsed_issue.number, 12345);

        let pr_url = "https://github.com/rust-lang/rust/pull/98765";
        let pr_type = detector.detect_type(pr_url).unwrap();
        assert_eq!(pr_type, UrlType::GitHubIssue);

        let parsed_pr = converter.parse_github_url(pr_url).unwrap();
        assert_eq!(parsed_pr.owner, "rust-lang");
        assert_eq!(parsed_pr.repo, "rust");
        assert_eq!(parsed_pr.number, 98765);
    }

    mod comprehensive_coverage_tests {
        use super::*;
        use wiremock::matchers::{method, path};
        use wiremock::{Mock, MockServer, ResponseTemplate};

        // Starts a mock server that answers `GET <route>` with status 200 and
        // `body` as the response body. Shared by the conversion tests below.
        async fn mount_html_page(route: &str, body: &str) -> MockServer {
            let server = MockServer::start().await;

            Mock::given(method("GET"))
                .and(path(route))
                .respond_with(ResponseTemplate::new(200).set_body_string(body))
                .mount(&server)
                .await;

            server
        }

        // `detector()` must hand back a usable detector.
        #[test]
        fn test_detector_getter() {
            let md = MarkdownDown::new();

            let detected = md
                .detector()
                .detect_type("https://example.com/page.html")
                .expect("plain HTML URL should be detected");
            assert_eq!(detected, UrlType::Html);
        }

        // `registry()` must hand back a registry that knows the HTML type.
        #[test]
        fn test_registry_getter() {
            let md = MarkdownDown::new();

            let supported_types = md.registry().supported_types();
            assert!(!supported_types.is_empty());
            assert!(supported_types.contains(&UrlType::Html));
        }

        // `Default::default()` and `new()` must agree on the sampled settings.
        #[test]
        fn test_default_trait_implementation() {
            let md1 = MarkdownDown::new();
            let md2 = MarkdownDown::default();

            assert_eq!(md1.config().http.timeout, md2.config().http.timeout);
            assert_eq!(md1.config().http.max_retries, md2.config().http.max_retries);
            assert_eq!(
                md1.config().auth.github_token,
                md2.config().auth.github_token
            );
            assert_eq!(
                md1.config().output.include_frontmatter,
                md2.config().output.include_frontmatter
            );
        }

        // The free `convert_url` function converts a served HTML page.
        #[tokio::test]
        async fn test_convert_url_convenience_function() {
            let mock_server =
                mount_html_page("/test-page", "<h1>Test Content</h1><p>This is a test.</p>").await;

            let url = format!("{}/test-page", mock_server.uri());
            let markdown = convert_url(&url)
                .await
                .expect("conversion of the mock HTML page should succeed");

            assert!(markdown.as_str().contains("# Test Content"));
            assert!(markdown.as_str().contains("This is a test"));
        }

        // The free `convert_url_with_config` function honours the supplied
        // configuration (frontmatter disabled here).
        #[tokio::test]
        async fn test_convert_url_with_config_convenience_function() {
            let mock_server = mount_html_page(
                "/custom-config-page",
                "<h1>Custom Config Test</h1><p>Testing with custom configuration.</p>",
            )
            .await;

            let config = Config::builder()
                .timeout_seconds(45)
                .user_agent("TestConvenience/1.0")
                .include_frontmatter(false)
                .build();

            let url = format!("{}/custom-config-page", mock_server.uri());
            let markdown = convert_url_with_config(&url, config)
                .await
                .expect("conversion with custom config should succeed");

            assert!(markdown.as_str().contains("# Custom Config Test"));
            assert!(markdown
                .as_str()
                .contains("Testing with custom configuration"));
            // With frontmatter disabled the output must not open with `---`.
            assert!(!markdown.as_str().starts_with("---"));
        }

        // NOTE(review): despite its name, this test mounts an HTTP 500
        // response and only checks that conversion fails; it does not appear
        // to reach the "no converter available" branch. Name kept so existing
        // test filters continue to match.
        #[tokio::test]
        async fn test_convert_url_error_no_converter_available() {
            let mock_server = MockServer::start().await;

            Mock::given(method("GET"))
                .and(path("/error-test"))
                .respond_with(ResponseTemplate::new(500))
                .mount(&mock_server)
                .await;

            let md = MarkdownDown::new();
            let url = format!("{}/error-test", mock_server.uri());
            let result = md.convert_url(&url).await;

            assert!(result.is_err());
        }

        // NOTE(review): this only asserts that a served HTML page converts
        // successfully; nothing here checks that the fallback path (rather
        // than the primary converter) produced the result. Name kept so
        // existing test filters continue to match.
        #[tokio::test]
        async fn test_fallback_conversion_logic() {
            let mock_server = mount_html_page(
                "/fallback-test",
                "<h1>Fallback Test</h1><p>This should work via fallback.</p>",
            )
            .await;

            let md = MarkdownDown::new();
            let url = format!("{}/fallback-test", mock_server.uri());
            let markdown = md
                .convert_url(&url)
                .await
                .expect("conversion of the mock HTML page should succeed");

            assert!(markdown.as_str().contains("# Fallback Test"));
            assert!(markdown.as_str().contains("This should work via fallback"));
        }

        // A string that is not a URL must be rejected as `InvalidUrl`, with
        // the offending input recorded in the error context.
        #[tokio::test]
        async fn test_convert_url_invalid_url_error() {
            let md = MarkdownDown::new();
            let result = md.convert_url("not-a-valid-url").await;

            match result.unwrap_err() {
                MarkdownError::ValidationError { kind, context } => {
                    assert_eq!(kind, crate::types::ValidationErrorKind::InvalidUrl);
                    assert_eq!(context.url, "not-a-valid-url");
                }
                other => panic!("Expected ValidationError for invalid URL, got {other:?}"),
            }
        }

        // A syntactically broken URL must be rejected the same way.
        #[tokio::test]
        async fn test_convert_url_malformed_url_error() {
            let md = MarkdownDown::new();
            let result = md.convert_url("http://[invalid-host").await;

            match result.unwrap_err() {
                MarkdownError::ValidationError { kind, context } => {
                    assert_eq!(kind, crate::types::ValidationErrorKind::InvalidUrl);
                    assert_eq!(context.url, "http://[invalid-host");
                }
                other => panic!("Expected ValidationError for malformed URL, got {other:?}"),
            }
        }

        // The `#[instrument]`-annotated method must still convert normally.
        #[tokio::test]
        async fn test_successful_conversion_with_instrumentation() {
            let mock_server = mount_html_page(
                "/instrumentation-test",
                "<h1>Instrumentation Test</h1><p>Testing the instrumentation decorator.</p>",
            )
            .await;

            let md = MarkdownDown::new();
            let url = format!("{}/instrumentation-test", mock_server.uri());
            let markdown = md
                .convert_url(&url)
                .await
                .expect("conversion of the mock HTML page should succeed");

            assert!(markdown.as_str().contains("# Instrumentation Test"));
            assert!(markdown
                .as_str()
                .contains("Testing the instrumentation decorator"));
        }

        // Exercises all accessors on an instance built from a custom config.
        #[test]
        fn test_markdowndown_accessors_comprehensive() {
            let config = Config::builder()
                .timeout_seconds(25)
                .user_agent("AccessorTest/1.0")
                .github_token("test-accessor-token")
                .include_frontmatter(true)
                .build();

            let md = MarkdownDown::with_config(config);

            // config()
            let stored_config = md.config();
            assert_eq!(stored_config.http.timeout, Duration::from_secs(25));
            assert_eq!(stored_config.http.user_agent, "AccessorTest/1.0");
            assert_eq!(
                stored_config.auth.github_token,
                Some("test-accessor-token".to_string())
            );
            assert!(stored_config.output.include_frontmatter);

            // detector()
            let html_type = md
                .detector()
                .detect_type("https://example.com/test.html")
                .expect("plain HTML URL should be detected");
            assert_eq!(html_type, UrlType::Html);

            // registry()
            let supported = md.registry().supported_types();
            assert!(supported.contains(&UrlType::Html));
            assert!(supported.contains(&UrlType::GoogleDocs));
            assert!(supported.contains(&UrlType::GitHubIssue));
            assert!(supported.contains(&UrlType::LocalFile));

            // supported_types() must mirror the registry.
            let md_supported = md.supported_types();
            assert_eq!(md_supported, supported);
        }
    }
}