url_preview/
preview_service.rs

1#[cfg(feature = "github")]
2use crate::github_types::{is_github_url, GitHubDetailedInfo};
3use crate::{
4    is_twitter_url, CacheStrategy, Fetcher, Preview, PreviewError, PreviewGenerator,
5    UrlPreviewGenerator,
6};
7#[cfg(feature = "browser")]
8use crate::browser_fetcher::BrowserPreviewService;
9#[cfg(feature = "browser")]
10use crate::mcp_client::{McpConfig, BrowserUsagePolicy};
11use std::sync::Arc;
12use tokio::sync::Semaphore;
13#[cfg(all(feature = "logging", feature = "github"))]
14use tracing::warn;
15#[cfg(feature = "logging")]
16use tracing::{debug, instrument};
17use url::Url;
18
19/// PreviewService provides a unified preview generation service
20/// It can automatically identify different types of URLs and use appropriate processing strategies
21#[derive(Clone)]
22pub struct PreviewService {
23    pub default_generator: Arc<UrlPreviewGenerator>,
24    #[cfg(feature = "twitter")]
25    pub twitter_generator: Arc<UrlPreviewGenerator>,
26    #[cfg(feature = "github")]
27    pub github_generator: Arc<UrlPreviewGenerator>,
28    #[cfg(feature = "browser")]
29    pub browser_service: Option<Arc<BrowserPreviewService>>,
30    // Max Concurrent Requests
31    semaphore: Arc<Semaphore>,
32}
33
/// Default number of semaphore permits, i.e. the default upper bound on
/// preview requests that may be in flight at once.
pub const MAX_CONCURRENT_REQUESTS: usize = 500;
35
36impl Default for PreviewService {
37    fn default() -> Self {
38        Self::new()
39    }
40}
41
42impl PreviewService {
43    /// Creates a new preview service instance with default cache capacity
44    pub fn new() -> Self {
45        // Set 1000 cache entries for each generator
46        // This means that up to 1000 different URL previews can be cached for each type (Normal/Twitter/GitHub)
47        // 1000 cache entries take about 1-2MB memory
48        // Total of 3-6MB for three generators is reasonable for modern systems
49        Self::with_cache_cap(1000)
50    }
51
52    pub fn with_cache_cap(cache_capacity: usize) -> Self {
53        #[cfg(feature = "logging")]
54        debug!(
55            "Initializing PreviewService with cache capacity: {}",
56            cache_capacity
57        );
58
59        let default_generator = Arc::new(UrlPreviewGenerator::new_with_fetcher(
60            cache_capacity,
61            CacheStrategy::UseCache,
62            Fetcher::new(),
63        ));
64
65        #[cfg(feature = "twitter")]
66        let twitter_generator = Arc::new(UrlPreviewGenerator::new_with_fetcher(
67            cache_capacity,
68            CacheStrategy::UseCache,
69            Fetcher::new_twitter_client(),
70        ));
71
72        #[cfg(feature = "github")]
73        let github_generator = Arc::new(UrlPreviewGenerator::new_with_fetcher(
74            cache_capacity,
75            CacheStrategy::UseCache,
76            Fetcher::new_github_client(),
77        ));
78
79        let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_REQUESTS));
80
81        #[cfg(feature = "logging")]
82        debug!("PreviewService initialized successfully");
83
84        Self {
85            default_generator,
86            #[cfg(feature = "twitter")]
87            twitter_generator,
88            #[cfg(feature = "github")]
89            github_generator,
90            #[cfg(feature = "browser")]
91            browser_service: None,
92            semaphore,
93        }
94    }
95
96    pub fn no_cache() -> Self {
97        #[cfg(feature = "logging")]
98        debug!("Initializing PreviewService with cache capacity: {}", 0);
99
100        let default_generator = Arc::new(UrlPreviewGenerator::new_with_fetcher(
101            0,
102            CacheStrategy::NoCache,
103            Fetcher::new(),
104        ));
105
106        #[cfg(feature = "twitter")]
107        let twitter_generator = Arc::new(UrlPreviewGenerator::new_with_fetcher(
108            0,
109            CacheStrategy::NoCache,
110            Fetcher::new_twitter_client(),
111        ));
112
113        #[cfg(feature = "github")]
114        let github_generator = Arc::new(UrlPreviewGenerator::new_with_fetcher(
115            0,
116            CacheStrategy::NoCache,
117            Fetcher::new_github_client(),
118        ));
119
120        let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_REQUESTS));
121
122        #[cfg(feature = "logging")]
123        debug!("PreviewService initialized successfully");
124
125        Self {
126            default_generator,
127            #[cfg(feature = "twitter")]
128            twitter_generator,
129            #[cfg(feature = "github")]
130            github_generator,
131            #[cfg(feature = "browser")]
132            browser_service: None,
133            semaphore,
134        }
135    }
136
137    pub fn new_with_config(config: PreviewServiceConfig) -> Self {
138        #[cfg(feature = "logging")]
139        debug!("Initializing PreviewService with custom configuration");
140
141        let default_generator = Arc::new(UrlPreviewGenerator::new_with_fetcher(
142            config.cache_capacity,
143            config.cache_strategy,
144            config.default_fetcher.unwrap_or_default(),
145        ));
146
147        #[cfg(feature = "twitter")]
148        let twitter_generator = Arc::new(UrlPreviewGenerator::new_with_fetcher(
149            config.cache_capacity,
150            config.cache_strategy,
151            config
152                .twitter_fetcher
153                .unwrap_or_else(Fetcher::new_twitter_client),
154        ));
155
156        #[cfg(feature = "github")]
157        let github_generator = Arc::new(UrlPreviewGenerator::new_with_fetcher(
158            config.cache_capacity,
159            config.cache_strategy,
160            config
161                .github_fetcher
162                .unwrap_or_else(Fetcher::new_github_client),
163        ));
164
165        let semaphore = Arc::new(Semaphore::new(config.max_concurrent_requests));
166        
167        #[cfg(feature = "browser")]
168        let browser_service = if let Some(mcp_config) = config.mcp_config {
169            Some(Arc::new(BrowserPreviewService::new(
170                mcp_config,
171                config.browser_usage_policy,
172            )))
173        } else {
174            None
175        };
176
177        #[cfg(feature = "logging")]
178        debug!("PreviewService initialized with custom configuration");
179
180        Self {
181            default_generator,
182            #[cfg(feature = "twitter")]
183            twitter_generator,
184            #[cfg(feature = "github")]
185            github_generator,
186            #[cfg(feature = "browser")]
187            browser_service,
188            semaphore,
189        }
190    }
191
192    #[cfg(feature = "github")]
193    fn extract_github_info(url: &str) -> Option<(String, String)> {
194        let parsed_url = Url::parse(url).ok()?;
195        if !parsed_url.host_str()?.contains("github.com") {
196            return None;
197        }
198
199        let path_segments: Vec<&str> = parsed_url.path_segments()?.collect();
200        if path_segments.len() >= 2 {
201            return Some((path_segments[0].to_string(), path_segments[1].to_string()));
202        }
203        None
204    }
205
206    #[cfg(feature = "github")]
207    #[cfg_attr(feature = "logging", instrument(level = "debug", skip(self)))]
208    async fn generate_github_preview(&self, url: &str) -> Result<Preview, PreviewError> {
209        #[cfg(feature = "cache")]
210        if let CacheStrategy::UseCache = self.github_generator.cache_strategy {
211            if let Some(cached) = self.github_generator.cache.get(url).await {
212                return Ok(cached);
213            }
214        }
215
216        let (owner, repo_name) = Self::extract_github_info(url).ok_or_else(|| {
217            #[cfg(feature = "logging")]
218            warn!("GitHub URL parsing failed: {}", url);
219            PreviewError::ExtractError("Invalid GitHub URL format".into())
220        })?;
221
222        match self
223            .github_generator
224            .fetcher
225            .fetch_github_basic_preview(&owner, &repo_name)
226            .await
227        {
228            Ok(basic_info) => {
229                #[cfg(feature = "logging")]
230                debug!("Found GitHub Repo {}/{} basic infos", owner, repo_name);
231
232                let preview = Preview {
233                    url: url.to_string(),
234                    title: basic_info.title,
235                    description: basic_info.description,
236                    image_url: basic_info.image_url,
237                    site_name: Some("GitHub".to_string()),
238                    favicon: Some(
239                        "https://github.githubassets.com/favicons/favicon.svg".to_string(),
240                    ),
241                };
242
243                #[cfg(feature = "cache")]
244                if let CacheStrategy::UseCache = self.github_generator.cache_strategy {
245                    self.github_generator
246                        .cache
247                        .set(url.to_string(), preview.clone())
248                        .await;
249                }
250
251                Ok(preview)
252            }
253            Err(_e) => {
254                #[cfg(feature = "logging")]
255                warn!(
256                    error = ?_e,
257                    "Failed to get GitHub basic preview, will use general preview generator as fallback"
258                );
259                self.github_generator.generate_preview(url).await
260            }
261        }
262    }
263
264    #[cfg_attr(feature = "logging", instrument(level = "debug", skip(self)))]
265    pub async fn generate_preview(&self, url: &str) -> Result<Preview, PreviewError> {
266        #[cfg(feature = "logging")]
267        debug!("Starting preview generation for URL: {}", url);
268
269        let _permit = self
270            .semaphore
271            .acquire()
272            .await
273            .map_err(|_| PreviewError::ConcurrencyLimitError)?;
274
275        let _ = Url::parse(url)
276            .map_err(|e| PreviewError::ParseError(format!("Invalid URL format: {e}")))?;
277        
278        // Try browser service first if available
279        #[cfg(feature = "browser")]
280        if let Some(browser_service) = &self.browser_service {
281            if browser_service.should_use_browser(url) {
282                #[cfg(feature = "logging")]
283                debug!("Using browser service for URL: {}", url);
284                
285                match browser_service.generate_preview(url).await {
286                    Ok(preview) => return Ok(preview),
287                    Err(_e) => {
288                        #[cfg(feature = "logging")]
289                        debug!("Browser service failed, falling back: {}", _e);
290                    }
291                }
292            }
293        }
294
295        if is_twitter_url(url) {
296            #[cfg(feature = "logging")]
297            debug!("Detected Twitter URL, using specialized handler");
298            #[cfg(feature = "twitter")]
299            {
300                self.twitter_generator.generate_preview(url).await
301            }
302            #[cfg(not(feature = "twitter"))]
303            {
304                self.default_generator.generate_preview(url).await
305            }
306        } else if cfg!(feature = "github") && {
307            #[cfg(feature = "github")]
308            {
309                is_github_url(url)
310            }
311            #[cfg(not(feature = "github"))]
312            {
313                false
314            }
315        } {
316            #[cfg(feature = "logging")]
317            debug!("Detected GitHub URL, using specialized handler");
318            #[cfg(feature = "github")]
319            {
320                self.generate_github_preview(url).await
321            }
322            #[cfg(not(feature = "github"))]
323            {
324                self.default_generator.generate_preview(url).await
325            }
326        } else {
327            #[cfg(feature = "logging")]
328            debug!("Using default URL handler");
329            self.default_generator.generate_preview(url).await
330        }
331    }
332
333    #[cfg_attr(feature = "logging", instrument(level = "debug", skip(self)))]
334    pub async fn generate_preview_with_concurrency(
335        &self,
336        url: &str,
337    ) -> Result<Preview, PreviewError> {
338        #[cfg(feature = "logging")]
339        debug!("Starting preview generation for URL: {}", url);
340
341        let _permit = self
342            .semaphore
343            .acquire()
344            .await
345            .map_err(|_| PreviewError::ConcurrencyLimitError)?;
346
347        let _ = Url::parse(url)
348            .map_err(|e| PreviewError::ParseError(format!("Invalid URL format: {e}")))?;
349
350        if is_twitter_url(url) {
351            #[cfg(feature = "logging")]
352            debug!("Detected Twitter URL, using specialized handler");
353            #[cfg(feature = "twitter")]
354            {
355                self.twitter_generator.generate_preview(url).await
356            }
357            #[cfg(not(feature = "twitter"))]
358            {
359                self.default_generator.generate_preview(url).await
360            }
361        } else if cfg!(feature = "github") && {
362            #[cfg(feature = "github")]
363            {
364                is_github_url(url)
365            }
366            #[cfg(not(feature = "github"))]
367            {
368                false
369            }
370        } {
371            #[cfg(feature = "logging")]
372            debug!("Detected GitHub URL, using specialized handler");
373            #[cfg(feature = "github")]
374            {
375                self.generate_github_preview(url).await
376            }
377            #[cfg(not(feature = "github"))]
378            {
379                self.default_generator.generate_preview(url).await
380            }
381        } else {
382            #[cfg(feature = "logging")]
383            debug!("Using default URL handler");
384            self.default_generator.generate_preview(url).await
385        }
386    }
387
388    #[cfg(feature = "github")]
389    pub async fn generate_github_basic_preview(&self, url: &str) -> Result<Preview, PreviewError> {
390        let (owner, repo) = Self::extract_github_info(url)
391            .ok_or_else(|| PreviewError::ExtractError("Invalid GitHub URL format".into()))?;
392
393        let basic_info = self
394            .github_generator
395            .fetcher
396            .fetch_github_basic_preview(&owner, &repo)
397            .await?;
398
399        Ok(Preview {
400            url: url.to_string(),
401            title: basic_info.title,
402            description: basic_info.description,
403            image_url: basic_info.image_url,
404            site_name: Some("GitHub".to_string()),
405            favicon: Some("https://github.githubassets.com/favicons/favicon.svg".to_string()),
406        })
407    }
408
409    #[cfg(feature = "github")]
410    pub async fn get_github_detailed_info(
411        &self,
412        url: &str,
413    ) -> Result<GitHubDetailedInfo, PreviewError> {
414        let (owner, repo) = Self::extract_github_info(url)
415            .ok_or_else(|| PreviewError::ExtractError("Invalid GitHub URL format".into()))?;
416
417        self.github_generator
418            .fetcher
419            .fetch_github_detailed_info(&owner, &repo)
420            .await
421    }
422}
423
424/// Static constructor methods
425impl PreviewService {
426    /// Create a new preview service for unit testing
427    pub fn new_minimal() -> Self {
428        let default_generator = Arc::new(UrlPreviewGenerator::new(100, CacheStrategy::UseCache));
429        #[cfg(feature = "twitter")]
430        let twitter_generator = Arc::new(UrlPreviewGenerator::new(100, CacheStrategy::UseCache));
431        #[cfg(feature = "github")]
432        let github_generator = Arc::new(UrlPreviewGenerator::new(100, CacheStrategy::UseCache));
433
434        Self {
435            default_generator,
436            #[cfg(feature = "twitter")]
437            twitter_generator,
438            #[cfg(feature = "github")]
439            github_generator,
440            #[cfg(feature = "browser")]
441            browser_service: None,
442            semaphore: Arc::new(Semaphore::new(10)),
443        }
444    }
445
446    #[cfg_attr(feature = "logging", instrument(level = "debug", skip(self)))]
447    pub async fn generate_preview_no_cache(&self, url: &str) -> Result<Preview, PreviewError> {
448        let generator = UrlPreviewGenerator::new_with_fetcher(
449            0,
450            CacheStrategy::NoCache,
451            self.default_generator.fetcher.clone(),
452        );
453        generator.generate_preview(url).await
454    }
455}
456
457pub struct PreviewServiceConfig {
458    pub cache_capacity: usize,
459    pub cache_strategy: CacheStrategy,
460    pub max_concurrent_requests: usize,
461    pub default_fetcher: Option<Fetcher>,
462    #[cfg(feature = "twitter")]
463    pub twitter_fetcher: Option<Fetcher>,
464    #[cfg(feature = "github")]
465    pub github_fetcher: Option<Fetcher>,
466    #[cfg(feature = "browser")]
467    pub mcp_config: Option<McpConfig>,
468    #[cfg(feature = "browser")]
469    pub browser_usage_policy: BrowserUsagePolicy,
470}
471
472impl PreviewServiceConfig {
473    pub fn new(cache_capacity: usize) -> Self {
474        Self {
475            cache_capacity,
476            cache_strategy: CacheStrategy::UseCache,
477            max_concurrent_requests: MAX_CONCURRENT_REQUESTS,
478            default_fetcher: None,
479            #[cfg(feature = "twitter")]
480            twitter_fetcher: None,
481            #[cfg(feature = "github")]
482            github_fetcher: None,
483            #[cfg(feature = "browser")]
484            mcp_config: None,
485            #[cfg(feature = "browser")]
486            browser_usage_policy: BrowserUsagePolicy::Auto,
487        }
488    }
489
490    #[cfg(feature = "github")]
491    pub fn with_github_fetcher(mut self, fetcher: Fetcher) -> Self {
492        self.github_fetcher = Some(fetcher);
493        self
494    }
495
496    pub fn with_default_fetcher(mut self, fetcher: Fetcher) -> Self {
497        self.default_fetcher = Some(fetcher);
498        self
499    }
500
501    #[cfg(feature = "twitter")]
502    pub fn with_twitter_fetcher(mut self, fetcher: Fetcher) -> Self {
503        self.twitter_fetcher = Some(fetcher);
504        self
505    }
506
507    pub fn with_max_concurrent_requests(mut self, max_concurrent_requests: usize) -> Self {
508        self.max_concurrent_requests = max_concurrent_requests;
509        self
510    }
511
512    pub fn with_cache_strategy(mut self, cache_strategy: CacheStrategy) -> Self {
513        self.cache_strategy = cache_strategy;
514        self
515    }
516    
517    #[cfg(feature = "browser")]
518    pub fn with_mcp_config(mut self, mcp_config: McpConfig) -> Self {
519        self.mcp_config = Some(mcp_config);
520        self
521    }
522    
523    #[cfg(feature = "browser")]
524    pub fn with_browser_usage_policy(mut self, policy: BrowserUsagePolicy) -> Self {
525        self.browser_usage_policy = policy;
526        self
527    }
528}