upstream_ontologist/providers/
php.rs

1use crate::{ProviderError, UpstreamDatum};
2use select::document::Document;
3use select::predicate::{And, Name, Predicate};
4
5/// Fetch upstream metadata for a PECL package
6///
7/// Retrieves package information from the PECL website by scraping the package page
8/// and extracting homepage, repository, and bug database URLs.
9pub async fn guess_from_pecl_package(package: &str) -> Result<Vec<UpstreamDatum>, ProviderError> {
10    let url = format!("https://pecl.php.net/packages/{}", package);
11
12    let client = reqwest::Client::builder()
13        .user_agent(crate::USER_AGENT)
14        // PECL is slow
15        .timeout(std::time::Duration::from_secs(15))
16        .build()
17        .unwrap();
18
19    let response = client
20        .get(url)
21        .send()
22        .await
23        .map_err(|e| ProviderError::Other(e.to_string()))?;
24
25    match response.status() {
26        reqwest::StatusCode::NOT_FOUND => {
27            return Ok(vec![]);
28        }
29        status if !status.is_success() => {
30            return Err(ProviderError::Other(format!("HTTP error: {}", status)));
31        }
32        _ => {}
33    }
34
35    let body = response
36        .text()
37        .await
38        .map_err(|e| ProviderError::Other(e.to_string()))?;
39
40    guess_from_pecl_page(&body)
41}
42
43struct TextContains<'a>(&'a str);
44
45impl<'a> Predicate for TextContains<'a> {
46    fn matches(&self, node: &select::node::Node) -> bool {
47        node.text().contains(self.0)
48    }
49}
50
51fn find_tags_by_text<'a>(
52    document: &'a Document,
53    tag_name: &'a str,
54    text: &'a str,
55) -> Vec<select::node::Node<'a>> {
56    document
57        .find(And(Name(tag_name), TextContains(text)))
58        .collect()
59}
60
61fn guess_from_pecl_page(body: &str) -> Result<Vec<UpstreamDatum>, ProviderError> {
62    let document = Document::from(body);
63    let mut ret = Vec::new();
64
65    let browse_source_selector = find_tags_by_text(&document, "a", "Browse Source")
66        .into_iter()
67        .next();
68
69    if let Some(node) = browse_source_selector {
70        ret.push(UpstreamDatum::RepositoryBrowse(
71            node.attr("href").unwrap().to_string(),
72        ));
73    }
74
75    let package_bugs_selector = find_tags_by_text(&document, "a", "Package Bugs")
76        .into_iter()
77        .next();
78
79    if let Some(node) = package_bugs_selector {
80        ret.push(UpstreamDatum::BugDatabase(
81            node.attr("href").unwrap().to_string(),
82        ));
83    }
84
85    let homepage_selector = find_tags_by_text(&document, "th", "Homepage")
86        .into_iter()
87        .next()
88        .unwrap()
89        .parent()
90        .unwrap()
91        .find(Name("td").descendant(Name("a")))
92        .next();
93
94    if let Some(node) = homepage_selector {
95        ret.push(UpstreamDatum::Homepage(
96            node.attr("href").unwrap().to_string(),
97        ));
98    }
99
100    Ok(ret)
101}
102
/// PECL (PHP Extension Community Library) third-party repository provider
#[derive(Default)]
pub struct Pecl;

impl Pecl {
    /// Create a new PECL provider instance
    pub fn new() -> Self {
        Pecl
    }
}
118
119#[async_trait::async_trait]
120impl crate::ThirdPartyRepository for Pecl {
121    fn name(&self) -> &'static str {
122        "Pecl"
123    }
124
125    fn max_supported_certainty(&self) -> crate::Certainty {
126        crate::Certainty::Certain
127    }
128
129    fn supported_fields(&self) -> &'static [&'static str] {
130        &["Homepage", "Repository", "Bug-Database"]
131    }
132
133    async fn guess_metadata(&self, name: &str) -> Result<Vec<UpstreamDatum>, ProviderError> {
134        guess_from_pecl_package(name).await
135    }
136}
137
#[cfg(test)]
mod pecl_tests {
    use super::*;

    /// Parses the saved PECL package page and checks the extracted URLs,
    /// including their order.
    #[test]
    fn test_guess_from_pecl_page() {
        let html = include_str!("../testdata/pecl.html");

        let expected = vec![
            UpstreamDatum::RepositoryBrowse(
                "https://github.com/eduardok/libsmbclient-php".to_string(),
            ),
            UpstreamDatum::BugDatabase(
                "https://github.com/eduardok/libsmbclient-php/issues".to_string(),
            ),
            UpstreamDatum::Homepage("https://github.com/eduardok/libsmbclient-php".to_string()),
        ];

        let actual = guess_from_pecl_page(html).unwrap();
        assert_eq!(actual, expected);
    }
}