upstream_ontologist/providers/
php.rs1use crate::{ProviderError, UpstreamDatum};
2use select::document::Document;
3use select::predicate::{And, Name, Predicate};
4
5pub async fn guess_from_pecl_package(package: &str) -> Result<Vec<UpstreamDatum>, ProviderError> {
10 let url = format!("https://pecl.php.net/packages/{}", package);
11
12 let client = reqwest::Client::builder()
13 .user_agent(crate::USER_AGENT)
14 .timeout(std::time::Duration::from_secs(15))
16 .build()
17 .unwrap();
18
19 let response = client
20 .get(url)
21 .send()
22 .await
23 .map_err(|e| ProviderError::Other(e.to_string()))?;
24
25 match response.status() {
26 reqwest::StatusCode::NOT_FOUND => {
27 return Ok(vec![]);
28 }
29 status if !status.is_success() => {
30 return Err(ProviderError::Other(format!("HTTP error: {}", status)));
31 }
32 _ => {}
33 }
34
35 let body = response
36 .text()
37 .await
38 .map_err(|e| ProviderError::Other(e.to_string()))?;
39
40 guess_from_pecl_page(&body)
41}
42
43struct TextContains<'a>(&'a str);
44
45impl<'a> Predicate for TextContains<'a> {
46 fn matches(&self, node: &select::node::Node) -> bool {
47 node.text().contains(self.0)
48 }
49}
50
51fn find_tags_by_text<'a>(
52 document: &'a Document,
53 tag_name: &'a str,
54 text: &'a str,
55) -> Vec<select::node::Node<'a>> {
56 document
57 .find(And(Name(tag_name), TextContains(text)))
58 .collect()
59}
60
61fn guess_from_pecl_page(body: &str) -> Result<Vec<UpstreamDatum>, ProviderError> {
62 let document = Document::from(body);
63 let mut ret = Vec::new();
64
65 let browse_source_selector = find_tags_by_text(&document, "a", "Browse Source")
66 .into_iter()
67 .next();
68
69 if let Some(node) = browse_source_selector {
70 ret.push(UpstreamDatum::RepositoryBrowse(
71 node.attr("href").unwrap().to_string(),
72 ));
73 }
74
75 let package_bugs_selector = find_tags_by_text(&document, "a", "Package Bugs")
76 .into_iter()
77 .next();
78
79 if let Some(node) = package_bugs_selector {
80 ret.push(UpstreamDatum::BugDatabase(
81 node.attr("href").unwrap().to_string(),
82 ));
83 }
84
85 let homepage_selector = find_tags_by_text(&document, "th", "Homepage")
86 .into_iter()
87 .next()
88 .unwrap()
89 .parent()
90 .unwrap()
91 .find(Name("td").descendant(Name("a")))
92 .next();
93
94 if let Some(node) = homepage_selector {
95 ret.push(UpstreamDatum::Homepage(
96 node.attr("href").unwrap().to_string(),
97 ));
98 }
99
100 Ok(ret)
101}
102
/// Marker type for the PECL third-party repository provider.
pub struct Pecl;

impl Default for Pecl {
    /// Equivalent to [`Pecl::new`].
    fn default() -> Self {
        Pecl::new()
    }
}

impl Pecl {
    /// Create a new `Pecl` provider.
    pub fn new() -> Self {
        Pecl
    }
}
118
119#[async_trait::async_trait]
120impl crate::ThirdPartyRepository for Pecl {
121 fn name(&self) -> &'static str {
122 "Pecl"
123 }
124
125 fn max_supported_certainty(&self) -> crate::Certainty {
126 crate::Certainty::Certain
127 }
128
129 fn supported_fields(&self) -> &'static [&'static str] {
130 &["Homepage", "Repository", "Bug-Database"]
131 }
132
133 async fn guess_metadata(&self, name: &str) -> Result<Vec<UpstreamDatum>, ProviderError> {
134 guess_from_pecl_package(name).await
135 }
136}
137
#[cfg(test)]
mod pecl_tests {
    use super::*;

    /// Parses the stored PECL page fixture and checks the extracted data.
    #[test]
    fn test_guess_from_pecl_page() {
        let html = include_str!("../testdata/pecl.html");

        let expected = vec![
            UpstreamDatum::RepositoryBrowse(
                "https://github.com/eduardok/libsmbclient-php".to_string(),
            ),
            UpstreamDatum::BugDatabase(
                "https://github.com/eduardok/libsmbclient-php/issues".to_string(),
            ),
            UpstreamDatum::Homepage("https://github.com/eduardok/libsmbclient-php".to_string()),
        ];

        let actual = guess_from_pecl_page(html).unwrap();
        assert_eq!(actual, expected);
    }
}