1use crate::{
2 archive::Archiveable,
3 exchange::Exchange,
4 request::{Request, params::Params},
5};
6use std::borrow::Cow;
7
8#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd, serde::Deserialize)]
9#[serde(field_identifier, rename_all = "lowercase")]
10pub enum Field {
11 Request,
12 Response,
13}
14
15pub struct Entry<'a, T: Archiveable> {
16 pub request_params: T::RequestParams,
17 pub exchange: Exchange<'a, T>,
18}
19
20impl<T: Archiveable + bounded_static::IntoBoundedStatic> bounded_static::IntoBoundedStatic
21 for Entry<'_, T>
22where
23 T::Static: Archiveable,
24 T::RequestParams: Into<<T::Static as Archiveable>::RequestParams>,
25{
26 type Static = Entry<'static, T::Static>;
27
28 fn into_static(self) -> Self::Static {
29 Self::Static {
30 request_params: self.request_params.into(),
31 exchange: self.exchange.into_static(),
32 }
33 }
34}
35
36impl<'a, 'de: 'a, T: Archiveable + 'a> serde::de::Deserialize<'de> for Entry<'a, T> {
37 fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
38 struct EntryVisitor<'a, T>(std::marker::PhantomData<&'a T>);
39
40 impl<'a, 'de: 'a, T: Archiveable> serde::de::Visitor<'de> for EntryVisitor<'a, T> {
41 type Value = Entry<'a, T>;
42
43 fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
44 formatter.write_str("scraper exchange archive entry")
45 }
46
47 fn visit_map<A: serde::de::MapAccess<'de>>(
48 self,
49 mut map: A,
50 ) -> Result<Self::Value, A::Error> {
51 let request = map
52 .next_entry::<Field, Request<'_>>()?
53 .and_then(|(field, request)| {
54 if field == Field::Request {
55 Some(request)
56 } else {
57 None
58 }
59 })
60 .ok_or_else(|| serde::de::Error::missing_field("request"))?;
61
62 let request_params = T::RequestParams::parse_request(&request)
63 .map_err(|error| error.serde(&request))?;
64
65 let response = T::deserialize_response_field(&request_params, &mut map)?
66 .and_then(|(field, data)| {
67 if field == Field::Response {
68 Some(data)
69 } else {
70 None
71 }
72 })
73 .ok_or_else(|| serde::de::Error::missing_field("response"))?;
74
75 match map.next_entry::<Cow<'_, str>, serde::de::IgnoredAny>()? {
76 Some((field, _)) => Err(serde::de::Error::unknown_field(
77 &field,
78 &["request", "response"],
79 )),
80 None => Ok(Entry {
81 request_params,
82 exchange: Exchange { request, response },
83 }),
84 }
85 }
86 }
87
88 deserializer.deserialize_map(EntryVisitor(std::marker::PhantomData))
89 }
90}
91
92#[cfg(test)]
93mod tests {
94 use super::{Archiveable, Entry, Field};
95 use crate::exchange::Response;
96 use regex::Regex;
97 use std::borrow::Cow;
98 use std::sync::LazyLock;
99
100 const GOOGLE_PLAY_01_EXAMPLE: &str = include_str!("../../../examples/google-play-01.json");
101
/// Deserializes the bundled Google Play example and checks the parsed request parameters
/// and the shape of the response payload.
#[test]
fn deserialize_google_archive() -> Result<(), Box<dyn std::error::Error>> {
    let entry: Entry<'_, GoogleData> = serde_json::from_str(GOOGLE_PLAY_01_EXAMPLE)?;
    let params = &entry.request_params;

    assert_eq!(params.pagination.country, "us");
    assert_eq!(params.review.app_id, "ai.chesslegends");
    assert!(matches!(
        entry.exchange.response.data,
        GoogleData::Review(serde_json::Value::Array(_))
    ));

    Ok(())
}
115
116 struct ReviewRequest<'a> {
117 pagination: Pagination<'a>,
118 review: Review,
119 }
120
121 impl<'a> crate::request::params::Params for ReviewRequest<'a> {
122 fn build_request(
123 &self,
124 _timestamp: Option<chrono::DateTime<chrono::Utc>>,
125 ) -> crate::request::Request<'_> {
126 todo![]
128 }
129
130 fn parse_request(
131 request: &crate::request::Request<'_>,
132 ) -> Result<Self, crate::request::params::ParseError> {
133 let pagination = request.url.as_str().parse().map_err(|_| {
134 crate::request::params::ParseError::InvalidUrl {
135 expected: "Google review pagination request",
136 }
137 })?;
138
139 let review = request
140 .body
141 .as_ref()
142 .and_then(|body| body.parse().ok())
143 .ok_or_else(|| crate::request::params::ParseError::InvalidBody {
144 expected: "Google review pagination request",
145 })?;
146
147 Ok(Self { pagination, review })
148 }
149 }
150
151 enum GoogleData {
152 Review(serde_json::Value),
153 }
154
155 impl Archiveable for GoogleData {
156 type RequestParams = ReviewRequest<'static>;
157
158 fn deserialize_response_field<'de, A: serde::de::MapAccess<'de>>(
159 _request_params: &Self::RequestParams,
160 map: &mut A,
161 ) -> Result<Option<(Field, Response<'de, Self>)>, A::Error> {
162 Ok(map
163 .next_entry::<Field, Response<'_, serde_json::Value>>()?
164 .map(|(field, response)| (field, response.map(|value| GoogleData::Review(value)))))
165 }
166 }
167
168 #[derive(Clone, Debug, Eq, PartialEq)]
169 struct Pagination<'a> {
170 pub language: Cow<'a, str>,
171 pub country: Cow<'a, str>,
172 }
173
174 impl std::str::FromStr for Pagination<'static> {
175 type Err = String;
176
177 fn from_str(s: &str) -> Result<Self, Self::Err> {
178 static LANGUAGE_AND_COUNTRY_RE: LazyLock<Regex> =
179 LazyLock::new(|| Regex::new(r"hl=([a-z]{2}).*gl=([a-z]{2})").unwrap());
180
181 LANGUAGE_AND_COUNTRY_RE
182 .captures(s)
183 .and_then(|captures| captures.get(1).zip(captures.get(2)))
184 .map(|(language, country)| Self {
185 language: language.as_str().to_string().into(),
186 country: country.as_str().to_string().into(),
187 })
188 .ok_or_else(|| s.to_string())
189 }
190 }
191
192 #[derive(Clone, Debug, Eq, PartialEq)]
193 struct Review {
194 pub app_id: String,
195 pub sort_order: u8,
196 pub number: usize,
197 pub token: Option<String>,
198 }
199
200 impl std::str::FromStr for Review {
201 type Err = String;
202
203 fn from_str(s: &str) -> Result<Self, Self::Err> {
204 static REVIEW_RE: LazyLock<Regex> = LazyLock::new(|| {
205 Regex::new(r#"^f\.req=\[\[\["UsvDTd","\[null,null,\[2,(\d+),\[(\d+),null,([^\]]+)\],null,\[\]\],\[\\"([^\]]+)\\",7\]\]",null,"generic"\]\]\]$"#).unwrap()
206 });
207
208 let decoded = urlencoding::decode(s).map_err(|_| s.to_string())?;
209
210 REVIEW_RE
211 .captures(&decoded)
212 .and_then(|captures| {
213 captures
214 .get(1)
215 .zip(captures.get(2))
216 .zip(captures.get(3))
217 .zip(captures.get(4))
218 .and_then(
219 |(((sort_order_match, number_match), token_match), app_id_match)| {
220 sort_order_match
221 .as_str()
222 .parse::<u8>()
223 .ok()
224 .zip(number_match.as_str().parse::<usize>().ok())
225 .zip(match token_match.as_str() {
226 "null" => Some(None),
227 other
228 if other.starts_with(r#"\""#)
229 && other.ends_with(r#"\""#) =>
230 {
231 Some(Some(other[2..other.len() - 2].to_string()))
232 }
233 _ => None,
234 })
235 .map(|((sort_order, number), token)| Self {
236 app_id: app_id_match.as_str().to_string(),
237 sort_order,
238 number,
239 token,
240 })
241 },
242 )
243 })
244 .ok_or_else(|| s.to_string())
245 }
246 }
247}