// news_flash/util/greader.rs
1use crate::feed_api::Portal;
2use crate::models::{
3    ArticleID, Category, CategoryID, CategoryMapping, Direction, Enclosure, FatArticle, Feed, FeedID, FeedMapping, Headline, Marked,
4    NEWSFLASH_TOPLEVEL, Read, StreamConversionResult, Tag, TagID, Tagging, Url,
5};
6use crate::{error::FeedApiError, util};
7use chrono::Utc;
8use futures::future;
9use greader_api::models::{
10    Category as GCategory, Feed as GFeed, Item, ItemId, ItemRefs, Stream, StreamPrefs, Summary, Tagging as GTagging, Taggings,
11};
12use greader_api::{ApiError as GReaderError, GReaderApi};
13use reqwest::Client;
14use std::collections::HashSet;
15use std::convert::TryInto;
16use std::sync::Arc;
17use tokio::sync::RwLock;
18
/// GReader state tag that marks an item as read.
pub const TAG_READ_STR: &str = "user/-/state/com.google/read";
/// GReader state tag that marks an item as starred.
pub const TAG_STARRED_STR: &str = "user/-/state/com.google/starred";
/// GReader stream id containing all subscribed items ("reading list").
pub const TAG_READING_LIST: &str = "user/-/state/com.google/reading-list";

/// Prefix of long-form GReader item ids; the remainder is a 64-bit hex id.
pub const GOOGLE_ITEM_PREFIX: &str = "tag:google.com,2005:reader/item/";
24
25impl From<GReaderError> for FeedApiError {
26    fn from(error: GReaderError) -> FeedApiError {
27        match error {
28            GReaderError::ApiLimit => FeedApiError::ApiLimit,
29            GReaderError::Url(e) => FeedApiError::Url(e),
30            GReaderError::Json { source, json } => FeedApiError::Json { source, json },
31            GReaderError::Http(e) => FeedApiError::Network(e),
32            GReaderError::GReader(greader_error) => {
33                let error_list = greader_error.errors.iter().fold("".into(), |prev, next| format!("{prev}\n{next}"));
34                FeedApiError::Api {
35                    message: format!("GReader Error:\n{error_list}"),
36                }
37            }
38            GReaderError::BadRequest => FeedApiError::Api {
39                message: GReaderError::BadRequest.to_string(),
40            },
41            GReaderError::Input => FeedApiError::Api {
42                message: GReaderError::Input.to_string(),
43            },
44            GReaderError::Token => FeedApiError::Api {
45                message: GReaderError::Token.to_string(),
46            },
47            GReaderError::TokenExpired => FeedApiError::Api {
48                message: GReaderError::TokenExpired.to_string(),
49            },
50            GReaderError::AccessDenied => FeedApiError::Auth,
51            GReaderError::Parse => FeedApiError::Api {
52                message: GReaderError::Parse.to_string(),
53            },
54            GReaderError::NotLoggedIn => FeedApiError::Login,
55            GReaderError::Other(msg) => FeedApiError::Api { message: msg },
56        }
57    }
58}
59
/// Filter parameters for [`GReaderUtil::get_articles`].
pub struct ArticleQuery<'a> {
    // Stream to fetch from (passed straight to `stream_contents`).
    pub stream_id: Option<&'a str>,
    // Restrict to read/unread articles, if set.
    pub read: Option<Read>,
    // Restrict to starred articles, if set.
    pub marked: Option<Marked>,
    // Known tag ids; item categories matching these become taggings.
    pub tag_ids: &'a HashSet<TagID>,
    // Maximum number of articles to fetch; `None` means "as many as possible".
    pub limit: Option<u64>,
    // Passed to the API as the "newer than" parameter;
    // presumably a unix timestamp of the last sync — TODO confirm.
    pub last_sync: Option<i64>,
}
68
69pub struct GReaderUtil;
70
71impl GReaderUtil {
72    pub fn generate_tag_id(user_id: Option<&str>, name: &str) -> String {
73        let user_id = user_id.unwrap_or("-");
74        format!("user/{user_id}/label/{name}")
75    }
76
77    pub fn root_id(user_id: Option<&str>) -> String {
78        let user_id = user_id.unwrap_or("-");
79        format!("user/{user_id}/state/com.google/root")
80    }
81
82    pub fn convert_category(
83        category: GCategory,
84        index: i32,
85        taggings: Option<&Taggings>,
86        prefs: Option<&StreamPrefs>,
87        user_id: Option<&str>,
88    ) -> (Category, CategoryMapping) {
89        let sort_index = if let Some(tagging) = Self::find_tagging(taggings, &category.id) {
90            Self::convert_sortid(Some(&Self::root_id(user_id)), tagging.sortid.as_deref(), prefs)
91        } else {
92            None
93        };
94
95        let GCategory { id, label } = category;
96        let category_id = CategoryID::new(&id);
97        let category = Category {
98            category_id: category_id.clone(),
99            label,
100        };
101        let category_mapping = CategoryMapping {
102            parent_id: NEWSFLASH_TOPLEVEL.clone(),
103            category_id,
104            sort_index: if sort_index.is_none() { Some(index) } else { sort_index },
105        };
106
107        (category, category_mapping)
108    }
109
110    pub fn find_tagging<'a>(taggings: Option<&'a Taggings>, id: &str) -> Option<&'a GTagging> {
111        if let Some(taggings) = taggings {
112            taggings.tags.iter().find(|t| t.id == id)
113        } else {
114            None
115        }
116    }
117
118    pub fn convert_category_vec(
119        mut categories: Vec<GFeed>,
120        taggings: Option<&Taggings>,
121        prefs: Option<&StreamPrefs>,
122        user_id: Option<&str>,
123    ) -> (Vec<Category>, Vec<CategoryMapping>) {
124        // skip duplicate categories by collecting all ids
125        let mut category_ids: HashSet<CategoryID> = HashSet::new();
126
127        // normalize index by only issuing a new index when a new unique category is found
128        let mut index = 0;
129
130        categories
131            .drain(..)
132            .filter_map(|feed| {
133                let feed_categories: Vec<(Category, CategoryMapping)> = feed
134                    .categories
135                    .into_iter()
136                    .filter_map(|c| {
137                        let (category, category_mapping) = Self::convert_category(c, index, taggings, prefs, user_id);
138                        if category_ids.contains(&category.category_id) {
139                            None
140                        } else {
141                            index += 1;
142                            category_ids.insert(category.category_id.clone());
143                            Some((category, category_mapping))
144                        }
145                    })
146                    .collect();
147
148                if feed_categories.is_empty() { None } else { Some(feed_categories) }
149            })
150            .flatten()
151            .unzip()
152    }
153
154    pub fn convert_tag_list(taggings: Taggings, categories: &[Category]) -> Vec<Tag> {
155        let Taggings { tags: tag_list } = taggings;
156
157        tag_list
158            .into_iter()
159            .filter_map(|tagging| {
160                let GTagging {
161                    id: tag_id,
162                    r#type: _,
163                    sortid,
164                    unread_count: _,
165                    unseen_count: _,
166                } = tagging;
167
168                // tags and folder both have the structure 'user/$USERID/label/$NAME'
169                if tag_id.contains("/label/") {
170                    // if 'tag_id' is a category ignore it
171                    if categories.iter().any(|c| c.category_id.as_str() == tag_id) {
172                        None
173                    } else {
174                        let label = tag_id
175                            .split('/')
176                            .next_back()
177                            .map(|s| s.to_owned())
178                            .unwrap_or_else(|| "Missing Label".into());
179
180                        let sort_index = sortid
181                            .and_then(|str| hex::decode(str).ok())
182                            .and_then(|buf| buf.try_into().ok())
183                            .map(|buf| u32::from_le_bytes(buf) as i32);
184
185                        Some(Tag {
186                            tag_id: TagID::from_owned(tag_id),
187                            label,
188                            color: None,
189                            sort_index,
190                        })
191                    }
192                } else {
193                    None
194                }
195            })
196            .collect()
197    }
198
199    pub fn convert_feed(feed: GFeed) -> Feed {
200        let GFeed {
201            id,
202            title,
203            categories: _,
204            url,
205            html_url,
206            icon_url,
207            sortid: _,
208        } = feed;
209
210        Feed {
211            feed_id: FeedID::new(&id),
212            label: title,
213            website: Url::parse(&html_url).ok(),
214            feed_url: Url::parse(&url).ok(),
215            icon_url: Url::parse(&icon_url).ok(),
216            error_count: 0,
217            error_message: None,
218        }
219    }
220
221    pub fn convert_sortid(parent_id: Option<&str>, sort_id: Option<&str>, prefs: Option<&StreamPrefs>) -> Option<i32> {
222        if let (Some(prefs), Some(parent_id)) = (prefs, parent_id) {
223            if let Some(prefs) = prefs.streamprefs.get(parent_id) {
224                let mut sort_index = None;
225                for pref in prefs {
226                    if pref.id != "subscription-ordering" {
227                        continue;
228                    }
229
230                    let cahrs = pref.value.chars().collect::<Vec<char>>();
231
232                    sort_index = cahrs
233                        .chunks(8)
234                        .map(|c| c.iter().collect::<String>())
235                        .enumerate()
236                        .find(|(_i, id)| sort_id == Some(id))
237                        .map(|(i, _id)| i as i32);
238
239                    break;
240                }
241
242                sort_index
243            } else {
244                None
245            }
246        } else {
247            None
248        }
249    }
250
251    pub fn convert_feed_vec(mut feeds: Vec<GFeed>, prefs: Option<&StreamPrefs>) -> (Vec<Feed>, Vec<FeedMapping>) {
252        let mut mappings: Vec<FeedMapping> = Vec::new();
253        let feeds = feeds
254            .drain(..)
255            .enumerate()
256            .map(|(i, f)| {
257                for category in &f.categories {
258                    let parent_id = f.categories.first().map(|category| category.id.clone());
259                    let sort_index = Self::convert_sortid(parent_id.as_deref(), f.sortid.as_deref(), prefs);
260
261                    mappings.push(FeedMapping {
262                        feed_id: FeedID::new(&f.id.to_string()),
263                        category_id: CategoryID::new(&category.id.to_string()),
264                        sort_index: if sort_index.is_none() { Some(i as i32) } else { sort_index },
265                    });
266                }
267
268                Self::convert_feed(f)
269            })
270            .collect();
271
272        (feeds, mappings)
273    }
274
275    pub async fn convert_stream(stream: Stream, tag_ids: &HashSet<TagID>, portal: Arc<Box<dyn Portal>>) -> StreamConversionResult {
276        let Stream {
277            direction: _,
278            id: _,
279            title: _,
280            description: _,
281            own: _,
282            updated: _,
283            updated_usec: _,
284            items,
285            author: _,
286            continuation: _,
287        } = stream;
288
289        GReaderUtil::convert_item_vec(items, tag_ids, portal).await
290    }
291
292    pub async fn convert_item_vec(articles: Vec<Item>, tag_ids: &HashSet<TagID>, portal: Arc<Box<dyn Portal>>) -> StreamConversionResult {
293        let enclosures: Arc<RwLock<Vec<Enclosure>>> = Arc::new(RwLock::new(Vec::new()));
294        let taggings: Arc<RwLock<Vec<Tagging>>> = Arc::new(RwLock::new(Vec::new()));
295        let headlines: Arc<RwLock<Vec<Headline>>> = Arc::new(RwLock::new(Vec::new()));
296
297        let tasks = articles
298            .into_iter()
299            .map(|item| {
300                let enclosures = enclosures.clone();
301                let taggings = taggings.clone();
302                let headlines = headlines.clone();
303                let portal = portal.clone();
304                let tag_ids = tag_ids.clone();
305
306                tokio::spawn(async move {
307                    let Item {
308                        origin,
309                        updated: _,
310                        id,
311                        categories,
312                        author,
313                        alternate,
314                        timestamp_usec: _,
315                        crawl_time_msec: _,
316                        published,
317                        title,
318                        content,
319                        enclosure,
320                    } = item;
321
322                    let article_id = ArticleID::new(&Self::convert_google_item_id(id));
323                    let article_exists_locally = portal.get_article_exists(&article_id).unwrap_or(false);
324
325                    let unread = if categories.iter().any(|c| c.ends_with("/read")) {
326                        Read::Read
327                    } else {
328                        Read::Unread
329                    };
330                    let marked = if categories.iter().any(|c| c.ends_with("/starred")) {
331                        Marked::Marked
332                    } else {
333                        Marked::Unmarked
334                    };
335
336                    let mut article_taggings = categories
337                        .iter()
338                        .filter_map(|c| {
339                            let tag_id = TagID::new(c);
340                            if tag_ids.contains(&tag_id) {
341                                Some(Tagging {
342                                    tag_id,
343                                    article_id: article_id.clone(),
344                                })
345                            } else {
346                                None
347                            }
348                        })
349                        .collect();
350
351                    taggings.write().await.append(&mut article_taggings);
352
353                    // already in db
354                    // -> only need to update read/marked status
355                    // FIXME: can we check if article was recrawled and updated its content?
356                    if article_exists_locally {
357                        headlines.write().await.push(Headline { article_id, unread, marked });
358                        return None;
359                    }
360
361                    let url = alternate.first().and_then(|alt| Url::parse(&alt.href).ok());
362                    let (html, direction) = if let Some(content) = content {
363                        let Summary { content: html, direction } = content;
364                        let direction = direction.map(|d| if d == "rtl" { Direction::RightToLeft } else { Direction::LeftToRight });
365                        (Some(html), direction)
366                    } else {
367                        (None, None)
368                    };
369
370                    let thumbnail_url = if let Some(enclosure) = enclosure {
371                        let thumbnail_url = enclosure.iter().find_map(|e| {
372                            let is_image_type = e._type.as_ref().map(|t| t.starts_with("image/")).unwrap_or(false);
373                            let is_image_href = e.href.ends_with(".jpeg") || e.href.ends_with(".jpg") || e.href.ends_with(".png");
374
375                            if is_image_type || is_image_href { Some(e.href.clone()) } else { None }
376                        });
377
378                        enclosures.write().await.append(
379                            &mut enclosure
380                                .into_iter()
381                                .filter_map(|e| {
382                                    Url::parse(&e.href).ok().map(|url| Enclosure {
383                                        article_id: article_id.clone(),
384                                        url,
385                                        mime_type: e._type,
386                                        duration: e.length.map(|length| length as i32),
387                                        title: None,
388                                        position: None,
389                                        summary: None,
390                                        thumbnail_url: None,
391                                        filesize: None,
392                                        width: None,
393                                        height: None,
394                                        framerate: None,
395                                        alternative: None,
396                                        is_default: false,
397                                    })
398                                })
399                                .collect(),
400                        );
401
402                        thumbnail_url
403                    } else if let Some(html) = html.as_deref() {
404                        crate::util::thumbnail::extract_thumbnail(html)
405                    } else {
406                        None
407                    };
408
409                    let plain_text = if article_exists_locally {
410                        None
411                    } else {
412                        html.as_deref().map(util::html2text::html2text)
413                    };
414
415                    let summary = plain_text.as_deref().map(util::html2text::text2summary);
416
417                    Some(FatArticle {
418                        article_id,
419                        title: title.map(|t| match escaper::decode_html(&t) {
420                            Ok(title) => title,
421                            Err(_error) => {
422                                // This warning freaks users out for some reason
423                                // warn!("Error {:?} at character {}", error.kind, error.position);
424                                t
425                            }
426                        }),
427                        author,
428                        feed_id: FeedID::new(&origin.stream_id),
429                        url,
430                        date: util::timestamp_to_datetime(published),
431                        synced: Utc::now(),
432                        updated: None,
433                        html,
434                        direction,
435                        summary,
436                        plain_text,
437                        scraped_content: None,
438                        unread,
439                        marked,
440                        thumbnail_url,
441                    })
442                })
443            })
444            .collect::<Vec<_>>();
445
446        let articles = future::join_all(tasks).await.into_iter().filter_map(|res| res.ok().flatten()).collect();
447
448        StreamConversionResult {
449            articles,
450            headlines: Arc::into_inner(headlines).map(|e| e.into_inner()).unwrap_or_default(),
451            taggings: Arc::into_inner(taggings).map(|e| e.into_inner()).unwrap_or_default(),
452            enclosures: Arc::into_inner(enclosures).map(|e| e.into_inner()).unwrap_or_default(),
453        }
454    }
455
456    fn convert_google_item_id(long_id: String) -> String {
457        if long_id.starts_with(GOOGLE_ITEM_PREFIX)
458            && let Some(pos) = long_id.rfind('/')
459        {
460            let hex_id = &long_id[pos + 1..];
461            let dec_id = i64::from_str_radix(hex_id, 16).unwrap();
462
463            return dec_id.to_string();
464        }
465
466        long_id
467    }
468
469    pub async fn get_articles(
470        api: &GReaderApi,
471        client: &Client,
472        portal: Arc<Box<dyn Portal>>,
473        query: ArticleQuery<'_>,
474    ) -> Result<StreamConversionResult, GReaderError> {
475        let mut continuation: Option<String> = None;
476        let mut articles = Vec::new();
477        let mut headlines = Vec::new();
478        let mut taggings = Vec::new();
479        let mut enclosures = Vec::new();
480        let exclude = query.read.and_then(|r| if r == Read::Unread { Some(TAG_READ_STR) } else { None });
481        let include = query.read.and_then(|r| if r == Read::Read { Some(TAG_READ_STR) } else { None });
482        let include = query.marked.and_then(|m| {
483            if m == Marked::Marked {
484                Some(include.unwrap_or(TAG_STARRED_STR))
485            } else {
486                None
487            }
488        });
489
490        let amount = query.limit.map(|l| u64::max(l, 1000)).unwrap_or(1000);
491        let mut missing = query.limit.map(|l| l.saturating_sub(1000)).unwrap_or(u64::MAX);
492
493        loop {
494            let stream = api
495                .stream_contents(
496                    query.stream_id,
497                    false,
498                    Some(amount),
499                    continuation.as_deref(),
500                    exclude,
501                    include,
502                    query.last_sync,
503                    None,
504                    client,
505                )
506                .await?;
507
508            let stream_continuation = stream.continuation.clone();
509            let mut result = GReaderUtil::convert_stream(stream, query.tag_ids, portal.clone()).await;
510            articles.append(&mut result.articles);
511            headlines.append(&mut result.headlines);
512            taggings.append(&mut result.taggings);
513            enclosures.append(&mut result.enclosures);
514
515            if stream_continuation.is_none() {
516                break;
517            }
518
519            if missing == 0 {
520                break;
521            }
522
523            missing -= amount;
524            continuation = stream_continuation;
525        }
526
527        Ok(StreamConversionResult {
528            articles,
529            headlines,
530            taggings,
531            enclosures,
532        })
533    }
534
535    pub async fn get_article_ids(
536        api: &GReaderApi,
537        client: &Client,
538        stream_id: Option<&str>,
539        read: Option<Read>,
540        marked: Option<Marked>,
541        chunk_size: Option<u64>,
542    ) -> Result<Vec<ItemId>, GReaderError> {
543        let mut continuation: Option<String> = None;
544        let mut article_ids = Vec::new();
545        let exclude = read.and_then(|r| if r == Read::Unread { Some(TAG_READ_STR) } else { None });
546        let include = read.and_then(|r| if r == Read::Read { Some(TAG_READ_STR) } else { None });
547        let include = marked.and_then(|m| {
548            if m == Marked::Marked {
549                Some(include.unwrap_or(TAG_STARRED_STR))
550            } else {
551                None
552            }
553        });
554        let chunk_size = chunk_size.unwrap_or(1000);
555
556        loop {
557            let stream = api
558                .items_ids(
559                    stream_id,
560                    Some(chunk_size),
561                    false,
562                    continuation.as_deref(),
563                    exclude,
564                    include,
565                    None,
566                    None,
567                    client,
568                )
569                .await?;
570
571            let ItemRefs { item_refs, continuation: c } = stream;
572            if let Some(mut item_refs) = item_refs {
573                article_ids.append(&mut item_refs);
574            }
575
576            if c.is_none() {
577                break;
578            }
579
580            continuation.clone_from(&c);
581        }
582
583        Ok(article_ids)
584    }
585}