1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
//! Web scraper integration for [Flows.network](https://flows.network)
//!
//! # Quick Start
//!
//! The example below shows a lambda service that
//! responds with the text content of a web page
//! for a URL passed as a query parameter.
//!
//! ```
//! use std::collections::HashMap;
//!
//! use lambda_flows::{request_received, send_response};
//! use serde_json::Value;
//! use web_scraper_flows::get_page_text;
//!
//! #[no_mangle]
//! #[tokio::main(flavor = "current_thread")]
//! pub async fn run() {
//!     request_received(handler).await;
//! }
//!
//! async fn handler(qry: HashMap<String, Value>, _body: Vec<u8>) {
//!     let url = qry.get("url").expect("No url provided").as_str().unwrap();
//!
//!     match get_page_text(url).await {
//!         Ok(text) => send_response(
//!             200,
//!             vec![(
//!                 String::from("content-type"),
//!                 String::from("text/plain; charset=UTF-8"),
//!             )],
//!             text.as_bytes().to_vec(),
//!         ),
//!         Err(e) => send_response(
//!             400,
//!             vec![(
//!                 String::from("content-type"),
//!                 String::from("text/plain; charset=UTF-8"),
//!             )],
//!             e.as_bytes().to_vec(),
//!         ),
//!     }
//! }
//! ```
//!

use http_req::{
    request::{Method, Request},
    uri::Uri,
};
use lazy_static::lazy_static;
use urlencoding::encode;

lazy_static! {
    /// Base URL of the web scraper API.
    ///
    /// Taken from the `WEB_SCRAPER_API_PREFIX` environment variable as seen at
    /// compile time (`option_env!`); when that variable is not set, the public
    /// flows.network endpoint is used instead.
    static ref WEB_SCRAPER_API_PREFIX: String = match std::option_env!("WEB_SCRAPER_API_PREFIX") {
        Some(prefix) => prefix.to_owned(),
        None => "https://web-scraper.flows.network/api".to_owned(),
    };
}

// Functions resolved externally (declared with the C ABI). The wrappers in
// this file call each of them with a pointer to a buffer of 100 bytes capacity
// and treat the returned `i32` as the number of bytes written into it.
// NOTE(review): the exact contract (maximum length, meaning of 0 or negative
// return values) is not visible from this file — confirm against the host.
extern "C" {
    // Return the user id of the flows platform
    // `p` is the destination buffer the id is written to.
    fn get_flows_user(p: *mut u8) -> i32;

    // Return the flow id
    // `p` is the destination buffer the id is written to; the wrapper in this
    // file treats a return value of 0 as failure.
    fn get_flow_id(p: *mut u8) -> i32;
}

/// Reads the flows-platform user id through `get_flows_user`.
///
/// A buffer with capacity for 100 bytes is handed to `get_flows_user`, and the
/// value it returns is interpreted as the number of bytes it wrote. A return
/// value of 0 yields an empty string.
///
/// # Panics
///
/// Panics if the reported length is negative or larger than the buffer's
/// capacity, or if the bytes are not valid UTF-8.
///
/// # Safety
///
/// `get_flows_user` must write no more bytes into the buffer than the buffer
/// can hold, and must initialise every byte it reports as written.
unsafe fn _get_flows_user() -> String {
    let mut flows_user = Vec::<u8>::with_capacity(100);
    let c = get_flows_user(flows_user.as_mut_ptr());
    // `set_len` is undefined behaviour when the new length exceeds the
    // capacity; a negative `i32` cast to `usize` would do exactly that, so the
    // reported length is validated before it is trusted.
    if c < 0 || c as usize > flows_user.capacity() {
        panic!("Invalid flows user length: {}", c);
    }
    flows_user.set_len(c as usize);
    String::from_utf8(flows_user).unwrap()
}

/// Reads the id of the current flow through `get_flow_id`.
///
/// A buffer with capacity for 100 bytes is handed to `get_flow_id`, and the
/// value it returns is interpreted as the number of bytes it wrote.
///
/// # Panics
///
/// Panics if the reported length is zero or negative, if it is larger than the
/// buffer's capacity, or if the bytes are not valid UTF-8.
///
/// # Safety
///
/// `get_flow_id` must write no more bytes into the buffer than the buffer can
/// hold, and must initialise every byte it reports as written.
unsafe fn _get_flow_id() -> String {
    let mut flow_id = Vec::<u8>::with_capacity(100);
    let c = get_flow_id(flow_id.as_mut_ptr());
    // Zero was already treated as failure; a negative value must be rejected
    // too, because casting it to `usize` would produce a huge length.
    if c <= 0 {
        panic!("Failed to get flow id");
    }
    let len = c as usize;
    // `set_len` is undefined behaviour when the length exceeds the capacity.
    if len > flow_id.capacity() {
        panic!("Invalid flow id length: {}", c);
    }
    flow_id.set_len(len);
    String::from_utf8(flow_id).unwrap()
}

/// Return the page's text content.
///
/// Sends a GET request to the `get_page_text` endpoint of the web scraper API,
/// addressed by the current flows user and flow id, with `url` percent-encoded
/// as the `url` query parameter. On a successful status code the response body
/// is returned, decoded as UTF-8 with invalid sequences replaced.
///
/// # Errors
///
/// Returns `Err` containing:
/// - the parse error message, if the assembled request URI cannot be parsed;
/// - the transport error message, if the request cannot be sent;
/// - the (lossily decoded) response body, if the status code is not a success.
///
/// # Panics
///
/// Panics if no flow id is reported, or if the user id or flow id is not valid
/// UTF-8.
pub async fn get_page_text(url: &str) -> Result<String, String> {
    // SAFETY: both helpers pass a buffer with capacity for 100 bytes to an
    // external function and trust the length it reports; this relies on that
    // function never writing past the buffer.
    let flows_user = unsafe { _get_flows_user() };
    let flow_id = unsafe { _get_flow_id() };

    let uri = format!(
        "{}/{}/{}/get_page_text?url={}",
        WEB_SCRAPER_API_PREFIX.as_str(),
        flows_user,
        flow_id,
        encode(url),
    );
    // Report an unparseable URI to the caller instead of panicking; the
    // function already has an error channel for exactly this kind of failure.
    let uri = Uri::try_from(uri.as_str()).map_err(|e| e.to_string())?;

    let mut writer: Vec<u8> = Vec::new();
    let res = Request::new(&uri)
        .method(Method::GET)
        .send(&mut writer)
        .map_err(|e| e.to_string())?;

    // The body is the payload on success and the error description otherwise.
    let body = String::from_utf8_lossy(&writer).into_owned();
    if res.status_code().is_success() {
        Ok(body)
    } else {
        Err(body)
    }
}