only_scraper 0.1.2

Only scrape webpages
Documentation
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="Source of the Rust file `src/chrome/application.rs`."><title>application.rs - source</title><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../static.files/SourceSerif4-Regular-46f98efaafac5295.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../static.files/FiraSans-Regular-018c141bf0843ffd.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../static.files/FiraSans-Medium-8f9a781e4970d388.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../static.files/SourceCodePro-Regular-562dcc5011b6de7d.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../static.files/SourceCodePro-Semibold-d899c5a5c4aeb14a.ttf.woff2"><link rel="stylesheet" href="../../../static.files/normalize-76eba96aa4d2e634.css"><link rel="stylesheet" href="../../../static.files/rustdoc-9ee3a5e31a2afa3e.css"><meta name="rustdoc-vars" data-root-path="../../../" data-static-root-path="../../../static.files/" data-current-crate="docs" data-themes="" data-resource-suffix="" data-rustdoc-version="1.75.0 (82e1608df 2023-12-21)" data-channel="1.75.0" data-search-js="search-8fbf244ebcf71464.js" data-settings-js="settings-74424d7eec62a23e.js" ><script src="../../../static.files/storage-fec3eaa3851e447d.js"></script><script defer src="../../../static.files/src-script-3280b574d94e47b4.js"></script><script defer src="../../../src-files.js"></script><script defer src="../../../static.files/main-9dd44ab47b99a0fb.js"></script><noscript><link rel="stylesheet" href="../../../static.files/noscript-5d8b3c7633ad77ba.css"></noscript><link rel="alternate icon" type="image/png" href="../../../static.files/favicon-16x16-8b506e7a72182f1c.png"><link rel="alternate icon" 
type="image/png" href="../../../static.files/favicon-32x32-422f7d1d52889060.png"><link rel="icon" type="image/svg+xml" href="../../../static.files/favicon-2c020d218678b618.svg"></head><body class="rustdoc src"><!--[if lte IE 11]><div class="warning">This old browser is unsupported and will most likely display funky things.</div><![endif]--><nav class="sidebar"></nav><main><nav class="sub"><form class="search-form"><span></span><input class="search-input" name="search" aria-label="Run search in the documentation" autocomplete="off" spellcheck="false" placeholder="Click or press ‘S’ to search, ‘?’ for more options…" type="search"><div id="help-button" title="help" tabindex="-1"><a href="../../../help.html">?</a></div><div id="settings-menu" tabindex="-1"><a href="../../../settings.html" title="settings"><img width="22" height="22" alt="Change settings" src="../../../static.files/wheel-7b819b6101059cd0.svg"></a></div></form></nav><section id="main-content" class="content"><div class="example-wrap"><div data-nosnippet><pre class="src-line-numbers"><a href="#1" id="1">1</a>
<a href="#2" id="2">2</a>
<a href="#3" id="3">3</a>
<a href="#4" id="4">4</a>
<a href="#5" id="5">5</a>
<a href="#6" id="6">6</a>
<a href="#7" id="7">7</a>
<a href="#8" id="8">8</a>
<a href="#9" id="9">9</a>
<a href="#10" id="10">10</a>
<a href="#11" id="11">11</a>
<a href="#12" id="12">12</a>
<a href="#13" id="13">13</a>
<a href="#14" id="14">14</a>
<a href="#15" id="15">15</a>
<a href="#16" id="16">16</a>
<a href="#17" id="17">17</a>
<a href="#18" id="18">18</a>
<a href="#19" id="19">19</a>
<a href="#20" id="20">20</a>
<a href="#21" id="21">21</a>
<a href="#22" id="22">22</a>
<a href="#23" id="23">23</a>
<a href="#24" id="24">24</a>
<a href="#25" id="25">25</a>
<a href="#26" id="26">26</a>
<a href="#27" id="27">27</a>
<a href="#28" id="28">28</a>
<a href="#29" id="29">29</a>
<a href="#30" id="30">30</a>
<a href="#31" id="31">31</a>
<a href="#32" id="32">32</a>
<a href="#33" id="33">33</a>
<a href="#34" id="34">34</a>
<a href="#35" id="35">35</a>
<a href="#36" id="36">36</a>
<a href="#37" id="37">37</a>
<a href="#38" id="38">38</a>
<a href="#39" id="39">39</a>
<a href="#40" id="40">40</a>
<a href="#41" id="41">41</a>
<a href="#42" id="42">42</a>
<a href="#43" id="43">43</a>
<a href="#44" id="44">44</a>
<a href="#45" id="45">45</a>
<a href="#46" id="46">46</a>
<a href="#47" id="47">47</a>
<a href="#48" id="48">48</a>
<a href="#49" id="49">49</a>
<a href="#50" id="50">50</a>
<a href="#51" id="51">51</a>
<a href="#52" id="52">52</a>
<a href="#53" id="53">53</a>
<a href="#54" id="54">54</a>
<a href="#55" id="55">55</a>
<a href="#56" id="56">56</a>
<a href="#57" id="57">57</a>
<a href="#58" id="58">58</a>
<a href="#59" id="59">59</a>
<a href="#60" id="60">60</a>
<a href="#61" id="61">61</a>
<a href="#62" id="62">62</a>
<a href="#63" id="63">63</a>
<a href="#64" id="64">64</a>
<a href="#65" id="65">65</a>
<a href="#66" id="66">66</a>
<a href="#67" id="67">67</a>
<a href="#68" id="68">68</a>
<a href="#69" id="69">69</a>
<a href="#70" id="70">70</a>
<a href="#71" id="71">71</a>
<a href="#72" id="72">72</a>
<a href="#73" id="73">73</a>
<a href="#74" id="74">74</a>
<a href="#75" id="75">75</a>
<a href="#76" id="76">76</a>
<a href="#77" id="77">77</a>
<a href="#78" id="78">78</a>
<a href="#79" id="79">79</a>
<a href="#80" id="80">80</a>
<a href="#81" id="81">81</a>
<a href="#82" id="82">82</a>
<a href="#83" id="83">83</a>
<a href="#84" id="84">84</a>
<a href="#85" id="85">85</a>
<a href="#86" id="86">86</a>
<a href="#87" id="87">87</a>
<a href="#88" id="88">88</a>
<a href="#89" id="89">89</a>
<a href="#90" id="90">90</a>
<a href="#91" id="91">91</a>
<a href="#92" id="92">92</a>
<a href="#93" id="93">93</a>
<a href="#94" id="94">94</a>
<a href="#95" id="95">95</a>
<a href="#96" id="96">96</a>
<a href="#97" id="97">97</a>
<a href="#98" id="98">98</a>
<a href="#99" id="99">99</a>
<a href="#100" id="100">100</a>
<a href="#101" id="101">101</a>
<a href="#102" id="102">102</a>
<a href="#103" id="103">103</a>
<a href="#104" id="104">104</a>
<a href="#105" id="105">105</a>
<a href="#106" id="106">106</a>
<a href="#107" id="107">107</a>
<a href="#108" id="108">108</a>
<a href="#109" id="109">109</a>
<a href="#110" id="110">110</a>
<a href="#111" id="111">111</a>
<a href="#112" id="112">112</a>
<a href="#113" id="113">113</a>
<a href="#114" id="114">114</a>
<a href="#115" id="115">115</a>
<a href="#116" id="116">116</a>
<a href="#117" id="117">117</a>
<a href="#118" id="118">118</a>
<a href="#119" id="119">119</a>
<a href="#120" id="120">120</a>
<a href="#121" id="121">121</a>
<a href="#122" id="122">122</a>
<a href="#123" id="123">123</a>
<a href="#124" id="124">124</a>
<a href="#125" id="125">125</a>
<a href="#126" id="126">126</a>
<a href="#127" id="127">127</a>
<a href="#128" id="128">128</a>
<a href="#129" id="129">129</a>
<a href="#130" id="130">130</a>
<a href="#131" id="131">131</a>
<a href="#132" id="132">132</a>
<a href="#133" id="133">133</a>
<a href="#134" id="134">134</a>
<a href="#135" id="135">135</a>
<a href="#136" id="136">136</a>
<a href="#137" id="137">137</a>
<a href="#138" id="138">138</a>
<a href="#139" id="139">139</a>
<a href="#140" id="140">140</a>
</pre></div><pre class="rust"><code><span class="kw">use </span>std::io::{<span class="self">self</span>, <span class="prelude-ty">Result</span>};
<span class="kw">use </span>std::path::Path;
<span class="kw">use </span>std::path::PathBuf;
<span class="kw">use </span>std::process::{Command, Stdio};
<span class="kw">use </span>std::str;
<span class="kw">use </span>std::{env, fs};

/// Application name; the install directory under the user's home is this name with a leading dot.
<span class="kw">const </span>APP_NAME: <span class="kw-2">&amp;</span>str = <span class="string">&quot;only_scrape&quot;</span>;
/// Scheme and host prepended to every `chromium-browser-snapshots` URL built in this file.
<span class="kw">const </span>DEFAULT_HOST: <span class="kw-2">&amp;</span>str = <span class="string">&quot;https://storage.googleapis.com&quot;</span>;

<span class="kw">pub fn </span>download_and_install_chrome() -&gt; io::Result&lt;PathBuf&gt; {
    <span class="kw">let </span>install_dir = guess_install_dir()<span class="question-mark">?</span>;
    <span class="kw">let </span>(platform, archive_name, chrome_path) = platform_details();
    <span class="kw">let </span>app_path = install_dir.join(archive_name).join(<span class="kw-2">&amp;</span>chrome_path);

    <span class="kw">if </span>app_path.exists() {
        <span class="kw">return </span><span class="prelude-val">Ok</span>(app_path);
    }

    <span class="kw">let </span>revision = latest_revision(platform)<span class="question-mark">?</span>;

    <span class="kw">let </span>url = <span class="macro">format!</span>(
        <span class="string">&quot;{DEFAULT_HOST}/chromium-browser-snapshots/{platform}/{revision}/{archive_name}.zip&quot;
    </span>);
    <span class="kw">let </span>output_path = install_dir.join(<span class="macro">format!</span>(<span class="string">&quot;{platform}-{revision}.zip&quot;</span>));

    fs::create_dir_all(<span class="kw-2">&amp;</span>install_dir)<span class="question-mark">?</span>;

    download_file(<span class="kw-2">&amp;</span>url, <span class="kw-2">&amp;</span>output_path)<span class="question-mark">?</span>;
    unzip_file(<span class="kw-2">&amp;</span>output_path, <span class="kw-2">&amp;</span>install_dir)<span class="question-mark">?</span>;
    fs::remove_file(<span class="kw-2">&amp;</span>output_path).unwrap_or_else(|<span class="kw">_</span>| <span class="macro">println!</span>(<span class="string">&quot;Failed to delete zip&quot;</span>));

    <span class="prelude-val">Ok</span>(app_path)
}

<span class="kw">fn </span>guess_install_dir() -&gt; <span class="prelude-ty">Result</span>&lt;PathBuf&gt; {
    env::var(<span class="string">&quot;HOME&quot;</span>)
        .or_else(|<span class="kw">_</span>| env::var(<span class="string">&quot;USERPROFILE&quot;</span>))
        .map(PathBuf::from)
        .map(|dir| dir.join(<span class="macro">format!</span>(<span class="string">&quot;.{}&quot;</span>, APP_NAME)))
        .map_err(|<span class="kw">_</span>| io::Error::new(io::ErrorKind::NotFound, <span class="string">&quot;Home directory not found&quot;</span>))
}

/// Describes the Chromium snapshot that matches the compilation target.
///
/// Returns `(platform, archive_name, chrome_path)` where `platform` is the snapshot
/// directory name used in download URLs, `archive_name` is the archive's base name (also
/// the top-level directory joined onto the install directory), and `chrome_path` is the
/// executable's path relative to that directory.
///
/// The operating system is decided first and the CPU architecture is only consulted on
/// macOS, so an aarch64 target on another OS is never handed the macOS build.
///
/// # Panics
///
/// Panics with "Unsupported platform" when the target OS is not Linux, macOS or Windows.
fn platform_details() -> (&'static str, &'static str, PathBuf) {
    let (platform, archive_name, chrome_path) = if cfg!(target_os = "linux") {
        ("Linux_x64", "chrome-linux", "chrome")
    } else if cfg!(target_os = "macos") {
        let platform = if cfg!(target_arch = "aarch64") {
            "Mac_arm"
        } else {
            "Mac"
        };
        (platform, "chrome-mac", "Chromium.app/Contents/MacOS/Chromium")
    } else if cfg!(target_os = "windows") {
        ("Win_x64", "chrome-win", "chrome.exe")
    } else {
        panic!("Unsupported platform")
    };

    (platform, archive_name, PathBuf::from(chrome_path))
}

<span class="kw">fn </span>latest_revision(platform: <span class="kw-2">&amp;</span>str) -&gt; <span class="prelude-ty">Result</span>&lt;String&gt; {
    <span class="kw">let </span>url = <span class="macro">format!</span>(<span class="string">&quot;{DEFAULT_HOST}/chromium-browser-snapshots/{platform}/LAST_CHANGE&quot;</span>);
    <span class="kw">let </span>output = Command::new(<span class="string">&quot;curl&quot;</span>).arg(<span class="kw-2">&amp;</span>url).output()<span class="question-mark">?</span>;

    <span class="kw">if </span>output.status.success() {
        <span class="kw">let </span>stdout = String::from_utf8(output.stdout)
            .map_err(|<span class="kw">_</span>| io::Error::new(io::ErrorKind::InvalidData, <span class="string">&quot;Invalid UTF-8 sequence&quot;</span>))<span class="question-mark">?</span>;
        <span class="prelude-val">Ok</span>(stdout.trim().to_string())
    } <span class="kw">else </span>{
        <span class="prelude-val">Err</span>(io::Error::new(
            io::ErrorKind::Other,
            <span class="string">&quot;Failed to get latest revision&quot;</span>,
        ))
    }
}

/// Downloads `url` to `output_path` by running `curl`.
///
/// curl inherits this process's standard streams, so its progress meter stays visible.
/// `--fail` makes curl exit non-zero on HTTP error statuses instead of saving the server's
/// error page as if it were the requested file, and `--location` follows redirects.
///
/// # Errors
///
/// Returns an error when `curl` cannot be started or exits unsuccessfully.
fn download_file(url: &str, output_path: &Path) -> io::Result<()> {
    let status = Command::new("curl")
        .args(["--fail", "--location"])
        .arg("-o")
        .arg(output_path)
        .arg(url)
        .status()?;

    if status.success() {
        Ok(())
    } else {
        Err(io::Error::new(
            io::ErrorKind::Other,
            "Failed to download file",
        ))
    }
}

/// Extracts the archive at `zip_path` into the `destination` directory.
///
/// Uses `tar -xf <zip> -C <dest>` on Windows and `unzip -o <zip> -d <dest>` elsewhere. The
/// paths are passed to the child process as OS strings, so paths that are not valid Unicode
/// are handled instead of causing a panic. The tool's output is forwarded to this process's
/// stdout and stderr.
///
/// # Errors
///
/// Returns an error when the extraction tool cannot be started or exits unsuccessfully.
fn unzip_file(zip_path: &Path, destination: &Path) -> io::Result<()> {
    // Pick the extraction tool and its flags for the target operating system.
    let (program, archive_flag, destination_flag) = if cfg!(target_os = "windows") {
        ("tar", "-xf", "-C")
    } else {
        ("unzip", "-o", "-d")
    };

    // Run the tool to completion, letting it write directly to our output streams.
    let status = Command::new(program)
        .arg(archive_flag)
        .arg(zip_path)
        .arg(destination_flag)
        .arg(destination)
        .stdout(Stdio::inherit())
        .stderr(Stdio::inherit())
        .status()?;

    if status.success() {
        Ok(())
    } else {
        Err(io::Error::new(
            io::ErrorKind::Other,
            "Failed to extract ZIP file",
        ))
    }
}
</code></pre></div></section></main></body></html>