Skip to main content

hashtree_cli/
pwa.rs

1use anyhow::{anyhow, Context, Result};
2use reqwest::{Client, Url};
3use serde::Serialize;
4use serde_json::Value;
5use std::collections::{BTreeSet, HashMap, HashSet};
6use std::path::Path;
7
8use crate::storage::HashtreeStore;
9use hashtree_core::{nhash_encode, Cid, DirEntry, HashTree, HashTreeConfig, LinkType};
10
11#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
12#[serde(rename_all = "camelCase")]
13pub struct PwaShortcut {
14    pub name: String,
15    pub url: String,
16}
17
18#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
19#[serde(rename_all = "camelCase")]
20pub struct PwaProtocolHandler {
21    pub protocol: String,
22    pub url: String,
23}
24
25#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
26#[serde(rename_all = "camelCase")]
27pub struct InstalledSitePwa {
28    pub name: String,
29    pub launch_url: String,
30    pub icon_url: Option<String>,
31    pub source_app_id: Option<String>,
32    pub source_url: String,
33    pub source_manifest_url: String,
34    pub description: Option<String>,
35    pub display_mode: Option<String>,
36    pub scope_url: Option<String>,
37    pub shortcuts: Vec<PwaShortcut>,
38    pub protocol_handlers: Vec<PwaProtocolHandler>,
39}
40
/// A single file that will be written into the stored tree.
#[derive(Clone, Debug)]
struct PwaAsset {
    /// Path of the file inside the tree; normalized again before it is stored.
    path: String,
    /// Raw file contents.
    data: Vec<u8>,
}
46
47#[derive(Debug, Clone)]
48struct AssetReference {
49    raw_value: String,
50    resolved_url: Url,
51}
52
53#[derive(Debug, Clone)]
54struct FetchedPwa {
55    name: String,
56    source_app_id: Option<String>,
57    source_url: String,
58    source_manifest_url: String,
59    description: Option<String>,
60    display_mode: Option<String>,
61    scope_reference: Option<String>,
62    launch_reference: String,
63    icon_path: Option<String>,
64    shortcuts: Vec<PwaShortcut>,
65    protocol_handlers: Vec<PwaProtocolHandler>,
66    assets: Vec<PwaAsset>,
67}
68
69pub async fn install_site_pwa_to_store(
70    store: &HashtreeStore,
71    url: &str,
72) -> Result<InstalledSitePwa> {
73    let fetched = fetch_pwa(url).await.context("fetch installable PWA")?;
74    let root_cid = store_pwa_assets(store, &fetched.assets)
75        .await
76        .context("store PWA in hashtree")?;
77
78    store.pin(&root_cid.hash).context("pin stored PWA")?;
79
80    let nhash = nhash_encode(&root_cid.hash).context("encode stored PWA root")?;
81
82    Ok(InstalledSitePwa {
83        name: fetched.name,
84        launch_url: format!("htree://{nhash}{}", fetched.launch_reference),
85        icon_url: fetched
86            .icon_path
87            .as_ref()
88            .map(|path| format!("htree://{nhash}{}", absolute_tree_path(path))),
89        source_app_id: fetched.source_app_id,
90        source_url: fetched.source_url,
91        source_manifest_url: fetched.source_manifest_url,
92        description: fetched.description,
93        display_mode: fetched.display_mode,
94        scope_url: fetched
95            .scope_reference
96            .as_ref()
97            .map(|scope_reference| format!("htree://{nhash}{scope_reference}")),
98        shortcuts: fetched
99            .shortcuts
100            .into_iter()
101            .map(|shortcut| PwaShortcut {
102                name: shortcut.name,
103                url: format!("htree://{nhash}{}", shortcut.url),
104            })
105            .collect(),
106        protocol_handlers: fetched
107            .protocol_handlers
108            .into_iter()
109            .map(|handler| PwaProtocolHandler {
110                protocol: handler.protocol,
111                url: format!("htree://{nhash}{}", handler.url),
112            })
113            .collect(),
114    })
115}
116
117pub async fn cache_bookmark_icon_to_store(
118    store: &HashtreeStore,
119    source_url: Option<&str>,
120    source_manifest_url: Option<&str>,
121    icon_url: Option<&str>,
122) -> Result<Option<String>> {
123    let client = build_reqwest_client()?;
124
125    match cache_manifest_icon_to_store(store, &client, source_url, source_manifest_url).await {
126        Ok(Some(cached_icon)) => return Ok(Some(cached_icon)),
127        Ok(None) => {}
128        Err(error) => {
129            tracing::warn!("Failed to cache manifest-derived bookmark icon: {}", error);
130        }
131    }
132
133    let Some(icon_url) = icon_url.filter(|value| is_http_url(value)) else {
134        return Ok(None);
135    };
136    match cache_direct_icon_to_store(store, &client, icon_url).await {
137        Ok(cached_icon) => Ok(Some(cached_icon)),
138        Err(error) => {
139            tracing::warn!(
140                "Failed to cache direct bookmark icon {}: {}",
141                icon_url,
142                error
143            );
144            Ok(None)
145        }
146    }
147}
148
149fn build_reqwest_client() -> Result<Client> {
150    Client::builder()
151        .redirect(reqwest::redirect::Policy::limited(10))
152        .build()
153        .context("build reqwest client")
154}
155
156async fn fetch_pwa(url: &str) -> Result<FetchedPwa> {
157    let client = build_reqwest_client()?;
158
159    let html_response = client
160        .get(url)
161        .send()
162        .await
163        .with_context(|| format!("fetch page {url}"))?;
164    let html_response = html_response
165        .error_for_status()
166        .with_context(|| format!("fetch page {url}"))?;
167    let source_url = html_response.url().to_string();
168    let base_url = html_response.url().clone();
169    let original_html = html_response
170        .text()
171        .await
172        .with_context(|| format!("read page body {source_url}"))?;
173
174    let html_path = url_to_path(&base_url, &base_url);
175    let manifest_reference = extract_manifest_reference(&original_html, &base_url)
176        .ok_or_else(|| anyhow!("page does not expose a web manifest"))?;
177    let manifest_url = manifest_reference.resolved_url.clone();
178    let manifest_response = client
179        .get(manifest_url.clone())
180        .send()
181        .await
182        .with_context(|| format!("fetch manifest {manifest_url}"))?;
183    let manifest_response = manifest_response
184        .error_for_status()
185        .with_context(|| format!("fetch manifest {manifest_url}"))?;
186    let source_manifest_url = manifest_response.url().to_string();
187    let manifest_url = manifest_response.url().clone();
188    let mut manifest: Value = manifest_response
189        .json()
190        .await
191        .with_context(|| format!("parse manifest JSON {source_manifest_url}"))?;
192
193    let mut fetched_urls = HashSet::new();
194    let mut assets = Vec::new();
195    let mut html_rewrites = Vec::new();
196    let mut queued_assets = BTreeSet::new();
197
198    fetched_urls.insert(source_url.clone());
199    fetched_urls.insert(source_manifest_url.clone());
200
201    let manifest_path = url_to_path(&manifest_url, &base_url);
202    html_rewrites.push((
203        manifest_reference.raw_value,
204        relative_tree_reference(&html_path, &manifest_path),
205    ));
206
207    for link_asset in extract_link_asset_references(&original_html, &base_url) {
208        let asset_path = url_to_path(&link_asset.resolved_url, &base_url);
209        html_rewrites.push((
210            link_asset.raw_value,
211            relative_tree_reference(&html_path, &asset_path),
212        ));
213        queued_assets.insert(link_asset.resolved_url.to_string());
214    }
215    for script in extract_script_references(&original_html, &base_url) {
216        let asset_path = url_to_path(&script.resolved_url, &base_url);
217        html_rewrites.push((
218            script.raw_value,
219            relative_tree_reference(&html_path, &asset_path),
220        ));
221        queued_assets.insert(script.resolved_url.to_string());
222    }
223    for image in extract_image_references(&original_html, &base_url) {
224        let asset_path = url_to_path(&image.resolved_url, &base_url);
225        html_rewrites.push((
226            image.raw_value,
227            relative_tree_reference(&html_path, &asset_path),
228        ));
229        queued_assets.insert(image.resolved_url.to_string());
230    }
231    for asset_url in extract_manifest_resource_urls(&manifest, &manifest_url) {
232        queued_assets.insert(asset_url.to_string());
233    }
234
235    let mut queued_asset_urls: Vec<Url> = queued_assets
236        .into_iter()
237        .filter_map(|value| Url::parse(&value).ok())
238        .collect();
239    let mut queued_asset_set: HashSet<String> = queued_asset_urls
240        .iter()
241        .map(|value| value.to_string())
242        .collect();
243
244    let mut queue_index = 0usize;
245    while queue_index < queued_asset_urls.len() {
246        let asset_url = queued_asset_urls[queue_index].clone();
247        queue_index += 1;
248
249        let discovered = fetch_asset(
250            &client,
251            &base_url,
252            &asset_url,
253            &mut fetched_urls,
254            &mut assets,
255        )
256        .await;
257
258        for nested_url in discovered {
259            if queued_asset_set.insert(nested_url.to_string()) {
260                queued_asset_urls.push(nested_url);
261            }
262        }
263    }
264
265    rewrite_manifest_urls(&mut manifest, &manifest_url, &manifest_path);
266
267    let rewritten_html = rewrite_html_urls(&original_html, &html_rewrites);
268    assets.push(PwaAsset {
269        path: html_path.clone(),
270        data: rewritten_html.into_bytes(),
271    });
272    assets.push(PwaAsset {
273        path: manifest_path.clone(),
274        data: serde_json::to_vec_pretty(&manifest).context("serialize rewritten manifest")?,
275    });
276
277    let launch_reference = manifest_start_reference(&manifest, &manifest_url)
278        .unwrap_or_else(|| absolute_tree_path(&html_path));
279    let icon_path = pick_manifest_icon_path(&manifest, &manifest_url);
280    let source_app_id = manifest_app_id(&manifest, &manifest_url);
281    let description = manifest_description(&manifest);
282    let display_mode = manifest_display_mode(&manifest);
283    let scope_reference = manifest_scope_reference(&manifest, &manifest_url);
284    let shortcuts = manifest_shortcuts(&manifest, &manifest_path);
285    let protocol_handlers = manifest_protocol_handlers(&manifest, &manifest_path);
286    let name = manifest_name(&manifest)
287        .or_else(|| extract_title(&original_html))
288        .unwrap_or_else(|| {
289            Url::parse(&source_url)
290                .ok()
291                .and_then(|value| value.host_str().map(str::to_owned))
292                .unwrap_or_else(|| "Installed Site".to_string())
293        });
294
295    Ok(FetchedPwa {
296        name,
297        source_app_id,
298        source_url,
299        source_manifest_url,
300        description,
301        display_mode,
302        scope_reference,
303        launch_reference,
304        icon_path,
305        shortcuts,
306        protocol_handlers,
307        assets,
308    })
309}
310
311async fn cache_manifest_icon_to_store(
312    store: &HashtreeStore,
313    client: &Client,
314    source_url: Option<&str>,
315    source_manifest_url: Option<&str>,
316) -> Result<Option<String>> {
317    let Some((manifest, manifest_url)) =
318        fetch_manifest_for_icon(client, source_url, source_manifest_url).await?
319    else {
320        return Ok(None);
321    };
322    let Some(icon_url) = pick_manifest_icon_url(&manifest, &manifest_url) else {
323        return Ok(None);
324    };
325    cache_icon_url_to_store(store, client, &icon_url)
326        .await
327        .map(Some)
328}
329
330async fn fetch_manifest_for_icon(
331    client: &Client,
332    source_url: Option<&str>,
333    source_manifest_url: Option<&str>,
334) -> Result<Option<(Value, Url)>> {
335    if let Some(manifest_url) = source_manifest_url.filter(|value| is_http_url(value)) {
336        return fetch_manifest_json(client, manifest_url).await.map(Some);
337    }
338
339    let Some(source_url) = source_url.filter(|value| is_http_url(value)) else {
340        return Ok(None);
341    };
342
343    let html_response = client
344        .get(source_url)
345        .send()
346        .await
347        .with_context(|| format!("fetch page {source_url}"))?;
348    let html_response = html_response
349        .error_for_status()
350        .with_context(|| format!("fetch page {source_url}"))?;
351    let base_url = html_response.url().clone();
352    let html = html_response
353        .text()
354        .await
355        .with_context(|| format!("read page body {}", base_url))?;
356    let Some(manifest_reference) = extract_manifest_reference(&html, &base_url) else {
357        return Ok(None);
358    };
359
360    fetch_manifest_json(client, manifest_reference.resolved_url.as_str())
361        .await
362        .map(Some)
363}
364
365async fn fetch_manifest_json(client: &Client, manifest_url: &str) -> Result<(Value, Url)> {
366    let parsed_manifest_url =
367        Url::parse(manifest_url).with_context(|| format!("parse manifest url {manifest_url}"))?;
368    let manifest_response = client
369        .get(parsed_manifest_url.clone())
370        .send()
371        .await
372        .with_context(|| format!("fetch manifest {parsed_manifest_url}"))?;
373    let manifest_response = manifest_response
374        .error_for_status()
375        .with_context(|| format!("fetch manifest {parsed_manifest_url}"))?;
376    let resolved_manifest_url = manifest_response.url().clone();
377    let manifest: Value = manifest_response
378        .json()
379        .await
380        .with_context(|| format!("parse manifest JSON {}", resolved_manifest_url))?;
381    Ok((manifest, resolved_manifest_url))
382}
383
384async fn cache_direct_icon_to_store(
385    store: &HashtreeStore,
386    client: &Client,
387    icon_url: &str,
388) -> Result<String> {
389    let parsed_icon_url =
390        Url::parse(icon_url).with_context(|| format!("parse icon url {icon_url}"))?;
391    cache_icon_url_to_store(store, client, &parsed_icon_url).await
392}
393
394async fn cache_icon_url_to_store(
395    store: &HashtreeStore,
396    client: &Client,
397    icon_url: &Url,
398) -> Result<String> {
399    if !matches!(icon_url.scheme(), "http" | "https") {
400        return Err(anyhow!("icon URL must use http:// or https://"));
401    }
402
403    let response = client
404        .get(icon_url.clone())
405        .send()
406        .await
407        .with_context(|| format!("fetch icon {icon_url}"))?;
408    let response = response
409        .error_for_status()
410        .with_context(|| format!("fetch icon {icon_url}"))?;
411    let resolved_icon_url = response.url().clone();
412    let content_type = response
413        .headers()
414        .get(reqwest::header::CONTENT_TYPE)
415        .and_then(|value| value.to_str().ok())
416        .unwrap_or_default()
417        .to_ascii_lowercase();
418    let bytes = response
419        .bytes()
420        .await
421        .with_context(|| format!("read icon body {resolved_icon_url}"))?;
422
423    if !looks_like_image_payload(&content_type, &bytes) {
424        return Err(anyhow!("icon response was not an image"));
425    }
426
427    let icon_path = icon_asset_path(&resolved_icon_url, &content_type, &bytes);
428    let root_cid = store_pwa_assets(
429        store,
430        &[PwaAsset {
431            path: icon_path.clone(),
432            data: bytes.to_vec(),
433        }],
434    )
435    .await
436    .context("store bookmark icon in hashtree")?;
437    store
438        .pin(&root_cid.hash)
439        .context("pin cached bookmark icon")?;
440
441    let nhash = nhash_encode(&root_cid.hash).context("encode cached bookmark icon root")?;
442    Ok(format!("htree://{nhash}{}", absolute_tree_path(&icon_path)))
443}
444
445async fn fetch_asset(
446    client: &Client,
447    base_url: &Url,
448    asset_url: &Url,
449    fetched_urls: &mut HashSet<String>,
450    assets: &mut Vec<PwaAsset>,
451) -> Vec<Url> {
452    if !matches!(asset_url.scheme(), "http" | "https") {
453        return Vec::new();
454    }
455    if !fetched_urls.insert(asset_url.to_string()) {
456        return Vec::new();
457    }
458
459    let response = match client.get(asset_url.clone()).send().await {
460        Ok(response) => response,
461        Err(error) => {
462            tracing::warn!("Failed to fetch PWA asset {}: {}", asset_url, error);
463            return Vec::new();
464        }
465    };
466    let response = match response.error_for_status() {
467        Ok(response) => response,
468        Err(error) => {
469            tracing::warn!("Failed to fetch PWA asset {}: {}", asset_url, error);
470            return Vec::new();
471        }
472    };
473
474    let content_type = response
475        .headers()
476        .get(reqwest::header::CONTENT_TYPE)
477        .and_then(|value| value.to_str().ok())
478        .unwrap_or_default()
479        .to_ascii_lowercase();
480    let path = url_to_path(asset_url, base_url);
481
482    if content_type.starts_with("text/css") || path.ends_with(".css") {
483        let css = match response.text().await {
484            Ok(css) => css,
485            Err(error) => {
486                tracing::warn!("Failed to read CSS asset {}: {}", asset_url, error);
487                return Vec::new();
488            }
489        };
490        let nested_urls = extract_css_urls(&css, asset_url);
491        let rewritten_css = rewrite_css_urls(&css, &path, asset_url, base_url);
492        assets.push(PwaAsset {
493            path,
494            data: rewritten_css.into_bytes(),
495        });
496        return nested_urls;
497    }
498
499    if content_type.starts_with("text/html") || path.ends_with(".html") || path.ends_with(".htm") {
500        let html = match response.text().await {
501            Ok(html) => html,
502            Err(error) => {
503                tracing::warn!("Failed to read HTML asset {}: {}", asset_url, error);
504                return Vec::new();
505            }
506        };
507        let (rewritten_html, nested_urls) =
508            rewrite_html_asset_urls(&html, &path, asset_url, base_url);
509        assets.push(PwaAsset {
510            path,
511            data: rewritten_html.into_bytes(),
512        });
513        return nested_urls;
514    }
515
516    match response.bytes().await {
517        Ok(bytes) => {
518            assets.push(PwaAsset {
519                path,
520                data: bytes.to_vec(),
521            });
522        }
523        Err(error) => {
524            tracing::warn!("Failed to read PWA asset {}: {}", asset_url, error);
525        }
526    }
527
528    Vec::new()
529}
530
531async fn store_pwa_assets(store: &HashtreeStore, assets: &[PwaAsset]) -> Result<Cid> {
532    let tree = HashTree::new(HashTreeConfig::new(store.store_arc()).public());
533
534    let mut file_entries = HashMap::new();
535    let mut dir_paths = HashSet::from([String::new()]);
536
537    for asset in assets {
538        let clean_path = normalize_asset_path(&asset.path);
539        if clean_path.is_empty() {
540            continue;
541        }
542        let (cid, size) = tree
543            .put(&asset.data)
544            .await
545            .with_context(|| format!("store asset {}", clean_path))?;
546
547        let (parent, name) = split_parent_and_name(&clean_path);
548        dir_paths.extend(parent_chain(&clean_path));
549        file_entries.insert(clean_path, (parent, name, cid, size));
550    }
551
552    let mut sorted_dirs: Vec<String> = dir_paths.into_iter().collect();
553    sorted_dirs.sort_by(|a, b| dir_depth(b).cmp(&dir_depth(a)).then_with(|| a.cmp(b)));
554
555    let mut dir_cids: HashMap<String, Cid> = HashMap::new();
556    for dir_path in sorted_dirs {
557        let mut entries = Vec::new();
558
559        for (parent, name, cid, size) in file_entries.values() {
560            if *parent == dir_path {
561                entries.push(DirEntry::from_cid(name.clone(), cid).with_size(*size));
562            }
563        }
564
565        for (subdir_path, cid) in &dir_cids {
566            if parent_path(subdir_path) == dir_path {
567                let name = file_name(subdir_path).unwrap_or_else(|| subdir_path.clone());
568                entries.push(DirEntry::from_cid(name, cid).with_link_type(LinkType::Dir));
569            }
570        }
571
572        let cid = tree
573            .put_directory(entries)
574            .await
575            .with_context(|| format!("create directory {}", display_dir(&dir_path)))?;
576        dir_cids.insert(dir_path, cid);
577    }
578
579    dir_cids
580        .remove("")
581        .ok_or_else(|| anyhow!("failed to build PWA root directory"))
582}
583
584fn manifest_name(manifest: &Value) -> Option<String> {
585    manifest
586        .get("name")
587        .and_then(Value::as_str)
588        .or_else(|| manifest.get("short_name").and_then(Value::as_str))
589        .map(str::trim)
590        .filter(|value| !value.is_empty())
591        .map(str::to_owned)
592}
593
594fn manifest_description(manifest: &Value) -> Option<String> {
595    manifest
596        .get("description")
597        .and_then(Value::as_str)
598        .map(str::trim)
599        .filter(|value| !value.is_empty())
600        .map(str::to_owned)
601}
602
603fn manifest_start_reference(manifest: &Value, manifest_url: &Url) -> Option<String> {
604    let start_url = manifest.get("start_url")?.as_str()?;
605    let resolved = manifest_url.join(start_url).ok()?;
606    Some(absolute_tree_reference_for_url(&resolved, manifest_url))
607}
608
609fn manifest_scope_reference(manifest: &Value, manifest_url: &Url) -> Option<String> {
610    let raw_scope = manifest
611        .get("scope")
612        .and_then(Value::as_str)
613        .map(str::trim)
614        .filter(|value| !value.is_empty())?;
615    let resolved_scope = resolve_resource_url(raw_scope, manifest_url)?;
616    if resolved_scope.origin() != manifest_url.origin() {
617        return None;
618    }
619
620    let path = resolved_scope.path();
621    if path.is_empty() {
622        Some("/".to_string())
623    } else {
624        Some(path.to_string())
625    }
626}
627
628fn manifest_shortcuts(manifest: &Value, manifest_path: &str) -> Vec<PwaShortcut> {
629    manifest
630        .get("shortcuts")
631        .and_then(Value::as_array)
632        .into_iter()
633        .flatten()
634        .filter_map(|shortcut| {
635            let name = shortcut
636                .get("name")
637                .and_then(Value::as_str)
638                .map(str::trim)
639                .filter(|value| !value.is_empty())?;
640            let url = shortcut
641                .get("url")
642                .and_then(Value::as_str)
643                .and_then(|value| absolute_tree_reference_from_path(manifest_path, value))?;
644
645            Some(PwaShortcut {
646                name: name.to_string(),
647                url,
648            })
649        })
650        .collect()
651}
652
653fn manifest_protocol_handlers(manifest: &Value, manifest_path: &str) -> Vec<PwaProtocolHandler> {
654    manifest
655        .get("protocol_handlers")
656        .and_then(Value::as_array)
657        .into_iter()
658        .flatten()
659        .filter_map(|handler| {
660            let protocol = handler
661                .get("protocol")
662                .and_then(Value::as_str)
663                .map(str::trim)
664                .filter(|value| !value.is_empty())?;
665            let url = handler
666                .get("url")
667                .and_then(Value::as_str)
668                .and_then(|value| absolute_tree_reference_from_path(manifest_path, value))?;
669
670            Some(PwaProtocolHandler {
671                protocol: protocol.to_string(),
672                url,
673            })
674        })
675        .collect()
676}
677
678fn manifest_app_id(manifest: &Value, manifest_url: &Url) -> Option<String> {
679    let raw_id = manifest.get("id")?.as_str()?.trim();
680    if raw_id.is_empty() {
681        return None;
682    }
683
684    Some(
685        manifest_url
686            .join(raw_id)
687            .map(|resolved| resolved.to_string())
688            .unwrap_or_else(|_| raw_id.to_string()),
689    )
690}
691
692fn manifest_display_mode(manifest: &Value) -> Option<String> {
693    let display_mode = manifest
694        .get("display")?
695        .as_str()?
696        .trim()
697        .to_ascii_lowercase();
698    match display_mode.as_str() {
699        "browser" | "minimal-ui" | "standalone" | "fullscreen" => Some(display_mode),
700        _ => None,
701    }
702}
703
704fn extract_manifest_resource_urls(manifest: &Value, manifest_url: &Url) -> Vec<Url> {
705    let mut urls = Vec::new();
706
707    collect_manifest_url_field(manifest.get("start_url"), manifest_url, &mut urls);
708    collect_manifest_icon_like_urls(manifest.get("icons"), manifest_url, &mut urls);
709    collect_manifest_icon_like_urls(manifest.get("screenshots"), manifest_url, &mut urls);
710
711    if let Some(shortcuts) = manifest.get("shortcuts").and_then(Value::as_array) {
712        for shortcut in shortcuts {
713            collect_manifest_url_field(shortcut.get("url"), manifest_url, &mut urls);
714            collect_manifest_icon_like_urls(shortcut.get("icons"), manifest_url, &mut urls);
715        }
716    }
717
718    if let Some(protocol_handlers) = manifest.get("protocol_handlers").and_then(Value::as_array) {
719        for handler in protocol_handlers {
720            collect_manifest_url_field(handler.get("url"), manifest_url, &mut urls);
721        }
722    }
723
724    if let Some(file_handlers) = manifest.get("file_handlers").and_then(Value::as_array) {
725        for handler in file_handlers {
726            collect_manifest_url_field(handler.get("action"), manifest_url, &mut urls);
727            collect_manifest_icon_like_urls(handler.get("icons"), manifest_url, &mut urls);
728        }
729    }
730
731    if let Some(share_target) = manifest.get("share_target") {
732        collect_manifest_url_field(share_target.get("action"), manifest_url, &mut urls);
733        collect_manifest_url_field(share_target.get("url_template"), manifest_url, &mut urls);
734    }
735
736    if let Some(note_taking) = manifest.get("note_taking") {
737        collect_manifest_url_field(note_taking.get("new_note_url"), manifest_url, &mut urls);
738    }
739
740    if let Some(tab_strip) = manifest.get("tab_strip") {
741        if let Some(home_tab) = tab_strip.get("home_tab") {
742            collect_manifest_url_field(home_tab.get("url"), manifest_url, &mut urls);
743            collect_manifest_icon_like_urls(home_tab.get("icons"), manifest_url, &mut urls);
744        }
745        if let Some(new_tab_button) = tab_strip.get("new_tab_button") {
746            collect_manifest_url_field(new_tab_button.get("url"), manifest_url, &mut urls);
747            collect_manifest_icon_like_urls(new_tab_button.get("icons"), manifest_url, &mut urls);
748        }
749    }
750
751    urls
752}
753
754fn pick_manifest_icon_url(manifest: &Value, manifest_url: &Url) -> Option<Url> {
755    let icons = manifest.get("icons")?.as_array()?;
756    let mut best_icon: Option<(&str, u8, u32)> = None;
757
758    for value in icons {
759        let Some(src) = value.get("src").and_then(Value::as_str) else {
760            continue;
761        };
762        let priority = manifest_icon_priority(value);
763        let size = value
764            .get("sizes")
765            .and_then(Value::as_str)
766            .and_then(parse_largest_icon_size)
767            .unwrap_or(0);
768
769        if best_icon
770            .map(|(_, best_priority, best_size)| {
771                priority > best_priority || (priority == best_priority && size > best_size)
772            })
773            .unwrap_or(true)
774        {
775            best_icon = Some((src, priority, size));
776        }
777    }
778
779    manifest_url.join(best_icon?.0).ok()
780}
781
782fn pick_manifest_icon_path(manifest: &Value, manifest_url: &Url) -> Option<String> {
783    let resolved = pick_manifest_icon_url(manifest, manifest_url)?;
784    Some(url_to_path(&resolved, manifest_url))
785}
786
787fn manifest_icon_priority(icon: &Value) -> u8 {
788    let Some(purpose) = icon.get("purpose").and_then(Value::as_str) else {
789        return 3;
790    };
791
792    let purposes: Vec<&str> = purpose.split_whitespace().collect();
793    if purposes
794        .iter()
795        .any(|value| value.eq_ignore_ascii_case("any"))
796    {
797        return 3;
798    }
799    if purposes
800        .iter()
801        .any(|value| value.eq_ignore_ascii_case("maskable"))
802    {
803        return 2;
804    }
805    if purposes
806        .iter()
807        .any(|value| value.eq_ignore_ascii_case("monochrome"))
808    {
809        return 1;
810    }
811    0
812}
813
/// Parses a manifest icon `sizes` value and returns the largest dimension it declares.
///
/// `sizes` is a whitespace-separated list of `<width>x<height>` tokens. The separator is
/// matched case-insensitively, so `192X192` is accepted alongside `192x192`. Tokens that do
/// not have this shape (including the keyword `any`) are ignored.
///
/// Returns `None` when no token could be parsed.
fn parse_largest_icon_size(sizes: &str) -> Option<u32> {
    sizes
        .split_whitespace()
        .filter_map(|token| {
            let (width, height) = token.split_once(|c: char| c == 'x' || c == 'X')?;
            let width: u32 = width.parse().ok()?;
            let height: u32 = height.parse().ok()?;
            Some(width.max(height))
        })
        .max()
}
825
826fn rewrite_manifest_urls(manifest: &mut Value, manifest_url: &Url, manifest_path: &str) {
827    rewrite_manifest_url_field(manifest, "start_url", manifest_url, manifest_path);
828    rewrite_manifest_icon_like_array(manifest, "icons", manifest_url, manifest_path);
829    rewrite_manifest_icon_like_array(manifest, "screenshots", manifest_url, manifest_path);
830
831    if let Some(shortcuts) = manifest.get_mut("shortcuts").and_then(Value::as_array_mut) {
832        for shortcut in shortcuts {
833            rewrite_manifest_url_field(shortcut, "url", manifest_url, manifest_path);
834            rewrite_manifest_icon_like_array(shortcut, "icons", manifest_url, manifest_path);
835        }
836    }
837
838    if let Some(protocol_handlers) = manifest
839        .get_mut("protocol_handlers")
840        .and_then(Value::as_array_mut)
841    {
842        for handler in protocol_handlers {
843            rewrite_manifest_url_field(handler, "url", manifest_url, manifest_path);
844        }
845    }
846
847    if let Some(file_handlers) = manifest
848        .get_mut("file_handlers")
849        .and_then(Value::as_array_mut)
850    {
851        for handler in file_handlers {
852            rewrite_manifest_url_field(handler, "action", manifest_url, manifest_path);
853            rewrite_manifest_icon_like_array(handler, "icons", manifest_url, manifest_path);
854        }
855    }
856
857    if let Some(share_target) = manifest.get_mut("share_target") {
858        rewrite_manifest_url_field(share_target, "action", manifest_url, manifest_path);
859        rewrite_manifest_url_field(share_target, "url_template", manifest_url, manifest_path);
860    }
861
862    if let Some(note_taking) = manifest.get_mut("note_taking") {
863        rewrite_manifest_url_field(note_taking, "new_note_url", manifest_url, manifest_path);
864    }
865
866    if let Some(tab_strip) = manifest.get_mut("tab_strip") {
867        if let Some(home_tab) = tab_strip.get_mut("home_tab") {
868            rewrite_manifest_url_field(home_tab, "url", manifest_url, manifest_path);
869            rewrite_manifest_icon_like_array(home_tab, "icons", manifest_url, manifest_path);
870        }
871        if let Some(new_tab_button) = tab_strip.get_mut("new_tab_button") {
872            rewrite_manifest_url_field(new_tab_button, "url", manifest_url, manifest_path);
873            rewrite_manifest_icon_like_array(new_tab_button, "icons", manifest_url, manifest_path);
874        }
875    }
876}
877
878fn collect_manifest_url_field(value: Option<&Value>, manifest_url: &Url, urls: &mut Vec<Url>) {
879    let Some(raw_value) = value.and_then(Value::as_str) else {
880        return;
881    };
882    let Some(resolved) = resolve_resource_url(raw_value, manifest_url) else {
883        return;
884    };
885    urls.push(resolved);
886}
887
888fn collect_manifest_icon_like_urls(value: Option<&Value>, manifest_url: &Url, urls: &mut Vec<Url>) {
889    let Some(items) = value.and_then(Value::as_array) else {
890        return;
891    };
892    for item in items {
893        collect_manifest_url_field(item.get("src"), manifest_url, urls);
894    }
895}
896
897fn rewrite_manifest_url_field(
898    object: &mut Value,
899    key: &str,
900    manifest_url: &Url,
901    manifest_path: &str,
902) {
903    let Some(value) = object.get_mut(key) else {
904        return;
905    };
906    let Some(raw_value) = value.as_str() else {
907        return;
908    };
909    let Some(resolved) = resolve_resource_url(raw_value, manifest_url) else {
910        return;
911    };
912
913    *value = Value::String(relative_tree_reference_for_url(
914        manifest_path,
915        &resolved,
916        manifest_url,
917    ));
918}
919
920fn rewrite_manifest_icon_like_array(
921    object: &mut Value,
922    key: &str,
923    manifest_url: &Url,
924    manifest_path: &str,
925) {
926    let Some(items) = object.get_mut(key).and_then(Value::as_array_mut) else {
927        return;
928    };
929    for item in items {
930        rewrite_manifest_url_field(item, "src", manifest_url, manifest_path);
931    }
932}
933
934fn relative_tree_reference_for_url(from_path: &str, target_url: &Url, base_url: &Url) -> String {
935    let target_path = url_to_path(target_url, base_url);
936    let mut relative = relative_tree_reference(from_path, &target_path);
937    if let Some(query) = target_url.query() {
938        relative.push('?');
939        relative.push_str(query);
940    }
941    if let Some(fragment) = target_url.fragment() {
942        relative.push('#');
943        relative.push_str(fragment);
944    }
945    relative
946}
947
948fn absolute_tree_reference_for_url(target_url: &Url, base_url: &Url) -> String {
949    let target_path = url_to_path(target_url, base_url);
950    let mut absolute = absolute_tree_path(&target_path);
951    if let Some(query) = target_url.query() {
952        absolute.push('?');
953        absolute.push_str(query);
954    }
955    if let Some(fragment) = target_url.fragment() {
956        absolute.push('#');
957        absolute.push_str(fragment);
958    }
959    absolute
960}
961
962fn absolute_tree_reference_from_path(from_path: &str, reference: &str) -> Option<String> {
963    let trimmed_reference = reference.trim();
964    if trimmed_reference.is_empty() {
965        return None;
966    }
967
968    let base_url = Url::parse(&format!(
969        "https://tree.invalid/{}",
970        normalize_asset_path(from_path)
971    ))
972    .ok()?;
973    let resolved = base_url.join(trimmed_reference).ok()?;
974    Some(absolute_tree_reference_for_url(&resolved, &resolved))
975}
976
977fn rewrite_html_asset_urls(
978    html: &str,
979    html_path: &str,
980    html_url: &Url,
981    base_url: &Url,
982) -> (String, Vec<Url>) {
983    let mut html_rewrites = Vec::new();
984    let mut queued_assets = BTreeSet::new();
985
986    for link_asset in extract_link_asset_references(html, html_url) {
987        let asset_path = url_to_path(&link_asset.resolved_url, base_url);
988        html_rewrites.push((
989            link_asset.raw_value,
990            relative_tree_reference(html_path, &asset_path),
991        ));
992        queued_assets.insert(link_asset.resolved_url);
993    }
994    for script in extract_script_references(html, html_url) {
995        let asset_path = url_to_path(&script.resolved_url, base_url);
996        html_rewrites.push((
997            script.raw_value,
998            relative_tree_reference(html_path, &asset_path),
999        ));
1000        queued_assets.insert(script.resolved_url);
1001    }
1002    for image in extract_image_references(html, html_url) {
1003        let asset_path = url_to_path(&image.resolved_url, base_url);
1004        html_rewrites.push((
1005            image.raw_value,
1006            relative_tree_reference(html_path, &asset_path),
1007        ));
1008        queued_assets.insert(image.resolved_url);
1009    }
1010
1011    (
1012        rewrite_html_urls(html, &html_rewrites),
1013        queued_assets.into_iter().collect(),
1014    )
1015}
1016
/// Applies `(from, to)` text rewrites to `html` in a single left-to-right pass.
///
/// At every step the earliest remaining occurrence of any `from` value is replaced; when two
/// patterns match at the same position the longer one wins. Replacement text is copied straight
/// to the output and is never scanned again, so one rewrite cannot corrupt the result of
/// another (for example `/img/logo.png` matching inside an already rewritten CDN URL that ends
/// with the same path). Empty `from` values are ignored.
fn rewrite_html_urls(html: &str, rewrites: &[(String, String)]) -> String {
    let mut patterns: Vec<(&str, &str)> = rewrites
        .iter()
        .filter(|(from, _)| !from.is_empty())
        .map(|(from, to)| (from.as_str(), to.as_str()))
        .collect();
    // Stable sort, longest first: ties on match position are resolved by this order.
    patterns.sort_by(|(left, _), (right, _)| right.len().cmp(&left.len()));

    // Cached absolute byte offset of the next occurrence of each pattern.
    let mut next_match: Vec<Option<usize>> =
        patterns.iter().map(|(from, _)| html.find(from)).collect();

    let mut output = String::with_capacity(html.len());
    let mut cursor = 0usize;

    loop {
        let mut earliest: Option<(usize, usize)> = None;
        for (index, candidate) in next_match.iter().enumerate() {
            if let Some(position) = *candidate {
                let is_earlier = earliest.map_or(true, |(best, _)| position < best);
                if is_earlier {
                    earliest = Some((position, index));
                }
            }
        }
        let Some((position, index)) = earliest else {
            break;
        };

        let (from, to) = patterns[index];
        output.push_str(&html[cursor..position]);
        output.push_str(to);
        cursor = position + from.len();

        // Only matches that started inside the consumed span need to be searched for again.
        for (slot, (pattern, _)) in next_match.iter_mut().zip(&patterns) {
            if matches!(*slot, Some(stale) if stale < cursor) {
                *slot = html[cursor..].find(pattern).map(|offset| cursor + offset);
            }
        }
    }

    output.push_str(&html[cursor..]);
    output
}
1026
1027fn rewrite_css_urls(css: &str, css_path: &str, css_url: &Url, base_url: &Url) -> String {
1028    let mut output = String::with_capacity(css.len());
1029    let mut cursor = 0usize;
1030
1031    while let Some(found) = css[cursor..].find("url(") {
1032        let start = cursor + found;
1033        output.push_str(&css[cursor..start]);
1034        let mut value_start = start + 4;
1035        while let Some(ch) = css[value_start..].chars().next() {
1036            if ch.is_whitespace() {
1037                value_start += ch.len_utf8();
1038                continue;
1039            }
1040            break;
1041        }
1042
1043        let mut quoted = None;
1044        if let Some(ch) = css[value_start..].chars().next() {
1045            if ch == '"' || ch == '\'' {
1046                quoted = Some(ch);
1047                value_start += ch.len_utf8();
1048            }
1049        }
1050
1051        let mut value_end = value_start;
1052        while value_end < css.len() {
1053            let ch = css[value_end..].chars().next().unwrap_or(')');
1054            if let Some(quote) = quoted {
1055                if ch == quote {
1056                    break;
1057                }
1058            } else if ch == ')' {
1059                break;
1060            }
1061            value_end += ch.len_utf8();
1062        }
1063
1064        let raw_value = css[value_start..value_end].trim();
1065        let mut after_value = value_end;
1066        if quoted.is_some() && after_value < css.len() {
1067            after_value += css[after_value..]
1068                .chars()
1069                .next()
1070                .map(|ch| ch.len_utf8())
1071                .unwrap_or(0);
1072        }
1073        while after_value < css.len() {
1074            let ch = css[after_value..].chars().next().unwrap_or(')');
1075            after_value += ch.len_utf8();
1076            if ch == ')' {
1077                break;
1078            }
1079        }
1080
1081        if let Some(resolved) = resolve_resource_url(raw_value, css_url) {
1082            let target_path = url_to_path(&resolved, base_url);
1083            output.push_str(&format!(
1084                "url(\"{}\")",
1085                relative_tree_reference(css_path, &target_path)
1086            ));
1087        } else {
1088            output.push_str(&css[start..after_value]);
1089        }
1090
1091        cursor = after_value;
1092    }
1093
1094    output.push_str(&css[cursor..]);
1095    output
1096}
1097
1098fn is_http_url(value: &str) -> bool {
1099    Url::parse(value)
1100        .ok()
1101        .map(|url| matches!(url.scheme(), "http" | "https"))
1102        .unwrap_or(false)
1103}
1104
1105fn looks_like_image_payload(content_type: &str, bytes: &[u8]) -> bool {
1106    let normalized_content_type = content_type
1107        .split(';')
1108        .next()
1109        .map(str::trim)
1110        .unwrap_or_default()
1111        .to_ascii_lowercase();
1112    if normalized_content_type.starts_with("image/") {
1113        return true;
1114    }
1115
1116    bytes.starts_with(&[0x89, b'P', b'N', b'G', 0x0D, 0x0A, 0x1A, 0x0A])
1117        || bytes.starts_with(&[0xFF, 0xD8, 0xFF])
1118        || bytes.starts_with(b"GIF87a")
1119        || bytes.starts_with(b"GIF89a")
1120        || bytes.starts_with(&[0x00, 0x00, 0x01, 0x00])
1121        || bytes.starts_with(&[0x00, 0x00, 0x02, 0x00])
1122        || (bytes.len() >= 12 && &bytes[0..4] == b"RIFF" && &bytes[8..12] == b"WEBP")
1123        || bytes_trimmed_starts_with_svg(bytes)
1124}
1125
/// Heuristically detects SVG markup in `bytes`.
///
/// The payload must be valid UTF-8. After skipping leading whitespace and byte-order marks it
/// must either start with `<svg`, or start with an `<?xml` declaration and contain `<svg`
/// somewhere later.
fn bytes_trimmed_starts_with_svg(bytes: &[u8]) -> bool {
    match std::str::from_utf8(bytes) {
        Ok(text) => {
            let body = text.trim_start_matches(|ch: char| ch == '\u{FEFF}' || ch.is_whitespace());
            if body.starts_with("<svg") {
                true
            } else {
                body.starts_with("<?xml") && body.contains("<svg")
            }
        }
        Err(_) => false,
    }
}
1133
1134fn icon_asset_path(icon_url: &Url, content_type: &str, bytes: &[u8]) -> String {
1135    let mut path = url_to_path(icon_url, icon_url);
1136    let has_extension = Path::new(&path)
1137        .extension()
1138        .and_then(|value| value.to_str())
1139        .map(|value| !value.is_empty())
1140        .unwrap_or(false);
1141
1142    if !has_extension {
1143        let extension = infer_icon_extension(content_type, bytes).unwrap_or("bin");
1144        if path == "index.html" {
1145            path = format!("icon.{extension}");
1146        } else {
1147            path = format!("{path}.{extension}");
1148        }
1149    }
1150
1151    path
1152}
1153
1154fn infer_icon_extension(content_type: &str, bytes: &[u8]) -> Option<&'static str> {
1155    let normalized_content_type = content_type
1156        .split(';')
1157        .next()
1158        .map(str::trim)
1159        .unwrap_or_default()
1160        .to_ascii_lowercase();
1161    if normalized_content_type == "image/png" || bytes.starts_with(&[0x89, b'P', b'N', b'G']) {
1162        return Some("png");
1163    }
1164    if normalized_content_type == "image/jpeg" || bytes.starts_with(&[0xFF, 0xD8, 0xFF]) {
1165        return Some("jpg");
1166    }
1167    if normalized_content_type == "image/gif"
1168        || bytes.starts_with(b"GIF87a")
1169        || bytes.starts_with(b"GIF89a")
1170    {
1171        return Some("gif");
1172    }
1173    if normalized_content_type == "image/webp"
1174        || (bytes.len() >= 12 && &bytes[0..4] == b"RIFF" && &bytes[8..12] == b"WEBP")
1175    {
1176        return Some("webp");
1177    }
1178    if normalized_content_type == "image/svg+xml" || bytes_trimmed_starts_with_svg(bytes) {
1179        return Some("svg");
1180    }
1181    if normalized_content_type == "image/x-icon"
1182        || normalized_content_type == "image/vnd.microsoft.icon"
1183        || bytes.starts_with(&[0x00, 0x00, 0x01, 0x00])
1184        || bytes.starts_with(&[0x00, 0x00, 0x02, 0x00])
1185    {
1186        return Some("ico");
1187    }
1188    None
1189}
1190
1191fn extract_manifest_reference(html: &str, base_url: &Url) -> Option<AssetReference> {
1192    extract_tag_attributes(html, "link")
1193        .into_iter()
1194        .find(|attrs| rel_contains(attrs, "manifest") && attrs.contains_key("href"))
1195        .and_then(|attrs| attrs.get("href").cloned())
1196        .and_then(|href| asset_reference(href, base_url))
1197}
1198
1199fn extract_link_asset_references(html: &str, base_url: &Url) -> Vec<AssetReference> {
1200    const ASSET_LINK_RELS: &[&str] = &["stylesheet", "modulepreload", "preload", "prefetch"];
1201
1202    extract_tag_attributes(html, "link")
1203        .into_iter()
1204        .filter(|attrs| ASSET_LINK_RELS.iter().any(|rel| rel_contains(attrs, rel)))
1205        .filter_map(|attrs| attrs.get("href").cloned())
1206        .filter_map(|href| asset_reference(href, base_url))
1207        .collect()
1208}
1209
1210fn extract_script_references(html: &str, base_url: &Url) -> Vec<AssetReference> {
1211    extract_tag_attributes(html, "script")
1212        .into_iter()
1213        .filter_map(|attrs| attrs.get("src").cloned())
1214        .filter_map(|src| asset_reference(src, base_url))
1215        .collect()
1216}
1217
1218fn extract_image_references(html: &str, base_url: &Url) -> Vec<AssetReference> {
1219    extract_tag_attributes(html, "img")
1220        .into_iter()
1221        .filter_map(|attrs| attrs.get("src").cloned())
1222        .filter_map(|src| asset_reference(src, base_url))
1223        .collect()
1224}
1225
1226fn extract_css_urls(css: &str, css_url: &Url) -> Vec<Url> {
1227    let mut urls = Vec::new();
1228    let mut cursor = 0usize;
1229
1230    while let Some(found) = css[cursor..].find("url(") {
1231        let start = cursor + found + 4;
1232        let mut value_start = start;
1233        while let Some(ch) = css[value_start..].chars().next() {
1234            if ch.is_whitespace() {
1235                value_start += ch.len_utf8();
1236                continue;
1237            }
1238            break;
1239        }
1240
1241        let mut quoted = None;
1242        if let Some(ch) = css[value_start..].chars().next() {
1243            if ch == '"' || ch == '\'' {
1244                quoted = Some(ch);
1245                value_start += ch.len_utf8();
1246            }
1247        }
1248
1249        let mut value_end = value_start;
1250        while value_end < css.len() {
1251            let ch = css[value_end..].chars().next().unwrap_or(')');
1252            if let Some(quote) = quoted {
1253                if ch == quote {
1254                    break;
1255                }
1256            } else if ch == ')' {
1257                break;
1258            }
1259            value_end += ch.len_utf8();
1260        }
1261
1262        let raw_value = css[value_start..value_end].trim();
1263        if let Some(resolved) = resolve_resource_url(raw_value, css_url) {
1264            urls.push(resolved);
1265        }
1266
1267        let mut after_value = value_end;
1268        if quoted.is_some() && after_value < css.len() {
1269            after_value += css[after_value..]
1270                .chars()
1271                .next()
1272                .map(|ch| ch.len_utf8())
1273                .unwrap_or(0);
1274        }
1275        while after_value < css.len() {
1276            let ch = css[after_value..].chars().next().unwrap_or(')');
1277            after_value += ch.len_utf8();
1278            if ch == ')' {
1279                break;
1280            }
1281        }
1282
1283        cursor = after_value;
1284    }
1285
1286    urls
1287}
1288
1289fn extract_tag_attributes(html: &str, tag_name: &str) -> Vec<HashMap<String, String>> {
1290    let needle = format!("<{}", tag_name.to_ascii_lowercase());
1291    let lowercase_html = html.to_ascii_lowercase();
1292    let mut results = Vec::new();
1293    let mut cursor = 0usize;
1294
1295    while let Some(found) = lowercase_html[cursor..].find(&needle) {
1296        let start = cursor + found;
1297        let end = match find_tag_end(html, start + 1) {
1298            Some(end) => end,
1299            None => break,
1300        };
1301        let tag_body = &html[start + 1..end];
1302        if tag_body
1303            .split_whitespace()
1304            .next()
1305            .map(|name| name.eq_ignore_ascii_case(tag_name))
1306            .unwrap_or(false)
1307        {
1308            results.push(parse_attributes(tag_body));
1309        }
1310        cursor = end + 1;
1311    }
1312
1313    results
1314}
1315
/// Returns the byte offset of the first `>` at or after `cursor` that is not inside a single-
/// or double-quoted span, or `None` when there is none.
fn find_tag_end(html: &str, cursor: usize) -> Option<usize> {
    if cursor >= html.len() {
        return None;
    }

    let mut open_quote: Option<char> = None;
    for (offset, ch) in html[cursor..].char_indices() {
        match open_quote {
            Some(quote) if ch == quote => open_quote = None,
            Some(_) => {}
            None if ch == '"' || ch == '\'' => open_quote = Some(ch),
            None if ch == '>' => return Some(cursor + offset),
            None => {}
        }
    }
    None
}
1333
/// Parses the attributes of a start tag.
///
/// `tag_body` is the text between `<` and `>`, beginning with the tag name (which is skipped).
/// Attribute names are lowercased; values are returned verbatim, without entity decoding.
/// Attributes without a value map to an empty string. When a name repeats, the last
/// occurrence wins.
///
/// Values may be double-quoted, single-quoted or unquoted. An unquoted value runs to the next
/// whitespace and may contain `/` (e.g. `href=/assets/app.css`). Only a `/` that is the last
/// non-whitespace character of the tag is treated as the self-closing marker and excluded.
fn parse_attributes(tag_body: &str) -> HashMap<String, String> {
    /// Advances `cursor` past any whitespace in `text`.
    fn skip_whitespace(text: &str, cursor: usize) -> usize {
        let rest = &text[cursor..];
        cursor + (rest.len() - rest.trim_start().len())
    }

    let mut attributes = HashMap::new();
    // `find` yields a byte offset, which is what slicing needs (a char index would be wrong
    // for non-ASCII input).
    let mut cursor = tag_body
        .find(char::is_whitespace)
        .unwrap_or(tag_body.len());

    while cursor < tag_body.len() {
        // Skip separators between attributes, including a stray self-closing slash.
        let rest = &tag_body[cursor..];
        let Some(name_offset) = rest.find(|ch: char| !ch.is_whitespace() && ch != '/') else {
            break;
        };
        cursor += name_offset;

        let rest = &tag_body[cursor..];
        let name_len = rest
            .find(|ch: char| ch.is_whitespace() || ch == '=' || ch == '/')
            .unwrap_or(rest.len());
        if name_len == 0 {
            // A dangling `=` with no name: nothing sensible left to parse.
            break;
        }
        let name = rest[..name_len].to_ascii_lowercase();
        cursor = skip_whitespace(tag_body, cursor + name_len);

        let mut value = String::new();
        if tag_body[cursor..].starts_with('=') {
            cursor = skip_whitespace(tag_body, cursor + 1);
            let rest = &tag_body[cursor..];
            match rest.chars().next() {
                Some(quote @ ('"' | '\'')) => {
                    let quoted = &rest[quote.len_utf8()..];
                    match quoted.find(quote) {
                        Some(len) => {
                            value = quoted[..len].to_string();
                            // Opening quote + value + closing quote.
                            cursor += quote.len_utf8() + len + quote.len_utf8();
                        }
                        None => {
                            // Unterminated quote: take everything that is left.
                            value = quoted.to_string();
                            cursor = tag_body.len();
                        }
                    }
                }
                Some(_) => {
                    let len = rest
                        .char_indices()
                        .find(|&(index, ch)| {
                            ch.is_whitespace()
                                || (ch == '/' && rest[index + 1..].trim().is_empty())
                        })
                        .map_or(rest.len(), |(index, _)| index);
                    value = rest[..len].to_string();
                    cursor += len;
                }
                None => {}
            }
        }

        attributes.insert(name, value);
    }

    attributes
}
1432
/// Returns `true` when the whitespace-separated `rel` attribute in `attrs` contains `token`
/// (ASCII case-insensitive). A missing `rel` attribute never matches.
fn rel_contains(attrs: &HashMap<String, String>, token: &str) -> bool {
    let Some(rel) = attrs.get("rel") else {
        return false;
    };
    rel.split_whitespace()
        .any(|candidate| candidate.eq_ignore_ascii_case(token))
}
1443
1444fn asset_reference(raw_value: String, base_url: &Url) -> Option<AssetReference> {
1445    let resolved_url = resolve_resource_url(&raw_value, base_url)?;
1446    Some(AssetReference {
1447        raw_value,
1448        resolved_url,
1449    })
1450}
1451
1452fn resolve_resource_url(value: &str, base_url: &Url) -> Option<Url> {
1453    let trimmed = value.trim();
1454    if trimmed.is_empty()
1455        || trimmed.starts_with("data:")
1456        || trimmed.starts_with("javascript:")
1457        || trimmed.starts_with("mailto:")
1458        || trimmed.starts_with("tel:")
1459    {
1460        return None;
1461    }
1462    let resolved = base_url.join(trimmed).ok()?;
1463    if matches!(resolved.scheme(), "http" | "https") {
1464        Some(resolved)
1465    } else {
1466        None
1467    }
1468}
1469
/// Extracts the trimmed text of the first `<title>` element in `html`.
///
/// Tag matching is ASCII case-insensitive and tolerates attributes on the opening tag (for
/// example `<title data-rh="true">`). Elements that merely share the prefix, such as
/// `<titlebar>`, are skipped. The text is returned verbatim, without entity decoding.
///
/// Returns `None` when there is no complete `<title>...</title>` pair or when the first title
/// is blank.
fn extract_title(html: &str) -> Option<String> {
    const OPEN: &str = "<title";
    const CLOSE: &str = "</title>";

    // ASCII lowercasing keeps byte offsets aligned with the original text.
    let lowercase = html.to_ascii_lowercase();
    let mut search_from = 0usize;

    while let Some(found) = lowercase[search_from..].find(OPEN) {
        let after_name = search_from + found + OPEN.len();
        let is_title_tag = lowercase[after_name..]
            .chars()
            .next()
            .map_or(false, |ch| ch == '>' || ch.is_whitespace());
        if !is_title_tag {
            search_from = after_name;
            continue;
        }

        let content_start = after_name + lowercase[after_name..].find('>')? + 1;
        let content_len = lowercase[content_start..].find(CLOSE)?;
        let value = html[content_start..content_start + content_len].trim();
        return if value.is_empty() {
            None
        } else {
            Some(value.to_string())
        };
    }

    None
}
1481
1482fn url_to_path(url: &Url, base_url: &Url) -> String {
1483    let mut path = url.path().trim_start_matches('/').to_string();
1484    if path.is_empty() || path.ends_with('/') {
1485        path.push_str("index.html");
1486    }
1487
1488    if url.origin() == base_url.origin() {
1489        return path;
1490    }
1491
1492    let host = url.host_str().unwrap_or("external");
1493    format!("_external/{host}/{path}")
1494}
1495
/// Computes the relative reference that leads from the tree file `from_path` to the tree file
/// `target_path`.
///
/// Both paths are `/`-separated; leading, trailing and repeated slashes are ignored. The result
/// climbs out of `from_path`'s directory with `..` segments as far as needed and then descends
/// to the target. An empty `target_path` yields `index.html`.
///
/// Only directory segments take part in the common-prefix comparison: the target's final
/// segment is always emitted, so a target that happens to be named like one of `from_path`'s
/// ancestor directories (from `app/index.html` to `app`) correctly becomes `../app` rather
/// than `app`.
fn relative_tree_reference(from_path: &str, target_path: &str) -> String {
    let target_segments: Vec<&str> = target_path
        .split('/')
        .filter(|segment| !segment.is_empty())
        .collect();
    let Some((target_file, target_dirs)) = target_segments.split_last() else {
        return "index.html".to_string();
    };

    // Directory of the referencing file: every segment except its file name.
    let mut from_dirs: Vec<&str> = from_path
        .split('/')
        .filter(|segment| !segment.is_empty())
        .collect();
    from_dirs.pop();

    let shared = from_dirs
        .iter()
        .zip(target_dirs)
        .take_while(|(left, right)| left == right)
        .count();

    let mut relative_segments: Vec<&str> = vec![".."; from_dirs.len() - shared];
    relative_segments.extend_from_slice(&target_dirs[shared..]);
    relative_segments.push(*target_file);
    relative_segments.join("/")
}
1535
1536fn root_relative_path(path: &str) -> String {
1537    let clean = normalize_asset_path(path);
1538    if clean.is_empty() {
1539        "/index.html".to_string()
1540    } else {
1541        format!("/{}", clean)
1542    }
1543}
1544
1545fn absolute_tree_path(path: &str) -> String {
1546    root_relative_path(path)
1547}
1548
/// Strips every leading and trailing `/` from `path`; interior slashes are kept as they are.
fn normalize_asset_path(path: &str) -> String {
    let without_leading = path.trim_start_matches('/');
    String::from(without_leading.trim_end_matches('/'))
}
1552
/// Splits `path` at its last `/` into `(parent, name)`; a path without `/` has an empty parent.
fn split_parent_and_name(path: &str) -> (String, String) {
    let (parent, name) = path.rsplit_once('/').unwrap_or(("", path));
    (parent.to_owned(), name.to_owned())
}
1559
1560fn parent_chain(path: &str) -> Vec<String> {
1561    let mut parents = Vec::new();
1562    let mut current = parent_path(path);
1563    parents.push(String::new());
1564    while !current.is_empty() {
1565        parents.push(current.clone());
1566        current = parent_path(&current);
1567    }
1568    parents
1569}
1570
/// Returns everything before the last `/` of `path`, or an empty string when there is no `/`.
fn parent_path(path: &str) -> String {
    match path.rfind('/') {
        Some(separator) => path[..separator].to_owned(),
        None => String::new(),
    }
}
1576
/// Returns the text after the last `/` of `path` (the whole path when there is no `/`).
///
/// The result is always `Some`; it is an empty string for an empty path or one ending in `/`.
fn file_name(path: &str) -> Option<String> {
    let name_start = path.rfind('/').map_or(0, |separator| separator + 1);
    Some(path[name_start..].to_owned())
}
1580
/// Counts the `/`-separated segments of `path`; the empty path has depth 0.
fn dir_depth(path: &str) -> usize {
    match path {
        "" => 0,
        // One more segment than there are separators.
        _ => path.matches('/').count() + 1,
    }
}
1588
/// Returns `path` for display purposes, showing the root (empty path) as `/`.
fn display_dir(path: &str) -> &str {
    match path {
        "" => "/",
        other => other,
    }
}
1596
1597#[cfg(test)]
1598mod tests {
1599    use super::*;
1600    use hashtree_core::nhash_decode;
1601    use serde_json::json;
1602    use tempfile::tempdir;
1603
1604    const LIVE_JUMBLE_SMOKE_URL: &str = "https://jumble.social/";
1605    const LIVE_JUMBLE_SMOKE_MANIFEST_URL: &str = "https://jumble.social/manifest.webmanifest";
1606    const LIVE_PHOTOPEA_SMOKE_URL: &str = "https://www.photopea.com/";
1607    const LIVE_PHOTOPEA_SMOKE_MANIFEST_URL: &str = "https://www.photopea.com/manifest.json";
1608    const LIVE_EXCALIDRAW_SMOKE_URL: &str = "https://excalidraw.com/";
1609    const LIVE_EXCALIDRAW_SMOKE_MANIFEST_URL: &str = "https://excalidraw.com/manifest.webmanifest";
1610    const LIVE_FASTMAIL_SMOKE_URL: &str = "https://app.fastmail.com/";
1611    const LIVE_FASTMAIL_SMOKE_MANIFEST_PREFIX: &str = "https://app.fastmail.com/static/jmapui/";
1612    const LIVE_FASTMAIL_SMOKE_MANIFEST_SUFFIX: &str = "/app.webmanifest";
1613    const LIVE_MASTODON_SMOKE_URL: &str = "https://mastodon.social/";
1614    const LIVE_MASTODON_SMOKE_MANIFEST_URL: &str = "https://mastodon.social/manifest";
1615
1616    fn split_htree_nhash_url(url: &str) -> (String, String) {
1617        let trimmed = url.strip_prefix("htree://").expect("htree:// url");
1618        let (host, path) = trimmed.split_once('/').unwrap_or((trimmed, ""));
1619        let normalized_path = if path.is_empty() {
1620            "/".to_string()
1621        } else {
1622            format!("/{path}")
1623        };
1624        (host.to_string(), normalized_path)
1625    }
1626
1627    async fn read_exported_tree_text(
1628        store: &HashtreeStore,
1629        root_nhash: &str,
1630        path: &str,
1631    ) -> String {
1632        let root = nhash_decode(root_nhash).unwrap();
1633        let tree = HashTree::new(HashTreeConfig::new(store.store_arc()).public());
1634        let root_cid = Cid::public(root.hash);
1635        let resolved_path = path
1636            .trim_start_matches('/')
1637            .split(['?', '#'])
1638            .next()
1639            .unwrap_or("");
1640        let file_cid = tree
1641            .resolve_path(&root_cid, resolved_path)
1642            .await
1643            .unwrap()
1644            .expect("resolve exported path");
1645        String::from_utf8(
1646            tree.read_file(&file_cid.hash)
1647                .await
1648                .unwrap()
1649                .expect("read exported file"),
1650        )
1651        .unwrap()
1652    }
1653
1654    #[test]
1655    fn extract_manifest_url_finds_manifest_link() {
1656        let html = r#"
1657          <html>
1658            <head>
1659              <link rel="manifest" href="/manifest.webmanifest">
1660            </head>
1661          </html>
1662        "#;
1663        let base_url = Url::parse("https://jumble.social/").unwrap();
1664
1665        assert_eq!(
1666            extract_manifest_reference(html, &base_url)
1667                .unwrap()
1668                .resolved_url
1669                .as_str(),
1670            "https://jumble.social/manifest.webmanifest"
1671        );
1672    }
1673
1674    #[test]
1675    fn parse_attributes_supports_quoted_values() {
1676        let attrs = parse_attributes(
1677            r#"link rel="manifest preload" href='/manifest.webmanifest' crossorigin"#,
1678        );
1679
1680        assert_eq!(
1681            attrs.get("rel").map(String::as_str),
1682            Some("manifest preload")
1683        );
1684        assert_eq!(
1685            attrs.get("href").map(String::as_str),
1686            Some("/manifest.webmanifest")
1687        );
1688        assert_eq!(attrs.get("crossorigin").map(String::as_str), Some(""));
1689    }
1690
1691    #[test]
1692    fn url_to_path_maps_root_and_trailing_slash_to_index() {
1693        let base_url = Url::parse("https://jumble.social/").unwrap();
1694        assert_eq!(
1695            url_to_path(&Url::parse("https://jumble.social/").unwrap(), &base_url),
1696            "index.html"
1697        );
1698        assert_eq!(
1699            url_to_path(
1700                &Url::parse("https://jumble.social/app/").unwrap(),
1701                &base_url
1702            ),
1703            "app/index.html"
1704        );
1705        assert_eq!(
1706            url_to_path(&Url::parse("https://jumble.social/app").unwrap(), &base_url),
1707            "app"
1708        );
1709        assert_eq!(
1710            url_to_path(
1711                &Url::parse("https://cdn.example.com/fonts/app.woff2").unwrap(),
1712                &base_url
1713            ),
1714            "_external/cdn.example.com/fonts/app.woff2"
1715        );
1716    }
1717
1718    #[test]
1719    fn rewrite_css_urls_rewrites_relative_and_absolute_urls_to_root_paths() {
1720        let css = r#"
1721          body { background-image: url("../img/bg.png"); }
1722          @font-face { src: url("https://cdn.example.com/fonts/app.woff2"); }
1723        "#;
1724        let css_url = Url::parse("https://jumble.social/assets/main.css").unwrap();
1725
1726        let rewritten = rewrite_css_urls(
1727            css,
1728            "assets/main.css",
1729            &css_url,
1730            &Url::parse("https://jumble.social/").unwrap(),
1731        );
1732
1733        assert!(rewritten.contains("url(\"../img/bg.png\")"));
1734        assert!(rewritten.contains("url(\"../_external/cdn.example.com/fonts/app.woff2\")"));
1735    }
1736
1737    #[test]
1738    fn rewrite_html_asset_urls_rewrites_nested_page_dependencies() {
1739        let html = r#"
1740          <link rel="stylesheet" href="/assets/main.css">
1741          <link rel="modulepreload" href="/assets/chunk.js">
1742          <link rel="preload" as="font" href="https://cdn.example.com/fonts/app.woff2">
1743          <script type="module" src="bundle.js"></script>
1744          <img src="https://cdn.example.com/logo.png">
1745        "#;
1746        let html_url = Url::parse("https://jumble.social/app/index.html").unwrap();
1747        let base_url = Url::parse("https://jumble.social/").unwrap();
1748
1749        let (rewritten, mut nested_urls) =
1750            rewrite_html_asset_urls(html, "app/index.html", &html_url, &base_url);
1751        nested_urls.sort_by(|left, right| left.as_str().cmp(right.as_str()));
1752
1753        assert!(rewritten.contains(r#"href="../assets/main.css""#));
1754        assert!(rewritten.contains(r#"href="../assets/chunk.js""#));
1755        assert!(rewritten.contains(r#"href="../_external/cdn.example.com/fonts/app.woff2""#));
1756        assert!(rewritten.contains(r#"src="bundle.js""#));
1757        assert!(rewritten.contains(r#"src="../_external/cdn.example.com/logo.png""#));
1758        assert_eq!(
1759            nested_urls
1760                .into_iter()
1761                .map(|url| url.to_string())
1762                .collect::<Vec<_>>(),
1763            vec![
1764                "https://cdn.example.com/fonts/app.woff2",
1765                "https://cdn.example.com/logo.png",
1766                "https://jumble.social/app/bundle.js",
1767                "https://jumble.social/assets/chunk.js",
1768                "https://jumble.social/assets/main.css",
1769            ]
1770        );
1771    }
1772
1773    #[test]
1774    fn manifest_start_reference_preserves_query_and_fragment() {
1775        let manifest = json!({
1776            "start_url": "../index.html?source=pwa#home"
1777        });
1778        let manifest_url = Url::parse("https://jumble.social/app/manifest.webmanifest").unwrap();
1779
1780        assert_eq!(
1781            manifest_start_reference(&manifest, &manifest_url),
1782            Some("/index.html?source=pwa#home".to_string())
1783        );
1784    }
1785
1786    #[test]
1787    fn manifest_scope_reference_preserves_root_scope() {
1788        let manifest = json!({
1789            "scope": "/"
1790        });
1791        let manifest_url =
1792            Url::parse("https://app.fastmail.com/static/jmapui/hash/app.webmanifest").unwrap();
1793
1794        assert_eq!(
1795            manifest_scope_reference(&manifest, &manifest_url),
1796            Some("/".to_string())
1797        );
1798    }
1799
1800    #[test]
1801    fn manifest_handlers_resolve_to_absolute_tree_paths() {
1802        let manifest = json!({
1803            "shortcuts": [
1804                {
1805                    "name": "Compose",
1806                    "url": "../../../mail/Inbox/compose"
1807                },
1808                {
1809                    "name": "Contacts",
1810                    "url": "../../../contacts/index.html"
1811                }
1812            ],
1813            "protocol_handlers": [
1814                {
1815                    "protocol": "mailto",
1816                    "url": "../../../mail/compose?mailto=%s"
1817                }
1818            ]
1819        });
1820
1821        assert_eq!(
1822            manifest_shortcuts(&manifest, "static/jmapui/hash/app.webmanifest"),
1823            vec![
1824                PwaShortcut {
1825                    name: "Compose".to_string(),
1826                    url: "/mail/Inbox/compose".to_string(),
1827                },
1828                PwaShortcut {
1829                    name: "Contacts".to_string(),
1830                    url: "/contacts/index.html".to_string(),
1831                },
1832            ]
1833        );
1834        assert_eq!(
1835            manifest_protocol_handlers(&manifest, "static/jmapui/hash/app.webmanifest"),
1836            vec![PwaProtocolHandler {
1837                protocol: "mailto".to_string(),
1838                url: "/mail/compose?mailto=%s".to_string(),
1839            }]
1840        );
1841    }
1842
1843    #[test]
1844    fn installed_site_pwa_serialization_includes_manifest_metadata() {
1845        let installed = InstalledSitePwa {
1846            name: "Example App".to_string(),
1847            launch_url: "htree://nhash-example/app/index.html".to_string(),
1848            icon_url: Some("htree://nhash-example/icons/pwa-192.png".to_string()),
1849            source_app_id: Some("https://example.com/app".to_string()),
1850            source_url: "https://example.com/app".to_string(),
1851            source_manifest_url: "https://example.com/manifest.webmanifest".to_string(),
1852            description: Some("Portable notes".to_string()),
1853            display_mode: Some("minimal-ui".to_string()),
1854            scope_url: Some("htree://nhash-example/".to_string()),
1855            shortcuts: vec![PwaShortcut {
1856                name: "Compose".to_string(),
1857                url: "htree://nhash-example/mail/Inbox/compose".to_string(),
1858            }],
1859            protocol_handlers: vec![PwaProtocolHandler {
1860                protocol: "mailto".to_string(),
1861                url: "htree://nhash-example/mail/compose?mailto=%s".to_string(),
1862            }],
1863        };
1864
1865        let value = serde_json::to_value(installed).unwrap();
1866
1867        assert_eq!(
1868            value.get("description").and_then(Value::as_str),
1869            Some("Portable notes")
1870        );
1871        assert_eq!(
1872            value.get("displayMode").and_then(Value::as_str),
1873            Some("minimal-ui")
1874        );
1875        assert_eq!(
1876            value.get("scopeUrl").and_then(Value::as_str),
1877            Some("htree://nhash-example/")
1878        );
1879        assert_eq!(
1880            value["shortcuts"][0].get("name").and_then(Value::as_str),
1881            Some("Compose")
1882        );
1883        assert_eq!(
1884            value["shortcuts"][0].get("url").and_then(Value::as_str),
1885            Some("htree://nhash-example/mail/Inbox/compose")
1886        );
1887        assert_eq!(
1888            value["protocolHandlers"][0]
1889                .get("protocol")
1890                .and_then(Value::as_str),
1891            Some("mailto")
1892        );
1893        assert_eq!(
1894            value["protocolHandlers"][0]
1895                .get("url")
1896                .and_then(Value::as_str),
1897            Some("htree://nhash-example/mail/compose?mailto=%s")
1898        );
1899    }
1900
1901    #[test]
1902    fn manifest_metadata_extractors_read_description_and_supported_display() {
1903        let manifest = json!({
1904            "description": " Portable notes ",
1905            "display": "minimal-ui"
1906        });
1907
1908        assert_eq!(
1909            manifest_description(&manifest),
1910            Some("Portable notes".to_string())
1911        );
1912        assert_eq!(
1913            manifest_display_mode(&manifest),
1914            Some("minimal-ui".to_string())
1915        );
1916    }
1917
1918    #[test]
1919    fn manifest_display_mode_ignores_unsupported_values() {
1920        let manifest = json!({
1921            "display": "window-controls-overlay"
1922        });
1923
1924        assert_eq!(manifest_display_mode(&manifest), None);
1925    }
1926
1927    #[test]
1928    fn pick_manifest_icon_url_prefers_any_icons_over_monochrome_ties() {
1929        let manifest = json!({
1930            "icons": [
1931                {"src": "/pwa-512x512.png", "sizes": "512x512", "purpose": "any"},
1932                {"src": "/pwa-192x192.png", "sizes": "192x192", "purpose": "any"},
1933                {"src": "/pwa-512x512-maskable.png", "sizes": "512x512", "purpose": "maskable"},
1934                {"src": "/pwa-monochrome.svg", "sizes": "512x512", "purpose": "monochrome"}
1935            ]
1936        });
1937        let manifest_url = Url::parse("https://jumble.social/manifest.webmanifest").unwrap();
1938
1939        assert_eq!(
1940            pick_manifest_icon_url(&manifest, &manifest_url).map(|value| value.to_string()),
1941            Some("https://jumble.social/pwa-512x512.png".to_string())
1942        );
1943    }
1944
1945    #[test]
1946    fn extract_manifest_resource_urls_collects_nested_manifest_fields() {
1947        let manifest = json!({
1948            "start_url": "../index.html?source=pwa#home",
1949            "icons": [
1950                {"src": "icons/app.png"},
1951                {"src": "https://cdn.example.com/icons/maskable.png"}
1952            ],
1953            "screenshots": [
1954                {"src": "shots/hero.png"}
1955            ],
1956            "shortcuts": [
1957                {
1958                    "url": "../launch/compose.html?mode=quick#composer",
1959                    "icons": [
1960                        {"src": "icons/shortcut.png"}
1961                    ]
1962                }
1963            ],
1964            "protocol_handlers": [
1965                {"url": "open?uri=placeholder"}
1966            ],
1967            "file_handlers": [
1968                {
1969                    "action": "/open-file",
1970                    "icons": [
1971                        {"src": "icons/file.png"}
1972                    ]
1973                }
1974            ],
1975            "share_target": {
1976                "action": "/share/submit?from=manifest"
1977            },
1978            "note_taking": {
1979                "new_note_url": "/notes/new.html"
1980            },
1981            "tab_strip": {
1982                "home_tab": {
1983                    "url": "/home.html",
1984                    "icons": [
1985                        {"src": "icons/home.png"}
1986                    ]
1987                },
1988                "new_tab_button": {
1989                    "url": "tabs/new.html"
1990                }
1991            }
1992        });
1993        let manifest_url = Url::parse("https://jumble.social/app/manifest.webmanifest").unwrap();
1994
1995        let mut urls: Vec<String> = extract_manifest_resource_urls(&manifest, &manifest_url)
1996            .into_iter()
1997            .map(|url| url.to_string())
1998            .collect();
1999        urls.sort();
2000
2001        assert_eq!(
2002            urls,
2003            vec![
2004                "https://cdn.example.com/icons/maskable.png",
2005                "https://jumble.social/app/icons/app.png",
2006                "https://jumble.social/app/icons/file.png",
2007                "https://jumble.social/app/icons/home.png",
2008                "https://jumble.social/app/icons/shortcut.png",
2009                "https://jumble.social/app/open?uri=placeholder",
2010                "https://jumble.social/app/shots/hero.png",
2011                "https://jumble.social/app/tabs/new.html",
2012                "https://jumble.social/home.html",
2013                "https://jumble.social/index.html?source=pwa#home",
2014                "https://jumble.social/launch/compose.html?mode=quick#composer",
2015                "https://jumble.social/notes/new.html",
2016                "https://jumble.social/open-file",
2017                "https://jumble.social/share/submit?from=manifest",
2018            ]
2019        );
2020    }
2021
2022    #[test]
2023    fn rewrite_manifest_urls_rewrites_nested_manifest_fields() {
2024        let mut manifest = json!({
2025            "start_url": "../index.html?source=pwa#home",
2026            "icons": [
2027                {"src": "icons/app.png"},
2028                {"src": "https://cdn.example.com/icons/maskable.png"}
2029            ],
2030            "screenshots": [
2031                {"src": "shots/hero.png"}
2032            ],
2033            "shortcuts": [
2034                {
2035                    "url": "../launch/compose.html?mode=quick#composer",
2036                    "icons": [
2037                        {"src": "icons/shortcut.png"}
2038                    ]
2039                }
2040            ],
2041            "protocol_handlers": [
2042                {"url": "open?uri=placeholder"}
2043            ],
2044            "file_handlers": [
2045                {
2046                    "action": "/open-file",
2047                    "icons": [
2048                        {"src": "icons/file.png"}
2049                    ]
2050                }
2051            ],
2052            "share_target": {
2053                "action": "/share/submit?from=manifest"
2054            },
2055            "note_taking": {
2056                "new_note_url": "/notes/new.html"
2057            },
2058            "tab_strip": {
2059                "home_tab": {
2060                    "url": "/home.html",
2061                    "icons": [
2062                        {"src": "icons/home.png"}
2063                    ]
2064                },
2065                "new_tab_button": {
2066                    "url": "tabs/new.html"
2067                }
2068            }
2069        });
2070        let manifest_url = Url::parse("https://jumble.social/app/manifest.webmanifest").unwrap();
2071
2072        rewrite_manifest_urls(&mut manifest, &manifest_url, "app/manifest.webmanifest");
2073
2074        assert_eq!(
2075            manifest.get("start_url").and_then(Value::as_str),
2076            Some("../index.html?source=pwa#home")
2077        );
2078        assert_eq!(
2079            manifest["icons"][0].get("src").and_then(Value::as_str),
2080            Some("icons/app.png")
2081        );
2082        assert_eq!(
2083            manifest["icons"][1].get("src").and_then(Value::as_str),
2084            Some("../_external/cdn.example.com/icons/maskable.png")
2085        );
2086        assert_eq!(
2087            manifest["screenshots"][0]
2088                .get("src")
2089                .and_then(Value::as_str),
2090            Some("shots/hero.png")
2091        );
2092        assert_eq!(
2093            manifest["shortcuts"][0].get("url").and_then(Value::as_str),
2094            Some("../launch/compose.html?mode=quick#composer")
2095        );
2096        assert_eq!(
2097            manifest["shortcuts"][0]["icons"][0]
2098                .get("src")
2099                .and_then(Value::as_str),
2100            Some("icons/shortcut.png")
2101        );
2102        assert_eq!(
2103            manifest["protocol_handlers"][0]
2104                .get("url")
2105                .and_then(Value::as_str),
2106            Some("open?uri=placeholder")
2107        );
2108        assert_eq!(
2109            manifest["file_handlers"][0]
2110                .get("action")
2111                .and_then(Value::as_str),
2112            Some("../open-file")
2113        );
2114        assert_eq!(
2115            manifest["file_handlers"][0]["icons"][0]
2116                .get("src")
2117                .and_then(Value::as_str),
2118            Some("icons/file.png")
2119        );
2120        assert_eq!(
2121            manifest["share_target"]
2122                .get("action")
2123                .and_then(Value::as_str),
2124            Some("../share/submit?from=manifest")
2125        );
2126        assert_eq!(
2127            manifest["note_taking"]
2128                .get("new_note_url")
2129                .and_then(Value::as_str),
2130            Some("../notes/new.html")
2131        );
2132        assert_eq!(
2133            manifest["tab_strip"]["home_tab"]
2134                .get("url")
2135                .and_then(Value::as_str),
2136            Some("../home.html")
2137        );
2138        assert_eq!(
2139            manifest["tab_strip"]["home_tab"]["icons"][0]
2140                .get("src")
2141                .and_then(Value::as_str),
2142            Some("icons/home.png")
2143        );
2144        assert_eq!(
2145            manifest["tab_strip"]["new_tab_button"]
2146                .get("url")
2147                .and_then(Value::as_str),
2148            Some("tabs/new.html")
2149        );
2150    }
2151
2152    #[tokio::test]
2153    #[ignore = "live network smoke test against jumble.social"]
2154    async fn installs_live_jumble_social_pwa_with_primary_app_icon() {
2155        let temp_dir = tempdir().unwrap();
2156        let store = HashtreeStore::new(temp_dir.path()).unwrap();
2157
2158        let installed = install_site_pwa_to_store(&store, LIVE_JUMBLE_SMOKE_URL)
2159            .await
2160            .unwrap();
2161
2162        assert_eq!(installed.name, "Jumble");
2163        assert_eq!(installed.source_app_id, None);
2164        assert_eq!(installed.source_url, LIVE_JUMBLE_SMOKE_URL);
2165        assert_eq!(
2166            installed.source_manifest_url,
2167            LIVE_JUMBLE_SMOKE_MANIFEST_URL
2168        );
2169        assert_eq!(
2170            installed.description.as_deref(),
2171            Some("A user-friendly Nostr client for exploring relay feeds")
2172        );
2173        assert_eq!(installed.display_mode.as_deref(), Some("standalone"));
2174        assert!(installed.launch_url.starts_with("htree://nhash1"));
2175        assert!(installed.launch_url.ends_with("/index.html"));
2176
2177        let icon_url = installed.icon_url.clone().expect("installed icon url");
2178        let (launch_nhash, launch_path) = split_htree_nhash_url(&installed.launch_url);
2179        let (icon_nhash, icon_path) = split_htree_nhash_url(&icon_url);
2180        assert_eq!(icon_nhash, launch_nhash);
2181        assert_eq!(icon_path, "/pwa-512x512.png");
2182
2183        let launch_html = read_exported_tree_text(&store, &launch_nhash, &launch_path).await;
2184        assert!(launch_html.contains("manifest.webmanifest"));
2185        assert!(!launch_html.contains("src=\"/assets/"));
2186        assert!(!launch_html.contains("href=\"/assets/"));
2187    }
2188
2189    #[tokio::test]
2190    #[ignore = "live network smoke test against photopea.com"]
2191    async fn installs_live_photopea_pwa_with_rewritten_file_handlers() {
2192        let temp_dir = tempdir().unwrap();
2193        let store = HashtreeStore::new(temp_dir.path()).unwrap();
2194
2195        let installed = install_site_pwa_to_store(&store, LIVE_PHOTOPEA_SMOKE_URL)
2196            .await
2197            .unwrap();
2198
2199        assert_eq!(installed.name, "Photopea");
2200        assert_eq!(installed.source_app_id, None);
2201        assert_eq!(installed.source_url, LIVE_PHOTOPEA_SMOKE_URL);
2202        assert_eq!(
2203            installed.source_manifest_url,
2204            LIVE_PHOTOPEA_SMOKE_MANIFEST_URL
2205        );
2206        assert_eq!(installed.description, None);
2207        assert_eq!(installed.display_mode.as_deref(), Some("standalone"));
2208        assert!(installed.launch_url.starts_with("htree://nhash1"));
2209        assert!(installed
2210            .launch_url
2211            .ends_with("/index.html?utm_source=homescreen"));
2212
2213        let icon_url = installed.icon_url.clone().expect("installed icon url");
2214        let (launch_nhash, launch_path) = split_htree_nhash_url(&installed.launch_url);
2215        let (icon_nhash, icon_path) = split_htree_nhash_url(&icon_url);
2216        assert_eq!(icon_nhash, launch_nhash);
2217        assert_eq!(icon_path, "/promo/icon512.png");
2218
2219        let launch_html = read_exported_tree_text(&store, &launch_nhash, &launch_path).await;
2220        assert!(launch_html.contains("manifest.json"));
2221        assert!(!launch_html.contains("src=\"/img/"));
2222        assert!(!launch_html.contains("href=\"/img/"));
2223
2224        let manifest_text = read_exported_tree_text(&store, &launch_nhash, "/manifest.json").await;
2225        let manifest: Value = serde_json::from_str(&manifest_text).unwrap();
2226        assert_eq!(
2227            manifest.get("start_url").and_then(Value::as_str),
2228            Some("index.html?utm_source=homescreen")
2229        );
2230        assert_eq!(
2231            manifest["share_target"]
2232                .get("action")
2233                .and_then(Value::as_str),
2234            Some("index.html")
2235        );
2236        let file_handlers = manifest["file_handlers"]
2237            .as_array()
2238            .expect("file_handlers array");
2239        assert!(file_handlers.len() >= 20);
2240        for handler in file_handlers {
2241            assert_eq!(
2242                handler.get("action").and_then(Value::as_str),
2243                Some("index.html")
2244            );
2245        }
2246    }
2247
2248    #[tokio::test]
2249    #[ignore = "live network smoke test against excalidraw.com"]
2250    async fn installs_live_excalidraw_pwa_with_rewritten_screenshots() {
2251        let temp_dir = tempdir().unwrap();
2252        let store = HashtreeStore::new(temp_dir.path()).unwrap();
2253
2254        let installed = install_site_pwa_to_store(&store, LIVE_EXCALIDRAW_SMOKE_URL)
2255            .await
2256            .unwrap();
2257
2258        assert_eq!(installed.name, "Excalidraw");
2259        assert_eq!(
2260            installed.source_app_id.as_deref(),
2261            Some("https://excalidraw.com/excalidraw")
2262        );
2263        assert_eq!(installed.source_url, LIVE_EXCALIDRAW_SMOKE_URL);
2264        assert_eq!(
2265            installed.source_manifest_url,
2266            LIVE_EXCALIDRAW_SMOKE_MANIFEST_URL
2267        );
2268        assert_eq!(
2269            installed.description.as_deref(),
2270            Some(
2271                "Excalidraw is a whiteboard tool that lets you easily sketch diagrams that have a hand-drawn feel to them."
2272            )
2273        );
2274        assert_eq!(installed.display_mode.as_deref(), Some("standalone"));
2275        assert!(installed.launch_url.starts_with("htree://nhash1"));
2276        assert!(installed.launch_url.ends_with("/index.html"));
2277
2278        let icon_url = installed.icon_url.clone().expect("installed icon url");
2279        let (launch_nhash, launch_path) = split_htree_nhash_url(&installed.launch_url);
2280        let (icon_nhash, icon_path) = split_htree_nhash_url(&icon_url);
2281        assert_eq!(icon_nhash, launch_nhash);
2282        assert!(icon_path.ends_with(".png"));
2283
2284        let launch_html = read_exported_tree_text(&store, &launch_nhash, &launch_path).await;
2285        assert!(launch_html.contains("manifest.webmanifest"));
2286
2287        let manifest_text =
2288            read_exported_tree_text(&store, &launch_nhash, "/manifest.webmanifest").await;
2289        let manifest: Value = serde_json::from_str(&manifest_text).unwrap();
2290        assert_eq!(
2291            manifest.get("start_url").and_then(Value::as_str),
2292            Some("index.html")
2293        );
2294        assert_eq!(
2295            manifest["share_target"]
2296                .get("action")
2297                .and_then(Value::as_str),
2298            Some("web-share-target")
2299        );
2300        assert_eq!(
2301            manifest["file_handlers"][0]
2302                .get("action")
2303                .and_then(Value::as_str),
2304            Some("index.html")
2305        );
2306
2307        let screenshots = manifest["screenshots"]
2308            .as_array()
2309            .expect("screenshots array");
2310        assert!(screenshots.len() >= 6);
2311        for screenshot in screenshots {
2312            let src = screenshot
2313                .get("src")
2314                .and_then(Value::as_str)
2315                .expect("screenshot src");
2316            assert!(src.starts_with("screenshots/"));
2317        }
2318    }
2319
2320    #[tokio::test]
2321    #[ignore = "live network smoke test against app.fastmail.com"]
2322    async fn installs_live_fastmail_pwa_with_protocol_handlers_and_shortcuts() {
2323        let fetched = fetch_pwa(LIVE_FASTMAIL_SMOKE_URL).await.unwrap();
2324        let temp_dir = tempdir().unwrap();
2325        let store = HashtreeStore::new(temp_dir.path()).unwrap();
2326
2327        let installed = install_site_pwa_to_store(&store, LIVE_FASTMAIL_SMOKE_URL)
2328            .await
2329            .unwrap();
2330
2331        assert_eq!(installed.name, "Fastmail");
2332        assert_eq!(installed.source_app_id, None);
2333        assert_eq!(installed.source_url, LIVE_FASTMAIL_SMOKE_URL);
2334        assert!(installed
2335            .source_manifest_url
2336            .starts_with(LIVE_FASTMAIL_SMOKE_MANIFEST_PREFIX));
2337        assert!(installed
2338            .source_manifest_url
2339            .ends_with(LIVE_FASTMAIL_SMOKE_MANIFEST_SUFFIX));
2340        assert_eq!(
2341            installed.description.as_deref(),
2342            Some("Email + calendar made better")
2343        );
2344        assert_eq!(installed.display_mode.as_deref(), Some("standalone"));
2345        assert!(installed.launch_url.starts_with("htree://nhash1"));
2346        assert!(installed.launch_url.ends_with("/mail/Inbox/index.html"));
2347
2348        let icon_url = installed.icon_url.clone().expect("installed icon url");
2349        let (launch_nhash, _launch_path) = split_htree_nhash_url(&installed.launch_url);
2350        let (icon_nhash, icon_path) = split_htree_nhash_url(&icon_url);
2351        assert_eq!(icon_nhash, launch_nhash);
2352        assert!(icon_path.starts_with("/static/appicons/"));
2353        assert!(icon_path.ends_with(".png"));
2354        let expected_root = format!("htree://{launch_nhash}");
2355        assert_eq!(
2356            installed.scope_url.as_deref(),
2357            Some(format!("{expected_root}/").as_str())
2358        );
2359        assert_eq!(installed.protocol_handlers.len(), 1);
2360        assert_eq!(installed.protocol_handlers[0].protocol, "mailto");
2361        assert_eq!(
2362            installed.protocol_handlers[0].url,
2363            format!("{expected_root}/mail/compose?mailto=%s")
2364        );
2365        assert_eq!(installed.shortcuts.len(), 4);
2366        let installed_shortcut_pairs: Vec<(String, String)> = installed
2367            .shortcuts
2368            .iter()
2369            .map(|shortcut| (shortcut.name.clone(), shortcut.url.clone()))
2370            .collect();
2371        assert!(installed_shortcut_pairs.contains(&(
2372            "Compose".to_string(),
2373            format!("{expected_root}/mail/Inbox/compose")
2374        )));
2375        assert!(installed_shortcut_pairs
2376            .contains(&("Mail".to_string(), format!("{expected_root}/mail/Inbox"))));
2377        assert!(installed_shortcut_pairs.contains(&(
2378            "Contacts".to_string(),
2379            format!("{expected_root}/contacts/index.html")
2380        )));
2381        assert!(installed_shortcut_pairs.contains(&(
2382            "Calendar".to_string(),
2383            format!("{expected_root}/calendar/index.html")
2384        )));
2385
2386        let launch_path = fetched.launch_reference.trim_start_matches('/');
2387        let launch_html = String::from_utf8(
2388            fetched
2389                .assets
2390                .iter()
2391                .find(|asset| asset.path == launch_path)
2392                .expect("launch html asset")
2393                .data
2394                .clone(),
2395        )
2396        .unwrap();
2397        assert!(launch_html.contains("app.webmanifest"));
2398
2399        let base_url = Url::parse(&installed.source_url).unwrap();
2400        let manifest_url = Url::parse(&installed.source_manifest_url).unwrap();
2401        let manifest_path = url_to_path(&manifest_url, &base_url);
2402        let manifest: Value = serde_json::from_slice(
2403            &fetched
2404                .assets
2405                .iter()
2406                .find(|asset| asset.path == manifest_path)
2407                .expect("manifest asset")
2408                .data,
2409        )
2410        .unwrap();
2411
2412        assert_eq!(
2413            manifest.get("start_url").and_then(Value::as_str),
2414            Some("../../../mail/Inbox/index.html")
2415        );
2416        let protocol_handlers = manifest["protocol_handlers"]
2417            .as_array()
2418            .expect("protocol_handlers array");
2419        assert_eq!(protocol_handlers.len(), 1);
2420        assert_eq!(
2421            protocol_handlers[0].get("protocol").and_then(Value::as_str),
2422            Some("mailto")
2423        );
2424        assert_eq!(
2425            protocol_handlers[0].get("url").and_then(Value::as_str),
2426            Some("../../../mail/compose?mailto=%s")
2427        );
2428
2429        let shortcuts = manifest["shortcuts"].as_array().expect("shortcuts array");
2430        assert_eq!(shortcuts.len(), 4);
2431        let shortcut_pairs: Vec<(String, String)> = shortcuts
2432            .iter()
2433            .map(|shortcut| {
2434                (
2435                    shortcut
2436                        .get("name")
2437                        .and_then(Value::as_str)
2438                        .expect("shortcut name")
2439                        .to_string(),
2440                    shortcut
2441                        .get("url")
2442                        .and_then(Value::as_str)
2443                        .expect("shortcut url")
2444                        .to_string(),
2445                )
2446            })
2447            .collect();
2448        assert!(shortcut_pairs.contains(&(
2449            "Compose".to_string(),
2450            "../../../mail/Inbox/compose".to_string()
2451        )));
2452        assert!(shortcut_pairs.contains(&("Mail".to_string(), "../../../mail/Inbox".to_string())));
2453        assert!(shortcut_pairs.contains(&(
2454            "Contacts".to_string(),
2455            "../../../contacts/index.html".to_string()
2456        )));
2457        assert!(shortcut_pairs.contains(&(
2458            "Calendar".to_string(),
2459            "../../../calendar/index.html".to_string()
2460        )));
2461    }
2462
2463    #[tokio::test]
2464    #[ignore = "live network smoke test against mastodon.social"]
2465    async fn installs_live_mastodon_pwa_with_source_app_id_and_shortcuts() {
2466        let fetched = fetch_pwa(LIVE_MASTODON_SMOKE_URL).await.unwrap();
2467        let temp_dir = tempdir().unwrap();
2468        let store = HashtreeStore::new(temp_dir.path()).unwrap();
2469
2470        let installed = install_site_pwa_to_store(&store, LIVE_MASTODON_SMOKE_URL)
2471            .await
2472            .unwrap();
2473
2474        assert_eq!(installed.name, "Mastodon");
2475        assert_eq!(
2476            installed.source_app_id.as_deref(),
2477            Some("https://mastodon.social/home")
2478        );
2479        assert_eq!(installed.source_url, LIVE_MASTODON_SMOKE_URL);
2480        assert_eq!(
2481            installed.source_manifest_url,
2482            LIVE_MASTODON_SMOKE_MANIFEST_URL
2483        );
2484        assert_eq!(installed.description, None);
2485        assert_eq!(installed.display_mode.as_deref(), Some("standalone"));
2486        assert!(installed.launch_url.starts_with("htree://nhash1"));
2487        assert!(installed.launch_url.ends_with("/index.html"));
2488
2489        let icon_url = installed.icon_url.clone().expect("installed icon url");
2490        let (launch_nhash, launch_path) = split_htree_nhash_url(&installed.launch_url);
2491        let (icon_nhash, icon_path) = split_htree_nhash_url(&icon_url);
2492        assert_eq!(icon_nhash, launch_nhash);
2493        assert_eq!(
2494            installed.scope_url.as_deref(),
2495            Some(format!("htree://{launch_nhash}/").as_str())
2496        );
2497        assert!(icon_path.starts_with("/packs/assets/android-chrome-512x512-"));
2498        assert!(icon_path.ends_with(".png"));
2499
2500        let launch_html = read_exported_tree_text(&store, &launch_nhash, &launch_path).await;
2501        assert!(launch_html.contains("manifest"));
2502        for attrs in extract_tag_attributes(&launch_html, "link") {
2503            if ["stylesheet", "modulepreload", "preload", "prefetch"]
2504                .iter()
2505                .any(|rel| rel_contains(&attrs, rel))
2506            {
2507                let href = attrs.get("href").expect("asset link href");
2508                assert!(!href.starts_with('/'), "asset link stayed absolute: {href}");
2509            }
2510        }
2511        assert!(!launch_html.contains("src=\"/packs/"));
2512
2513        assert_eq!(installed.shortcuts.len(), 3);
2514        let expected_root = format!("htree://{launch_nhash}");
2515        let installed_shortcut_pairs: Vec<(String, String)> = installed
2516            .shortcuts
2517            .iter()
2518            .map(|shortcut| (shortcut.name.clone(), shortcut.url.clone()))
2519            .collect();
2520        assert!(installed_shortcut_pairs.contains(&(
2521            "Compose new post".to_string(),
2522            format!("{expected_root}/publish")
2523        )));
2524        assert!(installed_shortcut_pairs.contains(&(
2525            "Notifications".to_string(),
2526            format!("{expected_root}/notifications")
2527        )));
2528        assert!(installed_shortcut_pairs
2529            .contains(&("Explore".to_string(), format!("{expected_root}/explore"))));
2530
2531        let base_url = Url::parse(&installed.source_url).unwrap();
2532        let manifest_url = Url::parse(&installed.source_manifest_url).unwrap();
2533        let manifest_path = url_to_path(&manifest_url, &base_url);
2534        let manifest: Value = serde_json::from_slice(
2535            &fetched
2536                .assets
2537                .iter()
2538                .find(|asset| asset.path == manifest_path)
2539                .expect("manifest asset")
2540                .data,
2541        )
2542        .unwrap();
2543
2544        assert_eq!(
2545            manifest.get("start_url").and_then(Value::as_str),
2546            Some("index.html")
2547        );
2548        let shortcuts = manifest["shortcuts"].as_array().expect("shortcuts array");
2549        assert_eq!(shortcuts.len(), 3);
2550        let shortcut_pairs: Vec<(String, String)> = shortcuts
2551            .iter()
2552            .map(|shortcut| {
2553                (
2554                    shortcut
2555                        .get("name")
2556                        .and_then(Value::as_str)
2557                        .expect("shortcut name")
2558                        .to_string(),
2559                    shortcut
2560                        .get("url")
2561                        .and_then(Value::as_str)
2562                        .expect("shortcut url")
2563                        .to_string(),
2564                )
2565            })
2566            .collect();
2567        assert!(shortcut_pairs.contains(&("Compose new post".to_string(), "publish".to_string())));
2568        assert!(
2569            shortcut_pairs.contains(&("Notifications".to_string(), "notifications".to_string()))
2570        );
2571        assert!(shortcut_pairs.contains(&("Explore".to_string(), "explore".to_string())));
2572    }
2573}