Skip to main content

solid_pod_rs/
mashlib.rs

1//! Mashlib / SolidOS data-browser integration.
2//!
3//! When a browser navigates to an RDF resource on a Solid server, the
4//! `Accept: text/html` header signals that the user wants a rendered
5//! view, not raw triples.  This module generates a thin HTML wrapper
6//! that loads the SolidOS **mashlib** bundle (from a CDN, a local path,
7//! or an ES module URL) and lets it fetch + render the underlying data.
8//!
9//! The approach is called the **"databrowser hack"** in the SolidOS
10//! community: the server never renders RDF itself — it delegates to the
11//! client-side pane system inside mashlib.
12//!
13//! ## Data-island optimisation
14//!
15//! When the resource is small enough (≤ [`DATA_ISLAND_MAX_BYTES`]), its
16//! JSON-LD representation is embedded inline as a
17//! `<script type="application/ld+json">` block.  A companion reader
18//! script patches `$rdf.fetcher.load()` to resolve from that island
19//! first, eliminating one HTTP round-trip on every page load.
20//!
21//! ## Modes
22//!
23//! | Mode | HTML template | Use case |
24//! |------|---------------|----------|
25//! | [`MashlibMode::Cdn`] | `<script src="unpkg.com/mashlib@{version}/…">` | Zero-config default |
26//! | [`MashlibMode::Local`] | `<script defer src="/mashlib.min.js">` | Operator bundles assets with `actix-files` |
27//! | [`MashlibMode::Module`] | `<script type="module" src="{url}">` | LOSOS / custom shell |
28
/// Default upper bound, in bytes, on a JSON-LD document that may be
/// inlined into the wrapper page as a data island.
///
/// The value is 256 KiB, chosen to cover any realistic profile, type
/// index, or container listing.  [`MashlibConfig::default`] uses it for
/// the `data_island_max_bytes` field.
pub const DATA_ISLAND_MAX_BYTES: usize = 1024 * 256;
32
33// -------------------------------------------------------------------------
34// Configuration
35// -------------------------------------------------------------------------
36
/// Strategy the generated wrapper page uses to pull in mashlib.
#[derive(Debug, Clone)]
pub enum MashlibMode {
    /// Fetch the bundle and stylesheet from the unpkg CDN, pinned to
    /// `version` (for example `"2.0.0"`).
    Cdn { version: String },
    /// Reference `/mashlib.min.js` and `/mash.css` at the server root.
    /// Serving those files is the operator's job, e.g. via
    /// `actix-files` or `tower-http::ServeDir`.
    Local,
    /// Load an ES module from `url` (the LOSOS pattern).  The page
    /// carries a `<div id="mashlib">` mount point and a
    /// `<script type="module" src="{url}">` tag.
    Module { url: String },
}

/// Zero-configuration default: the unpkg CDN at mashlib `2.0.0`.
impl Default for MashlibMode {
    fn default() -> Self {
        let version = String::from("2.0.0");
        Self::Cdn { version }
    }
}
58
59/// Mashlib integration configuration.
60#[derive(Debug, Clone)]
61pub struct MashlibConfig {
62    pub enabled: bool,
63    pub mode: MashlibMode,
64    pub data_island_max_bytes: usize,
65    pub round_trip_optimization: bool,
66}
67
68impl Default for MashlibConfig {
69    fn default() -> Self {
70        Self {
71            enabled: false,
72            mode: MashlibMode::default(),
73            data_island_max_bytes: DATA_ISLAND_MAX_BYTES,
74            round_trip_optimization: true,
75        }
76    }
77}
78
79// -------------------------------------------------------------------------
80// Decision: should we serve mashlib for this request?
81// -------------------------------------------------------------------------
82
/// Content types that have a mashlib pane to render them: the RDF
/// serialisations, plain JSON, markdown, and the Apple HLS playlist
/// type.  Any `audio/*` type (mpeg, ogg, wave, flac, the
/// `audio/mpegurl` / `audio/x-scpls` playlists, …) is matched by family
/// in [`should_serve`] rather than enumerated here.  Do NOT add
/// video/image types until panes exist for them — wrapping them would
/// show "No data found" instead of the browser's native inline render.
/// Mirrors JSS #533 `shouldServeMashlib`.
///
/// Entries must be lowercase: they are compared against a lowercased
/// content type.
const MASHLIB_RENDERABLE: &[&str] = &[
    "text/turtle",
    "application/ld+json",
    "application/json",
    "text/n3",
    "application/n-triples",
    "application/rdf+xml",
    "text/markdown",
    "application/vnd.apple.mpegurl",
];

/// RDF types whose presence *before* `text/html` in the Accept header
/// indicates the client prefers raw RDF over an HTML wrapper.
///
/// Entries must be lowercase: they are searched for in a lowercased
/// copy of the header.
const RDF_ACCEPT_TYPES: &[&str] = &[
    "application/rdf+xml",
    "text/turtle",
    "application/ld+json",
    "text/n3",
    "application/n-triples",
];

/// Determine whether the server should return the mashlib HTML wrapper
/// instead of the raw resource body.
///
/// The decision mirrors JSS `mashlib/index.js::shouldServeMashlib`:
///
/// 1. `enabled` must be true.
/// 2. `Sec-Fetch-Dest` (when present and non-empty) must be `document`
///    — this distinguishes top-level browser navigation from `fetch()`
///    / XHR.
/// 3. `Accept` must include `text/html` and no RDF type may appear
///    before it (otherwise the client prefers raw data).  Preference is
///    judged purely by position in the header; `q` values are not
///    parsed.
/// 4. The resource's stored content type must be renderable by mashlib.
///
/// Media types are matched case-insensitively in both the `Accept`
/// header and `resource_content_type`.
///
/// # Parameters
///
/// * `accept` — raw value of the request's `Accept` header.
/// * `sec_fetch_dest` — raw value of `Sec-Fetch-Dest`, if sent.
/// * `resource_content_type` — stored content type of the resource;
///   parameters after `;` (e.g. `charset`) are ignored.
/// * `enabled` — whether the mashlib integration is switched on.
///
/// # Returns
///
/// `true` when the HTML wrapper should be served, `false` when the raw
/// resource should be returned.
pub fn should_serve(
    accept: &str,
    sec_fetch_dest: Option<&str>,
    resource_content_type: &str,
    enabled: bool,
) -> bool {
    if !enabled {
        return false;
    }

    // A missing or empty header is not treated as evidence against a
    // top-level navigation; any other destination is.
    if matches!(sec_fetch_dest, Some(dest) if !dest.is_empty() && dest != "document") {
        return false;
    }

    // Media types are case-insensitive, so normalise once before the
    // substring searches below.
    let accept = accept.to_ascii_lowercase();
    let html_pos = match accept.find("text/html") {
        Some(pos) => pos,
        None => return false,
    };
    let rdf_listed_first = RDF_ACCEPT_TYPES
        .iter()
        .any(|rdf_type| matches!(accept.find(*rdf_type), Some(pos) if pos < html_pos));
    if rdf_listed_first {
        return false;
    }

    // Drop any parameters (`; charset=…`) and normalise case.
    let base_type = resource_content_type
        .split(';')
        .next()
        .unwrap_or("")
        .trim()
        .to_ascii_lowercase();

    // Any audio/* type has a pane (audio or playlist), so match the whole
    // family rather than enumerate the exact spellings the mime db uses.
    base_type.starts_with("audio/") || MASHLIB_RENDERABLE.contains(&base_type.as_str())
}
167
168// -------------------------------------------------------------------------
169// Data-island escaping
170// -------------------------------------------------------------------------
171
/// Make a JSON-LD document safe to place inside
/// `<script type="application/ld+json">`.
///
/// Every literal `<` is rewritten to the six-character JSON escape
/// `\u003c`.  The HTML parser ends a script block when it sees
/// `</script`; with no `<` left in the payload, no end tag can form.
/// JSON-LD parsers decode `\u003c` back to `<`, so the meaning of the
/// document is unchanged.  All other characters are copied through
/// untouched.
pub fn escape_for_script_block(json_ld: &str) -> String {
    let mut escaped = String::with_capacity(json_ld.len());
    for ch in json_ld.chars() {
        if ch == '<' {
            escaped.push_str("\\u003c");
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
182
183/// Build the `<script type="application/ld+json">` data-island block.
184///
185/// Returns an empty string when `json_ld` is `None` or exceeds the
186/// byte cap (checked both pre- and post-escape).
187fn data_island(resource_url: &str, json_ld: Option<&str>, max_bytes: usize) -> String {
188    let raw = match json_ld {
189        Some(s) if !s.is_empty() => s,
190        _ => return String::new(),
191    };
192    if raw.len() > max_bytes {
193        return String::new();
194    }
195    let safe_body = escape_for_script_block(raw);
196    if safe_body.len() > max_bytes {
197        return String::new();
198    }
199    let safe_uri = escape_html(resource_url);
200    format!(
201        r#"<script type="application/ld+json" id="dataisland" data-uri="{safe_uri}">{safe_body}</script>"#
202    )
203}
204
205// -------------------------------------------------------------------------
206// Round-trip optimisation reader
207// -------------------------------------------------------------------------
208
/// Return the inline `<script>` element that makes `$rdf.fetcher.load()`
/// consult the data island before going to the network.
///
/// The script installs `window.__dataIsland.get(uri)`, which returns the
/// island's content when the `#dataisland` element's `data-uri` matches,
/// and wraps `fetcher.load` so a hit is parsed straight into the store.
/// Any failure on that path falls back to the original `load`.  The
/// patch is applied through three routes so it lands whenever `$rdf`
/// shows up: immediately if it already exists, via a setter on
/// `window.$rdf`, and via a 100 ms poll that gives up after 100 tries.
fn round_trip_script() -> &'static str {
    // Kept as one verbatim literal: the JavaScript is shipped to the
    // browser exactly as written here.
    const READER_SCRIPT: &str = r#"<script>
(function(){
  if(typeof window==='undefined')return;
  var di=window.__dataIsland;
  if(di===null||di===undefined||(typeof di!=='object'&&typeof di!=='function'))di=window.__dataIsland={};
  if(typeof di.get!=='function'){
    di.get=function(uri){
      if(!uri)return null;
      try{var el=document.getElementById('dataisland');
        if(el&&el.type==='application/ld+json'&&el.getAttribute('data-uri')===String(uri))
          return{contentType:'application/ld+json',content:el.textContent};
      }catch(e){}return null;
    };
  }
  function patch(rdf){
    if(!rdf||!rdf.fetcher||!rdf.fetcher.load)return;
    if(rdf.fetcher.__diPatched)return;rdf.fetcher.__diPatched=true;
    var f=rdf.fetcher,orig=f.load.bind(f);
    f.load=function(uri,opts){
      var s=(uri&&uri.uri)||(uri&&uri.value)||String(uri);
      var d=window.__dataIsland.get(s);
      if(d)return new Promise(function(ok,fail){
        rdf.parse(d.content,f.store,s,d.contentType,function(err){
          if(err){fail(err);return;}
          try{if(f.requested&&typeof f.requested==='object')f.requested[s]='done';
            var r=typeof Response==='function'?new Response(d.content,{status:200,headers:{'content-type':d.contentType}}):{ok:true,status:200,url:s,headers:{get:function(n){return n&&n.toLowerCase()==='content-type'?d.contentType:null}}};
            try{Object.defineProperty(r,'url',{value:s,configurable:true})}catch(e){}ok(r);
          }catch(e){fail(e);}
        });
      }).catch(function(){return orig(uri,opts);});
      return orig(uri,opts);
    };
  }
  if(typeof $rdf!=='undefined')patch($rdf);
  try{var c=typeof $rdf!=='undefined'?$rdf:undefined;
    Object.defineProperty(window,'$rdf',{configurable:true,get:function(){return c;},set:function(v){c=v;patch(v);}});
  }catch(e){}
  var n=0;(function p(){if(++n>100)return;if(typeof $rdf!=='undefined'&&$rdf&&$rdf.fetcher&&$rdf.fetcher.__diPatched)return;if(typeof $rdf!=='undefined')patch($rdf);setTimeout(p,100);})();
})();
</script>"#;

    READER_SCRIPT
}
254
255// -------------------------------------------------------------------------
256// HTML generators
257// -------------------------------------------------------------------------
258
259/// Generate the mashlib HTML wrapper page.
260///
261/// When `embed_json_ld` is provided and within the byte cap, it is
262/// inlined as a data island for the round-trip optimisation.
263pub fn generate_html(
264    resource_url: &str,
265    config: &MashlibConfig,
266    embed_json_ld: Option<&str>,
267) -> String {
268    let island = data_island(resource_url, embed_json_ld, config.data_island_max_bytes);
269    let reader = if config.round_trip_optimization {
270        round_trip_script()
271    } else {
272        ""
273    };
274
275    match &config.mode {
276        MashlibMode::Cdn { version } => {
277            let base = format!("https://unpkg.com/mashlib@{version}/dist");
278            format!(
279                r#"<!doctype html><html><head><meta charset="utf-8"/><title>SolidOS Web App</title>
280<link href="{base}/mash.css" rel="stylesheet"></head>
281<body id="PageBody">{island}{reader}<header id="PageHeader"></header>
282<div class="TabulatorOutline" id="DummyUUID" role="main"><table id="outline"></table><div id="GlobalDashboard"></div></div>
283<footer id="PageFooter"></footer>
284<script>
285(function(){{var s=document.createElement('script');s.src='{base}/mashlib.min.js';s.onload=function(){{panes.runDataBrowser()}};s.onerror=function(){{document.body.innerHTML='<p>Failed to load Mashlib from CDN</p>'}};document.head.appendChild(s)}})();
286</script></body></html>"#
287            )
288        }
289        MashlibMode::Local => {
290            format!(
291                r#"<!doctype html><html><head><meta charset="utf-8"/><title>SolidOS Web App</title><script>document.addEventListener('DOMContentLoaded',function(){{panes.runDataBrowser()}})</script><script defer="defer" src="/mashlib.min.js"></script><link href="/mash.css" rel="stylesheet"></head><body id="PageBody">{island}{reader}<header id="PageHeader"></header><div class="TabulatorOutline" id="DummyUUID" role="main"><table id="outline"></table><div id="GlobalDashboard"></div></div><footer id="PageFooter"></footer></body></html>"#
292            )
293        }
294        MashlibMode::Module { url } => {
295            let css_url = if url.ends_with(".js") {
296                format!("{}.css", &url[..url.len() - 3])
297            } else {
298                format!("{url}.css")
299            };
300            format!(
301                r#"<!doctype html><html lang="en"><head><meta charset="utf-8"/>
302<meta name="viewport" content="width=device-width, initial-scale=1">
303<title>Solid Data Browser</title>
304<link rel="stylesheet" href="{css_url}"></head>
305<body>{island}{reader}<div id="mashlib"></div>
306<script type="module" src="{url}"></script>
307</body></html>"#
308            )
309        }
310    }
311}
312
313// -------------------------------------------------------------------------
314// Utilities
315// -------------------------------------------------------------------------
316
/// Escape `s` for use in HTML text or inside a quoted attribute value.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by `&amp;`, `&lt;`, `&gt;`,
/// `&quot;` and `&#39;`; every other character is copied unchanged.
/// Escaping the single quote as well keeps the output safe in
/// single-quoted attributes, not only double-quoted ones.
///
/// The input is walked once and written into a single output buffer.
fn escape_html(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&#39;"),
            _ => out.push(ch),
        }
    }
    out
}
323
324// -------------------------------------------------------------------------
325// Tests
326// -------------------------------------------------------------------------
327
#[cfg(test)]
mod tests {
    use super::*;

    /// `Accept` value used as a typical top-level browser navigation.
    const BROWSER_ACCEPT: &str =
        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";

    /// Resource URL shared by the HTML-generation tests.
    const RESOURCE: &str = "http://pod/resource";

    /// An enabled configuration using `mode`, defaults elsewhere.
    fn enabled_config(mode: MashlibMode) -> MashlibConfig {
        MashlibConfig {
            enabled: true,
            mode,
            ..Default::default()
        }
    }

    /// An enabled configuration in the default (CDN) mode.
    fn default_cdn_config() -> MashlibConfig {
        enabled_config(MashlibMode::default())
    }

    // -- should_serve -------------------------------------------------------

    #[test]
    fn serves_html_for_browser_navigation() {
        let served = should_serve(BROWSER_ACCEPT, Some("document"), "text/turtle", true);
        assert!(served);
    }

    #[test]
    fn rejects_when_disabled() {
        let served = should_serve("text/html", Some("document"), "text/turtle", false);
        assert!(!served);
    }

    #[test]
    fn rejects_xhr_fetch() {
        let served = should_serve("text/html", Some("empty"), "text/turtle", true);
        assert!(!served);
    }

    #[test]
    fn rejects_no_html_in_accept() {
        let served = should_serve("application/ld+json, */*;q=0.1", None, "text/turtle", true);
        assert!(!served);
    }

    #[test]
    fn rejects_rdf_preferred_over_html() {
        let served = should_serve("text/turtle, text/html;q=0.5", None, "text/turtle", true);
        assert!(!served);
    }

    #[test]
    fn rejects_non_renderable_content_type() {
        let served = should_serve("text/html", Some("document"), "image/png", true);
        assert!(!served);
    }

    #[test]
    fn accepts_jsonld_content_type() {
        let served = should_serve(
            "text/html",
            Some("document"),
            "application/ld+json; charset=utf-8",
            true,
        );
        assert!(served);
    }

    #[test]
    fn accepts_markdown_content_type() {
        let served = should_serve("text/html", Some("document"), "text/markdown", true);
        assert!(served);
    }

    #[test]
    fn accepts_absent_sec_fetch_dest() {
        let served = should_serve("text/html", None, "text/turtle", true);
        assert!(served);
    }

    // -- escape_for_script_block -------------------------------------------

    #[test]
    fn escapes_angle_brackets() {
        let escaped = escape_for_script_block(r#"{"@id": "</script>"}"#);
        assert!(!escaped.contains('<'));
        assert!(escaped.contains("\\u003c"));
    }

    // -- data_island -------------------------------------------------------

    #[test]
    fn island_empty_when_none() {
        let island = data_island("http://x", None, DATA_ISLAND_MAX_BYTES);
        assert!(island.is_empty());
    }

    #[test]
    fn island_empty_when_oversized() {
        let oversized = "x".repeat(DATA_ISLAND_MAX_BYTES + 1);
        let island = data_island("http://x", Some(&oversized), DATA_ISLAND_MAX_BYTES);
        assert!(island.is_empty());
    }

    #[test]
    fn island_contains_json_ld_type() {
        let island = data_island("http://x/r", Some(r#"{"@id":"x"}"#), DATA_ISLAND_MAX_BYTES);
        for needle in ["application/ld+json", "dataisland"] {
            assert!(island.contains(needle));
        }
    }

    // -- generate_html -----------------------------------------------------

    #[test]
    fn cdn_mode_references_unpkg() {
        let html = generate_html(RESOURCE, &default_cdn_config(), None);
        assert!(html.contains("unpkg.com/mashlib@2.0.0"));
        assert!(html.contains("mashlib.min.js"));
        assert!(html.contains("mash.css"));
    }

    #[test]
    fn local_mode_uses_root_relative_paths() {
        let html = generate_html(RESOURCE, &enabled_config(MashlibMode::Local), None);
        assert!(html.contains(r#"src="/mashlib.min.js""#));
        assert!(html.contains(r#"href="/mash.css""#));
    }

    #[test]
    fn module_mode_emits_mashlib_div() {
        let mode = MashlibMode::Module {
            url: "https://host/path/mashlib.js".into(),
        };
        let html = generate_html(RESOURCE, &enabled_config(mode), None);
        assert!(html.contains(r#"id="mashlib""#));
        assert!(html.contains(r#"type="module""#));
        assert!(html.contains("https://host/path/mashlib.js"));
        assert!(html.contains("https://host/path/mashlib.css"));
    }

    #[test]
    fn embeds_data_island_when_provided() {
        let json_ld = r#"{"@id":"http://pod/r","@type":"foaf:Person"}"#;
        let html = generate_html("http://pod/r", &default_cdn_config(), Some(json_ld));
        assert!(html.contains("dataisland"));
        assert!(html.contains("foaf:Person"));
    }

    #[test]
    fn round_trip_script_present_by_default() {
        let html = generate_html("http://pod/r", &default_cdn_config(), Some("{}"));
        assert!(html.contains("__dataIsland"));
    }

    #[test]
    fn round_trip_script_absent_when_disabled() {
        let cfg = MashlibConfig {
            round_trip_optimization: false,
            ..default_cdn_config()
        };
        let html = generate_html("http://pod/r", &cfg, Some("{}"));
        assert!(!html.contains("__dataIsland"));
    }
}