Skip to main content

solid_pod_rs/
mashlib.rs

1//! Mashlib / SolidOS data-browser integration.
2//!
3//! When a browser navigates to an RDF resource on a Solid server, the
4//! `Accept: text/html` header signals that the user wants a rendered
5//! view, not raw triples.  This module generates a thin HTML wrapper
6//! that loads the SolidOS **mashlib** bundle (from a CDN, a local path,
7//! or an ES module URL) and lets it fetch + render the underlying data.
8//!
9//! The approach is called the **"databrowser hack"** in the SolidOS
10//! community: the server never renders RDF itself — it delegates to the
11//! client-side pane system inside mashlib.
12//!
13//! ## Data-island optimisation
14//!
15//! When the resource is small enough (≤ [`DATA_ISLAND_MAX_BYTES`]), its
16//! JSON-LD representation is embedded inline as a
17//! `<script type="application/ld+json">` block.  A companion reader
18//! script patches `$rdf.fetcher.load()` to resolve from that island
19//! first, eliminating one HTTP round-trip on every page load.
20//!
21//! ## Modes
22//!
23//! | Mode | HTML template | Use case |
24//! |------|---------------|----------|
25//! | [`MashlibMode::Cdn`] | `<script src="unpkg.com/mashlib@{version}/…">` | Zero-config default |
26//! | [`MashlibMode::Local`] | `<script defer src="/mashlib.min.js">` | Operator bundles assets with `actix-files` |
27//! | [`MashlibMode::Module`] | `<script type="module" src="{url}">` | LOSOS / custom shell |
28
/// Default upper bound, in bytes, on the JSON-LD payload that may be inlined
/// as a data island (256 KiB).  Used as the default for
/// [`MashlibConfig::data_island_max_bytes`]; the original author sized it to
/// cover any realistic profile, type index, or container listing.
pub const DATA_ISLAND_MAX_BYTES: usize = {
    const KIB: usize = 1024;
    256 * KIB
};
32
33// -------------------------------------------------------------------------
34// Configuration
35// -------------------------------------------------------------------------
36
/// Strategy used by the generated HTML page to load the mashlib bundle.
#[derive(Clone, Debug)]
pub enum MashlibMode {
    /// Fetch the bundle from the unpkg CDN.  `version` is the npm version
    /// to pin, e.g. `"2.0.0"`.
    Cdn { version: String },
    /// Reference `/mashlib.min.js` and `/mash.css` at the server root.
    /// The operator is expected to serve those files (for example with
    /// `actix-files` or `tower-http::ServeDir`).
    Local,
    /// ES module entry point (the LOSOS pattern).  The page contains
    /// `<div id="mashlib">` and a `<script type="module" src="{url}">`.
    Module { url: String },
}

impl Default for MashlibMode {
    /// The zero-config default: the CDN bundle pinned to version `2.0.0`.
    fn default() -> Self {
        MashlibMode::Cdn {
            version: String::from("2.0.0"),
        }
    }
}
62
63/// Mashlib integration configuration.
64#[derive(Debug, Clone)]
65pub struct MashlibConfig {
66    pub enabled: bool,
67    pub mode: MashlibMode,
68    pub data_island_max_bytes: usize,
69    pub round_trip_optimization: bool,
70}
71
72impl Default for MashlibConfig {
73    fn default() -> Self {
74        Self {
75            enabled: false,
76            mode: MashlibMode::default(),
77            data_island_max_bytes: DATA_ISLAND_MAX_BYTES,
78            round_trip_optimization: true,
79        }
80    }
81}
82
83// -------------------------------------------------------------------------
84// Decision: should we serve mashlib for this request?
85// -------------------------------------------------------------------------
86
/// Stored content types for which the mashlib wrapper is served: the RDF
/// serialisations plus the JSON, Markdown and playlist types listed here.
/// Entries are lowercase; lookups compare ASCII-case-insensitively.
const MASHLIB_RENDERABLE: &[&str] = &[
    "text/turtle",
    "application/ld+json",
    "application/json",
    "text/n3",
    "application/n-triples",
    "application/rdf+xml",
    "text/markdown",
    "audio/mpegurl",
    "application/vnd.apple.mpegurl",
    "audio/x-scpls",
];

/// RDF types whose presence *before* `text/html` in the Accept header
/// indicates the client prefers raw RDF over an HTML wrapper.
const RDF_ACCEPT_TYPES: &[&str] = &[
    "application/rdf+xml",
    "text/turtle",
    "application/ld+json",
    "text/n3",
    "application/n-triples",
];

/// Determine whether the server should return the mashlib HTML wrapper
/// instead of the raw resource body.
///
/// The decision is modelled on JSS `mashlib/index.js::shouldServeMashlib`:
///
/// 1. `enabled` must be true.
/// 2. `Sec-Fetch-Dest` (when present and non-empty) must be `document` —
///    this distinguishes top-level browser navigation from `fetch()` / XHR.
/// 3. `Accept` must list the `text/html` media range, and no RDF media
///    range may be listed before it (otherwise the client prefers raw
///    data).  Only list order is considered; parameters such as `;q=0.5`
///    are ignored.
/// 4. The resource's stored content type (parameters stripped) must be
///    one of [`MASHLIB_RENDERABLE`].
///
/// Media types are compared ASCII-case-insensitively in both the `Accept`
/// header and `resource_content_type`.
pub fn should_serve(
    accept: &str,
    sec_fetch_dest: Option<&str>,
    resource_content_type: &str,
    enabled: bool,
) -> bool {
    if !enabled {
        return false;
    }

    // An absent or empty Sec-Fetch-Dest carries no information, so only an
    // explicit non-`document` destination vetoes the wrapper.
    if matches!(sec_fetch_dest, Some(dest) if !dest.is_empty() && dest != "document") {
        return false;
    }

    if !accept_prefers_html(accept) {
        return false;
    }

    let base_type = resource_content_type
        .split(';')
        .next()
        .unwrap_or("")
        .trim();
    MASHLIB_RENDERABLE
        .iter()
        .any(|known| base_type.eq_ignore_ascii_case(known))
}

/// Walk the `Accept` media ranges in list order and report whether
/// `text/html` is reached before any of [`RDF_ACCEPT_TYPES`].
///
/// Each comma-separated element is reduced to its `type/subtype` (anything
/// after the first `;` is dropped), so a `text/html` that merely occurs
/// inside a parameter value does not count.
fn accept_prefers_html(accept: &str) -> bool {
    for range in accept.split(',') {
        let media = range.split(';').next().unwrap_or("").trim();
        if media.eq_ignore_ascii_case("text/html") {
            return true;
        }
        // An RDF type seen first decides the outcome either way: HTML is
        // then either listed later (RDF preferred) or not listed at all.
        if RDF_ACCEPT_TYPES
            .iter()
            .any(|rdf| media.eq_ignore_ascii_case(rdf))
        {
            return false;
        }
    }
    false
}
162
163// -------------------------------------------------------------------------
164// Data-island escaping
165// -------------------------------------------------------------------------
166
/// Make a JSON-LD document safe to embed inside
/// `<script type="application/ld+json">`.
///
/// Every literal `<` is rewritten to the six-character JSON escape
/// `\u003c`.  With no `<` left in the payload, the HTML parser cannot
/// find a `</script` end tag inside the block.  A JSON parser decodes
/// `\u003c` back to `<`, so string values are unchanged after parsing.
/// (In well-formed JSON a `<` can only occur inside a string literal,
/// which is the only place the escape is valid.)
pub fn escape_for_script_block(json_ld: &str) -> String {
    let mut escaped = String::with_capacity(json_ld.len());
    for ch in json_ld.chars() {
        if ch == '<' {
            escaped.push_str("\\u003c");
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
177
178/// Build the `<script type="application/ld+json">` data-island block.
179///
180/// Returns an empty string when `json_ld` is `None` or exceeds the
181/// byte cap (checked both pre- and post-escape).
182fn data_island(resource_url: &str, json_ld: Option<&str>, max_bytes: usize) -> String {
183    let raw = match json_ld {
184        Some(s) if !s.is_empty() => s,
185        _ => return String::new(),
186    };
187    if raw.len() > max_bytes {
188        return String::new();
189    }
190    let safe_body = escape_for_script_block(raw);
191    if safe_body.len() > max_bytes {
192        return String::new();
193    }
194    let safe_uri = escape_html(resource_url);
195    format!(
196        r#"<script type="application/ld+json" id="dataisland" data-uri="{safe_uri}">{safe_body}</script>"#
197    )
198}
199
200// -------------------------------------------------------------------------
201// Round-trip optimisation reader
202// -------------------------------------------------------------------------
203
/// Inline `<script>` element that wraps `$rdf.fetcher.load()` so a request
/// for the page's own resource is answered from the `#dataisland` element
/// instead of the network.
///
/// What the embedded JavaScript does:
///
/// * Ensures `window.__dataIsland` is an object with a `get(uri)` method
///   (an existing `get` is kept).  The default `get` returns the island's
///   text and content type when the element's `data-uri` equals `uri`.
/// * `patch(rdf)` replaces `rdf.fetcher.load` once (guarded by
///   `__diPatched`).  On an island hit it parses the content into the
///   fetcher's store and resolves with a synthetic response; on a parse
///   error, or when there is no hit, it calls the original `load`.
/// * `patch` is attempted on three paths: immediately if `$rdf` already
///   exists, from a setter installed on `window.$rdf`, and from a poll
///   that retries every 100 ms for at most 100 rounds.
fn round_trip_script() -> &'static str {
    const READER_SCRIPT: &str = r#"<script>
(function(){
  if(typeof window==='undefined')return;
  var di=window.__dataIsland;
  if(di===null||di===undefined||(typeof di!=='object'&&typeof di!=='function'))di=window.__dataIsland={};
  if(typeof di.get!=='function'){
    di.get=function(uri){
      if(!uri)return null;
      try{var el=document.getElementById('dataisland');
        if(el&&el.type==='application/ld+json'&&el.getAttribute('data-uri')===String(uri))
          return{contentType:'application/ld+json',content:el.textContent};
      }catch(e){}return null;
    };
  }
  function patch(rdf){
    if(!rdf||!rdf.fetcher||!rdf.fetcher.load)return;
    if(rdf.fetcher.__diPatched)return;rdf.fetcher.__diPatched=true;
    var f=rdf.fetcher,orig=f.load.bind(f);
    f.load=function(uri,opts){
      var s=(uri&&uri.uri)||(uri&&uri.value)||String(uri);
      var d=window.__dataIsland.get(s);
      if(d)return new Promise(function(ok,fail){
        rdf.parse(d.content,f.store,s,d.contentType,function(err){
          if(err){fail(err);return;}
          try{if(f.requested&&typeof f.requested==='object')f.requested[s]='done';
            var r=typeof Response==='function'?new Response(d.content,{status:200,headers:{'content-type':d.contentType}}):{ok:true,status:200,url:s,headers:{get:function(n){return n&&n.toLowerCase()==='content-type'?d.contentType:null}}};
            try{Object.defineProperty(r,'url',{value:s,configurable:true})}catch(e){}ok(r);
          }catch(e){fail(e);}
        });
      }).catch(function(){return orig(uri,opts);});
      return orig(uri,opts);
    };
  }
  if(typeof $rdf!=='undefined')patch($rdf);
  try{var c=typeof $rdf!=='undefined'?$rdf:undefined;
    Object.defineProperty(window,'$rdf',{configurable:true,get:function(){return c;},set:function(v){c=v;patch(v);}});
  }catch(e){}
  var n=0;(function p(){if(++n>100)return;if(typeof $rdf!=='undefined'&&$rdf&&$rdf.fetcher&&$rdf.fetcher.__diPatched)return;if(typeof $rdf!=='undefined')patch($rdf);setTimeout(p,100);})();
})();
</script>"#;
    READER_SCRIPT
}
249
250// -------------------------------------------------------------------------
251// HTML generators
252// -------------------------------------------------------------------------
253
254/// Generate the mashlib HTML wrapper page.
255///
256/// When `embed_json_ld` is provided and within the byte cap, it is
257/// inlined as a data island for the round-trip optimisation.
258pub fn generate_html(
259    resource_url: &str,
260    config: &MashlibConfig,
261    embed_json_ld: Option<&str>,
262) -> String {
263    let island = data_island(resource_url, embed_json_ld, config.data_island_max_bytes);
264    let reader = if config.round_trip_optimization {
265        round_trip_script()
266    } else {
267        ""
268    };
269
270    match &config.mode {
271        MashlibMode::Cdn { version } => {
272            let base = format!("https://unpkg.com/mashlib@{version}/dist");
273            format!(
274                r#"<!doctype html><html><head><meta charset="utf-8"/><title>SolidOS Web App</title>
275<link href="{base}/mash.css" rel="stylesheet"></head>
276<body id="PageBody">{island}{reader}<header id="PageHeader"></header>
277<div class="TabulatorOutline" id="DummyUUID" role="main"><table id="outline"></table><div id="GlobalDashboard"></div></div>
278<footer id="PageFooter"></footer>
279<script>
280(function(){{var s=document.createElement('script');s.src='{base}/mashlib.min.js';s.onload=function(){{panes.runDataBrowser()}};s.onerror=function(){{document.body.innerHTML='<p>Failed to load Mashlib from CDN</p>'}};document.head.appendChild(s)}})();
281</script></body></html>"#
282            )
283        }
284        MashlibMode::Local => {
285            format!(
286                r#"<!doctype html><html><head><meta charset="utf-8"/><title>SolidOS Web App</title><script>document.addEventListener('DOMContentLoaded',function(){{panes.runDataBrowser()}})</script><script defer="defer" src="/mashlib.min.js"></script><link href="/mash.css" rel="stylesheet"></head><body id="PageBody">{island}{reader}<header id="PageHeader"></header><div class="TabulatorOutline" id="DummyUUID" role="main"><table id="outline"></table><div id="GlobalDashboard"></div></div><footer id="PageFooter"></footer></body></html>"#
287            )
288        }
289        MashlibMode::Module { url } => {
290            let css_url = if url.ends_with(".js") {
291                format!("{}.css", &url[..url.len() - 3])
292            } else {
293                format!("{url}.css")
294            };
295            format!(
296                r#"<!doctype html><html lang="en"><head><meta charset="utf-8"/>
297<meta name="viewport" content="width=device-width, initial-scale=1">
298<title>Solid Data Browser</title>
299<link rel="stylesheet" href="{css_url}"></head>
300<body>{island}{reader}<div id="mashlib"></div>
301<script type="module" src="{url}"></script>
302</body></html>"#
303            )
304        }
305    }
306}
307
308// -------------------------------------------------------------------------
309// Utilities
310// -------------------------------------------------------------------------
311
/// Escape `s` for inclusion in HTML text or a quoted attribute value.
///
/// Replaces `&`, `<`, `>`, `"` and `'` with their entity forms in a single
/// pass.  The apostrophe is escaped too (as `&#39;`) so the result is also
/// safe inside single-quoted attributes; browsers decode it back to `'`,
/// so attribute values read from the DOM are unchanged.
fn escape_html(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&#39;"),
            other => out.push(other),
        }
    }
    out
}
318
319// -------------------------------------------------------------------------
320// Tests
321// -------------------------------------------------------------------------
322
#[cfg(test)]
mod tests {
    use super::*;

    /// Accept header a mainstream browser sends on top-level navigation.
    const BROWSER_ACCEPT: &str =
        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";

    /// Default (CDN) configuration with the integration switched on.
    fn default_cdn_config() -> MashlibConfig {
        MashlibConfig {
            enabled: true,
            ..MashlibConfig::default()
        }
    }

    /// Enabled configuration using the given loading mode.
    fn config_with_mode(mode: MashlibMode) -> MashlibConfig {
        MashlibConfig {
            enabled: true,
            mode,
            ..MashlibConfig::default()
        }
    }

    // -- should_serve -------------------------------------------------------

    #[test]
    fn serves_html_for_browser_navigation() {
        let served = should_serve(BROWSER_ACCEPT, Some("document"), "text/turtle", true);
        assert!(served);
    }

    #[test]
    fn rejects_when_disabled() {
        let served = should_serve("text/html", Some("document"), "text/turtle", false);
        assert!(!served);
    }

    #[test]
    fn rejects_xhr_fetch() {
        let served = should_serve("text/html", Some("empty"), "text/turtle", true);
        assert!(!served);
    }

    #[test]
    fn rejects_no_html_in_accept() {
        let accept = "application/ld+json, */*;q=0.1";
        assert!(!should_serve(accept, None, "text/turtle", true));
    }

    #[test]
    fn rejects_rdf_preferred_over_html() {
        let accept = "text/turtle, text/html;q=0.5";
        assert!(!should_serve(accept, None, "text/turtle", true));
    }

    #[test]
    fn rejects_non_renderable_content_type() {
        let served = should_serve("text/html", Some("document"), "image/png", true);
        assert!(!served);
    }

    #[test]
    fn accepts_jsonld_content_type() {
        let stored = "application/ld+json; charset=utf-8";
        assert!(should_serve("text/html", Some("document"), stored, true));
    }

    #[test]
    fn accepts_markdown_content_type() {
        let served = should_serve("text/html", Some("document"), "text/markdown", true);
        assert!(served);
    }

    #[test]
    fn accepts_absent_sec_fetch_dest() {
        let served = should_serve("text/html", None, "text/turtle", true);
        assert!(served);
    }

    // -- escape_for_script_block -------------------------------------------

    #[test]
    fn escapes_angle_brackets() {
        let escaped = escape_for_script_block(r#"{"@id": "</script>"}"#);
        assert!(!escaped.contains('<'));
        assert!(escaped.contains(r"\u003c"));
    }

    // -- data_island -------------------------------------------------------

    #[test]
    fn island_empty_when_none() {
        let island = data_island("http://x", None, DATA_ISLAND_MAX_BYTES);
        assert!(island.is_empty());
    }

    #[test]
    fn island_empty_when_oversized() {
        let oversized = "x".repeat(DATA_ISLAND_MAX_BYTES + 1);
        let island = data_island("http://x", Some(oversized.as_str()), DATA_ISLAND_MAX_BYTES);
        assert!(island.is_empty());
    }

    #[test]
    fn island_contains_json_ld_type() {
        let island = data_island("http://x/r", Some(r#"{"@id":"x"}"#), DATA_ISLAND_MAX_BYTES);
        for needle in ["application/ld+json", "dataisland"] {
            assert!(island.contains(needle));
        }
    }

    // -- generate_html -----------------------------------------------------

    #[test]
    fn cdn_mode_references_unpkg() {
        let html = generate_html("http://pod/resource", &default_cdn_config(), None);
        for needle in ["unpkg.com/mashlib@2.0.0", "mashlib.min.js", "mash.css"] {
            assert!(html.contains(needle));
        }
    }

    #[test]
    fn local_mode_uses_root_relative_paths() {
        let cfg = config_with_mode(MashlibMode::Local);
        let html = generate_html("http://pod/resource", &cfg, None);
        assert!(html.contains(r#"src="/mashlib.min.js""#));
        assert!(html.contains(r#"href="/mash.css""#));
    }

    #[test]
    fn module_mode_emits_mashlib_div() {
        let cfg = config_with_mode(MashlibMode::Module {
            url: String::from("https://host/path/mashlib.js"),
        });
        let html = generate_html("http://pod/resource", &cfg, None);
        assert!(html.contains(r#"id="mashlib""#));
        assert!(html.contains(r#"type="module""#));
        assert!(html.contains("https://host/path/mashlib.js"));
        assert!(html.contains("https://host/path/mashlib.css"));
    }

    #[test]
    fn embeds_data_island_when_provided() {
        let json_ld = r#"{"@id":"http://pod/r","@type":"foaf:Person"}"#;
        let html = generate_html("http://pod/r", &default_cdn_config(), Some(json_ld));
        assert!(html.contains("dataisland"));
        assert!(html.contains("foaf:Person"));
    }

    #[test]
    fn round_trip_script_present_by_default() {
        let html = generate_html("http://pod/r", &default_cdn_config(), Some("{}"));
        assert!(html.contains("__dataIsland"));
    }

    #[test]
    fn round_trip_script_absent_when_disabled() {
        let cfg = MashlibConfig {
            round_trip_optimization: false,
            ..default_cdn_config()
        };
        let html = generate_html("http://pod/r", &cfg, Some("{}"));
        assert!(!html.contains("__dataIsland"));
    }
}