// solid_pod_rs/mashlib.rs

//! Mashlib / SolidOS data-browser integration.
//!
//! When a browser navigates to an RDF resource on a Solid server, the
//! `Accept: text/html` header signals that the user wants a rendered
//! view, not raw triples.  This module generates a thin HTML wrapper
//! that loads the SolidOS **mashlib** bundle (from a CDN, a local path,
//! or an ES module URL) and lets it fetch + render the underlying data.
//!
//! The approach is called the **"databrowser hack"** in the SolidOS
//! community: the server never renders RDF itself — it delegates to the
//! client-side pane system inside mashlib.
//!
//! ## Data-island optimisation
//!
//! When the resource is small enough (≤ [`DATA_ISLAND_MAX_BYTES`]), its
//! JSON-LD representation is embedded inline as a
//! `<script type="application/ld+json">` block.  A companion reader
//! script patches `$rdf.fetcher.load()` to resolve from that island
//! first, eliminating one HTTP round-trip on every page load.
//!
//! ## Modes
//!
//! | Mode | HTML template | Use case |
//! |------|---------------|----------|
//! | [`MashlibMode::Cdn`] | `<script src="unpkg.com/mashlib@{version}/…">` | Zero-config default |
//! | [`MashlibMode::Local`] | `<script defer src="/mashlib.min.js">` | Operator bundles assets with `actix-files` |
//! | [`MashlibMode::Module`] | `<script type="module" src="{url}">` | LOSOS / custom shell |
28
/// Default upper bound, in bytes, on the JSON-LD payload that may be
/// inlined as a data island (256 KiB).  The intent is to cover any
/// realistic profile, type index, or container listing while keeping the
/// wrapper page small.
pub const DATA_ISLAND_MAX_BYTES: usize = {
    const KIB: usize = 1024;
    256 * KIB
};
32
33// -------------------------------------------------------------------------
34// Configuration
35// -------------------------------------------------------------------------
36
/// Where the generated wrapper page loads the mashlib bundle from.
#[derive(Debug, Clone)]
pub enum MashlibMode {
    /// Fetch the bundle from the unpkg CDN.  `version` is the npm version
    /// string placed after `mashlib@` in the URL, e.g. `"2.0.0"`.
    Cdn { version: String },
    /// Reference `/mashlib.min.js` and `/mash.css` at the server root.
    /// The operator is expected to serve those files separately (for
    /// example with `actix-files` or `tower-http::ServeDir`).
    Local,
    /// ES module entry point (the LOSOS pattern).  The page contains a
    /// `<div id="mashlib">` plus `<script type="module" src="{url}">`.
    Module { url: String },
}

impl Default for MashlibMode {
    /// The zero-configuration choice: the CDN bundle pinned to `2.0.0`.
    fn default() -> Self {
        let version = String::from("2.0.0");
        Self::Cdn { version }
    }
}
58
59/// Mashlib integration configuration.
60#[derive(Debug, Clone)]
61pub struct MashlibConfig {
62    pub enabled: bool,
63    pub mode: MashlibMode,
64    pub data_island_max_bytes: usize,
65    pub round_trip_optimization: bool,
66}
67
68impl Default for MashlibConfig {
69    fn default() -> Self {
70        Self {
71            enabled: false,
72            mode: MashlibMode::default(),
73            data_island_max_bytes: DATA_ISLAND_MAX_BYTES,
74            round_trip_optimization: true,
75        }
76    }
77}
78
79// -------------------------------------------------------------------------
80// Decision: should we serve mashlib for this request?
81// -------------------------------------------------------------------------
82
/// Stored content types that mashlib can render.  Entries are lower-case
/// and carry no parameters; they are compared case-insensitively.
const MASHLIB_RENDERABLE: &[&str] = &[
    "text/turtle",
    "application/ld+json",
    "application/json",
    "text/n3",
    "application/n-triples",
    "application/rdf+xml",
    "text/markdown",
    "audio/mpegurl",
    "application/vnd.apple.mpegurl",
    "audio/x-scpls",
];

/// RDF types whose presence *before* `text/html` in the Accept header
/// indicates the client prefers raw RDF over an HTML wrapper.
const RDF_ACCEPT_TYPES: &[&str] = &[
    "application/rdf+xml",
    "text/turtle",
    "application/ld+json",
    "text/n3",
    "application/n-triples",
];

/// Determine whether the server should return the mashlib HTML wrapper
/// instead of the raw resource body.
///
/// The decision mirrors JSS `mashlib/index.js::shouldServeMashlib`:
///
/// 1. `enabled` must be true.
/// 2. `Sec-Fetch-Dest` (when present and non-empty) must be `document` —
///    this distinguishes top-level browser navigation from `fetch()` / XHR.
/// 3. `Accept` must include `text/html` and no RDF type may appear
///    before it (otherwise the client prefers raw data).
/// 4. The resource's stored content type must be renderable by mashlib.
///
/// Media types in `accept` and `resource_content_type` are matched
/// ASCII-case-insensitively, so `Text/HTML` behaves like `text/html`.
/// Preference is judged purely by textual position in the header;
/// `q=` weights are not interpreted.
///
/// # Parameters
///
/// * `accept` – raw value of the request's `Accept` header.
/// * `sec_fetch_dest` – raw value of `Sec-Fetch-Dest`, if the header was sent.
/// * `resource_content_type` – stored content type of the resource;
///   parameters such as `; charset=utf-8` are ignored.
/// * `enabled` – whether the mashlib integration is switched on.
///
/// # Returns
///
/// `true` when the HTML wrapper should be served, `false` when the raw
/// resource should be returned.
pub fn should_serve(
    accept: &str,
    sec_fetch_dest: Option<&str>,
    resource_content_type: &str,
    enabled: bool,
) -> bool {
    if !enabled {
        return false;
    }

    // An absent or empty header (older browsers, CLI tools) is treated as
    // navigation; any explicit destination other than `document` is not.
    if let Some(dest) = sec_fetch_dest {
        if !dest.is_empty() && dest != "document" {
            return false;
        }
    }

    // Media types are case-insensitive, so normalise once before searching.
    let accept = accept.to_ascii_lowercase();
    let html_pos = match accept.find("text/html") {
        Some(pos) => pos,
        None => return false,
    };
    let rdf_listed_first = RDF_ACCEPT_TYPES
        .iter()
        .any(|&rdf_type| matches!(accept.find(rdf_type), Some(pos) if pos < html_pos));
    if rdf_listed_first {
        return false;
    }

    // Drop parameters (`; charset=…`) before consulting the render table.
    let base_type = resource_content_type
        .split(';')
        .next()
        .unwrap_or("")
        .trim();
    MASHLIB_RENDERABLE
        .iter()
        .any(|renderable| renderable.eq_ignore_ascii_case(base_type))
}
158
159// -------------------------------------------------------------------------
160// Data-island escaping
161// -------------------------------------------------------------------------
162
/// Make a JSON-LD document safe to inline inside a
/// `<script type="application/ld+json">` element.
///
/// Every literal `<` is rewritten as the six-character JSON escape
/// `\u003c`.  The HTML parser looks for `</script` in the raw text of a
/// script element; with no `<` left in the payload, no end tag can form.
/// In well-formed JSON a `<` can only occur inside a string literal, where
/// `\u003c` decodes back to `<`, so the document's meaning is unchanged.
///
/// All other characters are copied through untouched.
pub fn escape_for_script_block(json_ld: &str) -> String {
    let mut escaped = String::with_capacity(json_ld.len());
    for ch in json_ld.chars() {
        if ch == '<' {
            escaped.push_str("\\u003c");
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
173
174/// Build the `<script type="application/ld+json">` data-island block.
175///
176/// Returns an empty string when `json_ld` is `None` or exceeds the
177/// byte cap (checked both pre- and post-escape).
178fn data_island(resource_url: &str, json_ld: Option<&str>, max_bytes: usize) -> String {
179    let raw = match json_ld {
180        Some(s) if !s.is_empty() => s,
181        _ => return String::new(),
182    };
183    if raw.len() > max_bytes {
184        return String::new();
185    }
186    let safe_body = escape_for_script_block(raw);
187    if safe_body.len() > max_bytes {
188        return String::new();
189    }
190    let safe_uri = escape_html(resource_url);
191    format!(
192        r#"<script type="application/ld+json" id="dataisland" data-uri="{safe_uri}">{safe_body}</script>"#
193    )
194}
195
196// -------------------------------------------------------------------------
197// Round-trip optimisation reader
198// -------------------------------------------------------------------------
199
/// Returns the inline `<script>` element that makes `$rdf.fetcher.load()`
/// consult the data island before going to the network.
///
/// The script installs `window.__dataIsland.get(uri)` (unless one already
/// exists) and wraps `fetcher.load`.  Because `$rdf` may not exist yet
/// when this runs, the patch is attempted three ways: immediately, from a
/// setter installed on `window.$rdf`, and by polling every 100 ms for at
/// most 100 attempts.  If parsing the island fails, the wrapper falls back
/// to the original `load`.
fn round_trip_script() -> &'static str {
    const READER: &str = r#"<script>
(function(){
  if(typeof window==='undefined')return;
  var di=window.__dataIsland;
  if(di===null||di===undefined||(typeof di!=='object'&&typeof di!=='function'))di=window.__dataIsland={};
  if(typeof di.get!=='function'){
    di.get=function(uri){
      if(!uri)return null;
      try{var el=document.getElementById('dataisland');
        if(el&&el.type==='application/ld+json'&&el.getAttribute('data-uri')===String(uri))
          return{contentType:'application/ld+json',content:el.textContent};
      }catch(e){}return null;
    };
  }
  function patch(rdf){
    if(!rdf||!rdf.fetcher||!rdf.fetcher.load)return;
    if(rdf.fetcher.__diPatched)return;rdf.fetcher.__diPatched=true;
    var f=rdf.fetcher,orig=f.load.bind(f);
    f.load=function(uri,opts){
      var s=(uri&&uri.uri)||(uri&&uri.value)||String(uri);
      var d=window.__dataIsland.get(s);
      if(d)return new Promise(function(ok,fail){
        rdf.parse(d.content,f.store,s,d.contentType,function(err){
          if(err){fail(err);return;}
          try{if(f.requested&&typeof f.requested==='object')f.requested[s]='done';
            var r=typeof Response==='function'?new Response(d.content,{status:200,headers:{'content-type':d.contentType}}):{ok:true,status:200,url:s,headers:{get:function(n){return n&&n.toLowerCase()==='content-type'?d.contentType:null}}};
            try{Object.defineProperty(r,'url',{value:s,configurable:true})}catch(e){}ok(r);
          }catch(e){fail(e);}
        });
      }).catch(function(){return orig(uri,opts);});
      return orig(uri,opts);
    };
  }
  if(typeof $rdf!=='undefined')patch($rdf);
  try{var c=typeof $rdf!=='undefined'?$rdf:undefined;
    Object.defineProperty(window,'$rdf',{configurable:true,get:function(){return c;},set:function(v){c=v;patch(v);}});
  }catch(e){}
  var n=0;(function p(){if(++n>100)return;if(typeof $rdf!=='undefined'&&$rdf&&$rdf.fetcher&&$rdf.fetcher.__diPatched)return;if(typeof $rdf!=='undefined')patch($rdf);setTimeout(p,100);})();
})();
</script>"#;
    READER
}
245
246// -------------------------------------------------------------------------
247// HTML generators
248// -------------------------------------------------------------------------
249
250/// Generate the mashlib HTML wrapper page.
251///
252/// When `embed_json_ld` is provided and within the byte cap, it is
253/// inlined as a data island for the round-trip optimisation.
254pub fn generate_html(
255    resource_url: &str,
256    config: &MashlibConfig,
257    embed_json_ld: Option<&str>,
258) -> String {
259    let island = data_island(resource_url, embed_json_ld, config.data_island_max_bytes);
260    let reader = if config.round_trip_optimization {
261        round_trip_script()
262    } else {
263        ""
264    };
265
266    match &config.mode {
267        MashlibMode::Cdn { version } => {
268            let base = format!("https://unpkg.com/mashlib@{version}/dist");
269            format!(
270                r#"<!doctype html><html><head><meta charset="utf-8"/><title>SolidOS Web App</title>
271<link href="{base}/mash.css" rel="stylesheet"></head>
272<body id="PageBody">{island}{reader}<header id="PageHeader"></header>
273<div class="TabulatorOutline" id="DummyUUID" role="main"><table id="outline"></table><div id="GlobalDashboard"></div></div>
274<footer id="PageFooter"></footer>
275<script>
276(function(){{var s=document.createElement('script');s.src='{base}/mashlib.min.js';s.onload=function(){{panes.runDataBrowser()}};s.onerror=function(){{document.body.innerHTML='<p>Failed to load Mashlib from CDN</p>'}};document.head.appendChild(s)}})();
277</script></body></html>"#
278            )
279        }
280        MashlibMode::Local => {
281            format!(
282                r#"<!doctype html><html><head><meta charset="utf-8"/><title>SolidOS Web App</title><script>document.addEventListener('DOMContentLoaded',function(){{panes.runDataBrowser()}})</script><script defer="defer" src="/mashlib.min.js"></script><link href="/mash.css" rel="stylesheet"></head><body id="PageBody">{island}{reader}<header id="PageHeader"></header><div class="TabulatorOutline" id="DummyUUID" role="main"><table id="outline"></table><div id="GlobalDashboard"></div></div><footer id="PageFooter"></footer></body></html>"#
283            )
284        }
285        MashlibMode::Module { url } => {
286            let css_url = if url.ends_with(".js") {
287                format!("{}.css", &url[..url.len() - 3])
288            } else {
289                format!("{url}.css")
290            };
291            format!(
292                r#"<!doctype html><html lang="en"><head><meta charset="utf-8"/>
293<meta name="viewport" content="width=device-width, initial-scale=1">
294<title>Solid Data Browser</title>
295<link rel="stylesheet" href="{css_url}"></head>
296<body>{island}{reader}<div id="mashlib"></div>
297<script type="module" src="{url}"></script>
298</body></html>"#
299            )
300        }
301    }
302}
303
304// -------------------------------------------------------------------------
305// Utilities
306// -------------------------------------------------------------------------
307
/// Escape `s` for use in HTML text or a double-quoted attribute value.
///
/// `&`, `<`, `>` and `"` are replaced by `&amp;`, `&lt;`, `&gt;` and
/// `&quot;`; every other character (including `'`) is copied unchanged.
fn escape_html(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            other => out.push(other),
        }
    }
    out
}
314
315// -------------------------------------------------------------------------
316// Tests
317// -------------------------------------------------------------------------
318
#[cfg(test)]
mod tests {
    //! Unit tests for the serve decision, the two escaping helpers, the
    //! data-island builder and the three HTML templates.

    use super::*;

    /// Default configuration (CDN mode) with the integration switched on.
    fn default_cdn_config() -> MashlibConfig {
        MashlibConfig {
            enabled: true,
            ..Default::default()
        }
    }

    // -- should_serve -------------------------------------------------------

    #[test]
    fn serves_html_for_browser_navigation() {
        assert!(should_serve(
            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            Some("document"),
            "text/turtle",
            true,
        ));
    }

    #[test]
    fn rejects_when_disabled() {
        assert!(!should_serve(
            "text/html",
            Some("document"),
            "text/turtle",
            false
        ));
    }

    #[test]
    fn rejects_xhr_fetch() {
        assert!(!should_serve(
            "text/html",
            Some("empty"),
            "text/turtle",
            true
        ));
    }

    #[test]
    fn rejects_no_html_in_accept() {
        assert!(!should_serve(
            "application/ld+json, */*;q=0.1",
            None,
            "text/turtle",
            true,
        ));
    }

    #[test]
    fn rejects_rdf_preferred_over_html() {
        assert!(!should_serve(
            "text/turtle, text/html;q=0.5",
            None,
            "text/turtle",
            true,
        ));
    }

    #[test]
    fn rejects_non_renderable_content_type() {
        assert!(!should_serve(
            "text/html",
            Some("document"),
            "image/png",
            true,
        ));
    }

    #[test]
    fn accepts_jsonld_content_type() {
        assert!(should_serve(
            "text/html",
            Some("document"),
            "application/ld+json; charset=utf-8",
            true,
        ));
    }

    #[test]
    fn accepts_markdown_content_type() {
        assert!(should_serve(
            "text/html",
            Some("document"),
            "text/markdown",
            true,
        ));
    }

    #[test]
    fn accepts_absent_sec_fetch_dest() {
        assert!(should_serve("text/html", None, "text/turtle", true));
    }

    #[test]
    fn accepts_empty_sec_fetch_dest() {
        // An empty header value is treated like a missing header.
        assert!(should_serve("text/html", Some(""), "text/turtle", true));
    }

    // -- escape_for_script_block -------------------------------------------

    #[test]
    fn escapes_angle_brackets() {
        let input = r#"{"@id": "</script>"}"#;
        let escaped = escape_for_script_block(input);
        assert!(!escaped.contains('<'));
        assert!(escaped.contains("\\u003c"));
    }

    #[test]
    fn escape_leaves_other_characters_untouched() {
        let input = r#"{"name":"a > b & \"c\""}"#;
        assert_eq!(escape_for_script_block(input), input);
    }

    // -- escape_html -------------------------------------------------------

    #[test]
    fn escape_html_covers_attribute_specials() {
        assert_eq!(
            escape_html(r#"<a href="x">&</a>"#),
            "&lt;a href=&quot;x&quot;&gt;&amp;&lt;/a&gt;"
        );
    }

    // -- data_island -------------------------------------------------------

    #[test]
    fn island_empty_when_none() {
        assert!(data_island("http://x", None, DATA_ISLAND_MAX_BYTES).is_empty());
    }

    #[test]
    fn island_empty_when_empty_string() {
        assert!(data_island("http://x", Some(""), DATA_ISLAND_MAX_BYTES).is_empty());
    }

    #[test]
    fn island_empty_when_oversized() {
        let big = "x".repeat(DATA_ISLAND_MAX_BYTES + 1);
        assert!(data_island("http://x", Some(&big), DATA_ISLAND_MAX_BYTES).is_empty());
    }

    #[test]
    fn island_empty_when_escaped_body_exceeds_cap() {
        // Ten raw bytes fit under the cap, but each `<` expands to six.
        let raw = "<".repeat(10);
        assert!(data_island("http://x", Some(&raw), 20).is_empty());
    }

    #[test]
    fn island_contains_json_ld_type() {
        let island = data_island("http://x/r", Some(r#"{"@id":"x"}"#), DATA_ISLAND_MAX_BYTES);
        assert!(island.contains("application/ld+json"));
        assert!(island.contains("dataisland"));
    }

    #[test]
    fn island_neutralises_script_end_tag() {
        let island = data_island(
            "http://x/r",
            Some(r#"{"@id":"</script>"}"#),
            DATA_ISLAND_MAX_BYTES,
        );
        assert!(island.contains(r"\u003c/script>"));
        // The only real end tag is the one closing the island itself.
        assert_eq!(island.matches("</script>").count(), 1);
        assert!(island.ends_with("</script>"));
    }

    #[test]
    fn island_escapes_resource_url_attribute() {
        let island = data_island(
            r#"http://x/r?a="1"&b=<2>"#,
            Some("{}"),
            DATA_ISLAND_MAX_BYTES,
        );
        assert!(island.contains(r#"data-uri="http://x/r?a=&quot;1&quot;&amp;b=&lt;2&gt;""#));
    }

    // -- generate_html -----------------------------------------------------

    #[test]
    fn cdn_mode_references_unpkg() {
        let cfg = default_cdn_config();
        let html = generate_html("http://pod/resource", &cfg, None);
        assert!(html.contains("unpkg.com/mashlib@2.0.0"));
        assert!(html.contains("mashlib.min.js"));
        assert!(html.contains("mash.css"));
    }

    #[test]
    fn local_mode_uses_root_relative_paths() {
        let cfg = MashlibConfig {
            enabled: true,
            mode: MashlibMode::Local,
            ..Default::default()
        };
        let html = generate_html("http://pod/resource", &cfg, None);
        assert!(html.contains(r#"src="/mashlib.min.js""#));
        assert!(html.contains(r#"href="/mash.css""#));
    }

    #[test]
    fn module_mode_emits_mashlib_div() {
        let cfg = MashlibConfig {
            enabled: true,
            mode: MashlibMode::Module {
                url: "https://host/path/mashlib.js".into(),
            },
            ..Default::default()
        };
        let html = generate_html("http://pod/resource", &cfg, None);
        assert!(html.contains(r#"id="mashlib""#));
        assert!(html.contains(r#"type="module""#));
        assert!(html.contains("https://host/path/mashlib.js"));
        assert!(html.contains("https://host/path/mashlib.css"));
    }

    #[test]
    fn module_mode_appends_css_when_no_js_suffix() {
        let cfg = MashlibConfig {
            enabled: true,
            mode: MashlibMode::Module {
                url: "https://host/path/entry".into(),
            },
            ..Default::default()
        };
        let html = generate_html("http://pod/resource", &cfg, None);
        assert!(html.contains(r#"href="https://host/path/entry.css""#));
    }

    #[test]
    fn embeds_data_island_when_provided() {
        let cfg = default_cdn_config();
        let json_ld = r#"{"@id":"http://pod/r","@type":"foaf:Person"}"#;
        let html = generate_html("http://pod/r", &cfg, Some(json_ld));
        assert!(html.contains("dataisland"));
        assert!(html.contains("foaf:Person"));
    }

    #[test]
    fn round_trip_script_present_by_default() {
        let cfg = default_cdn_config();
        let html = generate_html("http://pod/r", &cfg, Some("{}"));
        assert!(html.contains("__dataIsland"));
    }

    #[test]
    fn round_trip_script_absent_when_disabled() {
        let cfg = MashlibConfig {
            enabled: true,
            round_trip_optimization: false,
            ..Default::default()
        };
        let html = generate_html("http://pod/r", &cfg, Some("{}"));
        assert!(!html.contains("__dataIsland"));
    }
}