Skip to main content

solid_pod_rs/
webid.rs

1//! WebID profile document generation and validation.
2//!
3//! The embedded JSON-LD data island mirrors JSS
4//! `src/webid/profile.js::generateProfileJsonLd` (commits cccd081 #320,
5//! 01e12b0 #299): it carries both the legacy `solid:oidcIssuer` predicate
6//! and the LWS 1.0 Controlled Identifier `service` array, plus
7//! `foaf:isPrimaryTopicOf` / `schema:mainEntityOfPage` self-references.
8
9use serde_json::{json, Value};
10
11/// Render a WebID profile as an HTML document with embedded JSON-LD.
12///
13/// Omits `solid:oidcIssuer`. Prefer [`generate_webid_html_with_issuer`]
14/// for Solid-OIDC flows.
15pub fn generate_webid_html(pubkey: &str, name: Option<&str>, pod_base: &str) -> String {
16    generate_webid_html_with_issuer(pubkey, name, pod_base, None)
17}
18
19/// Render a WebID profile with an optional Solid-OIDC issuer
20/// advertised via `solid:oidcIssuer` and, when present, an LWS 1.0
21/// `service` entry typed `lws:OpenIdProvider`.
22pub fn generate_webid_html_with_issuer(
23    pubkey: &str,
24    name: Option<&str>,
25    pod_base: &str,
26    oidc_issuer: Option<&str>,
27) -> String {
28    let display_name = name.unwrap_or("Solid Pod User");
29    let pod_url = format!("{pod_base}/pods/{pubkey}/");
30    let webid = format!("{pod_base}/pods/{pubkey}/profile/card#me");
31    // Document URL (WebID without fragment) — anchor for relative self
32    // references and for the cid:service fragment id.
33    let doc_url = webid.split('#').next().unwrap_or(&webid).to_string();
34
35    let mut context = json!({
36        "foaf": "http://xmlns.com/foaf/0.1/",
37        "solid": "http://www.w3.org/ns/solid/terms#",
38        "schema": "http://schema.org/",
39        "cid": "https://www.w3.org/ns/cid/v1#",
40        "lws": "https://www.w3.org/ns/lws#",
41        "isPrimaryTopicOf": { "@id": "foaf:isPrimaryTopicOf", "@type": "@id" },
42        "mainEntityOfPage": { "@id": "schema:mainEntityOfPage", "@type": "@id" },
43        "service": { "@id": "cid:service", "@container": "@set" },
44        "serviceEndpoint": { "@id": "cid:serviceEndpoint", "@type": "@id" }
45    });
46    // Keep context shape mutable in case future rows add more terms.
47    let _ = context.as_object_mut();
48
49    let mut body = json!({
50        "@context": context,
51        "@id": webid,
52        "@type": "foaf:Person",
53        "foaf:name": display_name,
54        "foaf:isPrimaryTopicOf": "",
55        "schema:mainEntityOfPage": "",
56        "solid:account": pod_url,
57        "solid:privateTypeIndex": format!("{pod_url}settings/privateTypeIndex"),
58        "solid:publicTypeIndex": format!("{pod_url}settings/publicTypeIndex"),
59        "schema:identifier": format!("did:nostr:{pubkey}")
60    });
61
62    if let Some(iss) = oidc_issuer {
63        // Legacy Solid-OIDC predicate (kept for existing clients).
64        body["solid:oidcIssuer"] = json!({ "@id": iss });
65        // LWS 1.0 Controlled Identifier service entry.
66        body["service"] = json!([{
67            "@id": format!("{doc_url}#oidc"),
68            "@type": "lws:OpenIdProvider",
69            "serviceEndpoint": iss
70        }]);
71    }
72
73    let body_json = serde_json::to_string_pretty(&body)
74        .expect("serde_json::Value always serialises");
75
76    format!(
77        r#"<!DOCTYPE html>
78<html>
79<head>
80  <meta charset="utf-8">
81  <title>{display_name}</title>
82  <script type="application/ld+json">
83{body_json}
84  </script>
85</head>
86<body>
87  <h1>{display_name}</h1>
88  <p>WebID: <a href="{webid}">{webid}</a></p>
89  <p>Pod: <a href="{pod_url}">{pod_url}</a></p>
90</body>
91</html>"#
92    )
93}
94
95/// Locate and parse the JSON-LD data island from a WebID HTML document.
96fn parse_json_ld(data: &[u8]) -> Result<Option<Value>, String> {
97    let text = std::str::from_utf8(data)
98        .map_err(|_| "WebID profile must be valid UTF-8".to_string())?;
99    let start = match text.find("application/ld+json") {
100        Some(s) => s,
101        None => return Ok(None),
102    };
103    let tag_end = match text[start..].find('>') {
104        Some(e) => e,
105        None => return Ok(None),
106    };
107    let json_start = start + tag_end + 1;
108    let script_end = match text[json_start..].find("</script>") {
109        Some(e) => e,
110        None => return Ok(None),
111    };
112    let json_str = text[json_start..json_start + script_end].trim();
113    let value: Value = serde_json::from_str(json_str)
114        .map_err(|e| format!("WebID JSON-LD parse error: {e}"))?;
115    Ok(Some(value))
116}
117
118/// Follow-your-nose discovery — extract `solid:oidcIssuer` from a
119/// WebID HTML document. Returns `Ok(None)` when the profile does not
120/// advertise an issuer.
121pub fn extract_oidc_issuer(data: &[u8]) -> Result<Option<String>, String> {
122    let value = match parse_json_ld(data)? {
123        Some(v) => v,
124        None => return Ok(None),
125    };
126    let issuer = value.get("solid:oidcIssuer").or_else(|| {
127        value.get("http://www.w3.org/ns/solid/terms#oidcIssuer")
128    });
129    match issuer {
130        Some(Value::String(s)) => Ok(Some(s.clone())),
131        Some(Value::Object(m)) => {
132            if let Some(Value::String(s)) = m.get("@id") {
133                Ok(Some(s.clone()))
134            } else {
135                Ok(None)
136            }
137        }
138        _ => Ok(None),
139    }
140}
141
142/// LWS 1.0 Controlled Identifier discovery — return the
143/// `serviceEndpoint` of the first `service` entry whose `@type` is
144/// `lws:OpenIdProvider` (or the fully-expanded IRI). Mirrors the shape
145/// of [`extract_oidc_issuer`]; returns `Ok(None)` when absent.
146pub fn extract_cid_openid_provider(data: &[u8]) -> Result<Option<String>, String> {
147    let value = match parse_json_ld(data)? {
148        Some(v) => v,
149        None => return Ok(None),
150    };
151    let service = value
152        .get("service")
153        .or_else(|| value.get("cid:service"))
154        .or_else(|| value.get("https://www.w3.org/ns/cid/v1#service"));
155    let arr = match service {
156        Some(Value::Array(a)) => a,
157        _ => return Ok(None),
158    };
159    for entry in arr {
160        let Some(obj) = entry.as_object() else {
161            continue;
162        };
163        let ty = obj.get("@type");
164        let matches = match ty {
165            Some(Value::String(s)) => {
166                s == "lws:OpenIdProvider" || s == "https://www.w3.org/ns/lws#OpenIdProvider"
167            }
168            Some(Value::Array(ts)) => ts.iter().any(|t| {
169                matches!(
170                    t.as_str(),
171                    Some("lws:OpenIdProvider")
172                        | Some("https://www.w3.org/ns/lws#OpenIdProvider")
173                )
174            }),
175            _ => false,
176        };
177        if !matches {
178            continue;
179        }
180        let endpoint = obj
181            .get("serviceEndpoint")
182            .or_else(|| obj.get("cid:serviceEndpoint"))
183            .or_else(|| obj.get("https://www.w3.org/ns/cid/v1#serviceEndpoint"));
184        match endpoint {
185            Some(Value::String(s)) => return Ok(Some(s.clone())),
186            Some(Value::Object(m)) => {
187                if let Some(Value::String(s)) = m.get("@id") {
188                    return Ok(Some(s.clone()));
189                }
190            }
191            _ => {}
192        }
193    }
194    Ok(None)
195}
196
197/// Validate that a byte slice is a well-formed WebID profile.
198pub fn validate_webid_html(data: &[u8]) -> Result<(), String> {
199    let text = std::str::from_utf8(data)
200        .map_err(|_| "WebID profile must be valid UTF-8".to_string())?;
201    if !text.contains("application/ld+json") {
202        return Err(
203            "WebID profile must contain a <script type=\"application/ld+json\"> block".to_string(),
204        );
205    }
206    // parse_json_ld surfaces syntactic errors.
207    parse_json_ld(data)?;
208    Ok(())
209}
210
#[cfg(test)]
mod tests {
    use super::*;

    /// Extract and parse the JSON-LD data island from generated HTML.
    ///
    /// Panics with the offending text when the island is missing or is
    /// not valid JSON, so failures are diagnosable from the test output.
    fn json_ld_body(html: &str) -> serde_json::Value {
        let start = html.find("application/ld+json").expect("ld+json tag");
        let tag_end = html[start..].find('>').expect("script >");
        let body_start = start + tag_end + 1;
        let body_end = html[body_start..].find("</script>").expect("/script");
        let body = html[body_start..body_start + body_end].trim();
        serde_json::from_str(body).unwrap_or_else(|e| {
            panic!("embedded JSON-LD failed to parse: {e}\n----\n{body}\n----")
        })
    }

    #[test]
    fn contains_pubkey() {
        let html = generate_webid_html("abc123", None, "https://pods.example.com");
        assert!(html.contains("abc123"));
        assert!(html.contains("did:nostr:abc123"));
    }

    #[test]
    fn validate_accepts_valid() {
        let html = generate_webid_html("abc", Some("Alice"), "https://pods.example.com");
        assert!(validate_webid_html(html.as_bytes()).is_ok());
    }

    #[test]
    fn validate_rejects_missing_jsonld() {
        let html = "<!DOCTYPE html><html><body>no ld+json</body></html>";
        assert!(validate_webid_html(html.as_bytes()).is_err());
    }

    #[test]
    fn generate_with_issuer_embeds_oidc_triple() {
        let html = generate_webid_html_with_issuer(
            "abc",
            Some("Alice"),
            "https://pods.example.com",
            Some("https://op.example"),
        );
        assert!(html.contains("solid:oidcIssuer"));
        assert!(html.contains("https://op.example"));
    }

    #[test]
    fn extract_oidc_issuer_returns_issuer_id() {
        let html = generate_webid_html_with_issuer(
            "abc",
            Some("Alice"),
            "https://pods.example.com",
            Some("https://op.example"),
        );
        let iss = extract_oidc_issuer(html.as_bytes()).unwrap();
        assert_eq!(iss.as_deref(), Some("https://op.example"));
    }

    #[test]
    fn extract_oidc_issuer_absent_returns_none() {
        let html =
            generate_webid_html_with_issuer("abc", Some("Alice"), "https://p", None);
        let iss = extract_oidc_issuer(html.as_bytes()).unwrap();
        assert!(iss.is_none());
    }

    // --- Parity rows 154/155/165 ---------------------------------------

    #[test]
    fn emits_cid_service_when_issuer_present() {
        let html = generate_webid_html_with_issuer(
            "abc",
            Some("Alice"),
            "https://pods.example.com",
            Some("https://op.example"),
        );
        // Context namespaces are present.
        assert!(
            html.contains("https://www.w3.org/ns/cid/v1#"),
            "cid namespace missing"
        );
        assert!(
            html.contains("https://www.w3.org/ns/lws#"),
            "lws namespace missing"
        );
        // Service entry with LWS OpenIdProvider type.
        assert!(
            html.contains("lws:OpenIdProvider"),
            "lws:OpenIdProvider type missing"
        );
        // Service @id resolves against document URL (fragment #oidc).
        assert!(
            html.contains("https://pods.example.com/pods/abc/profile/card#oidc"),
            "service @id fragment missing"
        );
    }

    #[test]
    fn omits_cid_service_when_no_issuer() {
        let html =
            generate_webid_html_with_issuer("abc", Some("Alice"), "https://p", None);
        let body = json_ld_body(&html);
        assert!(
            body.get("service").is_none(),
            "service array must be absent without issuer"
        );
        assert!(
            !html.contains("lws:OpenIdProvider"),
            "OpenIdProvider must not leak when issuer absent"
        );
    }

    #[test]
    fn emits_primary_topic_of_and_main_entity_of_page() {
        let html = generate_webid_html_with_issuer(
            "abc",
            Some("Alice"),
            "https://pods.example.com",
            None,
        );
        let body = json_ld_body(&html);
        assert_eq!(
            body.get("foaf:isPrimaryTopicOf").and_then(|v| v.as_str()),
            Some(""),
            "foaf:isPrimaryTopicOf must be empty string (relative self-ref)"
        );
        assert_eq!(
            body.get("schema:mainEntityOfPage").and_then(|v| v.as_str()),
            Some(""),
            "schema:mainEntityOfPage must be empty string (relative self-ref)"
        );
        // Context must declare both predicates.
        let ctx = body.get("@context").expect("@context");
        assert!(ctx.get("isPrimaryTopicOf").is_some());
        assert!(ctx.get("mainEntityOfPage").is_some());
    }

    #[test]
    fn extract_cid_openid_provider_returns_endpoint() {
        let html = generate_webid_html_with_issuer(
            "abc",
            Some("Alice"),
            "https://pods.example.com",
            Some("https://op.example"),
        );
        let endpoint = extract_cid_openid_provider(html.as_bytes()).unwrap();
        assert_eq!(endpoint.as_deref(), Some("https://op.example"));
    }

    #[test]
    fn extract_cid_openid_provider_absent_returns_none() {
        let html =
            generate_webid_html_with_issuer("abc", Some("Alice"), "https://p", None);
        let endpoint = extract_cid_openid_provider(html.as_bytes()).unwrap();
        assert!(endpoint.is_none());
    }

    #[test]
    fn json_ld_body_is_valid_json() {
        // Regression guard against hand-escaping: whatever issuer/name we
        // feed in, the embedded body must parse with serde_json and the
        // name must survive the round trip unchanged.
        let tricky_name = r#"Alice "Quoted" O'Neil"#;
        for issuer in [None, Some("https://op.example/path?q=1&x=2")] {
            let html = generate_webid_html_with_issuer(
                "abc",
                Some(tricky_name),
                "https://pods.example.com",
                issuer,
            );
            // The shared helper panics with the raw island on parse failure.
            let body = json_ld_body(&html);
            assert!(body.is_object(), "JSON-LD island must be a JSON object");
            assert_eq!(
                body.get("foaf:name").and_then(|v| v.as_str()),
                Some(tricky_name),
                "foaf:name must round-trip through the data island"
            );
        }
    }
}