dweb 0.13.3

Decentralised web and storage library for Autonomi
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
/*
 Copyright (c) 2025 Mark Hughes

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

//! # DwebHost
//!
//! The 'host' part of a dweb web URL uses the domain 'www-dweb.au' plus either
//! one or two subdomains:
//!
//!    `[v<VERSION>.]<DWEB-NAME>.www-dweb.au`
//!
//! The first part is an optional version, followed by a short name (which corresponds to a History
//! stored on Autonomi).
//!
//!    VERSION 1 is the first version, 2 the second etc, and if omitted implies 'most recent'.
//!
//!    A DWEB-NAME corresponds to a particular website history. It begins with a memorable part,
//!    a mnemonic for the website, followed by a hyphen and ends with the first few characters from
//!    the xor encoded HistoryAddress. The memorable part is a lowercase alphabetic string which
//!    may be taken from website metadata or specified by the user. The characters after the hyphen
//!    serve to disambiguate websites which could have the same memorable part.
//!
//! Example web names:
//!    'awesome-f8b3.www-dweb.au'          - the most recent version of a website
//!    'v23.awesome-f8b3.www-dweb.au'      - the 23rd version of the same website
//!    'v23.awesome-f2e4.www-dweb.au'      - the 23rd version of a different website
//!
//! DwebHosts allow the correct website version to be retrieved from a History<Tree>
//! on Autonomi and the corresponding content to be returned to a standard web browser. They act
//! as keys for a cache maintained by the local dweb server, but must first be created using
//! the appropriate dweb APIs.
//!
//! Once created, resolving a DwebHost requires a local DNS to redirect the dweb domain www-dweb.au
//! to a local dweb server (e.g. dweb-cli) which decodes the name and accesses the relevant website
//! version from a cache held in the server.
//!
//! DwebHosts could be persisted in various ways, such as in a separate website on Autonomi or
//! the private Vault of a user, which then provides a set of 'favourites' or web bookmarks personal
//! to a user.
//!
//! Without persistence, different DWEB-NAMES can be used with the same HistoryAddress at different
//! times, but there is always a one-to-one correspondence between the two, so neither can be
//! coupled to more than one of the other at one time.
//!
//! TODO: implement persistent DWEB-NAMES per user and use to provide a page of sites with brief
//! information to aid identification.
//!

use color_eyre::eyre::{Result, eyre};

use crate::cache::directory_with_name::HISTORY_NAMES;
use crate::history::HistoryAddress;

// Domain name and subdomain constraints based on IETF RFC1035 with links to relevant sections:
/// Maximum length of a single subdomain (one '.' separated segment of a host name).
pub const MAX_SUBDOMAIN_LEN: usize = 63; //  S2.3.4 Size limits (https://datatracker.ietf.org/doc/html/rfc1035#section-2.3.4)
// A subdomain must start with a letter (a-z) and is followed by one or more letters or numbers
// which may be separated by a hyphen. S2.3.1. Preferred name syntax (https://datatracker.ietf.org/doc/html/rfc1035#section-2.3.1)

/// Number of hexadecimal disambiguation characters which follow the final hyphen of a DWEB-NAME.
pub const DISAMBIGUATION_LEN: usize = 4; // Number of hexadecimal disambiguation characters to include in a DWEB-NAME
/// Maximum length of the memorable part of a DWEB-NAME, so that the memorable part, a hyphen
/// and the disambiguation characters together fit within MAX_SUBDOMAIN_LEN.
pub const MEMORABLE_PART_LEN: usize = MAX_SUBDOMAIN_LEN - DISAMBIGUATION_LEN - 1; // Allow 1 for hyphen

/// The domain segment which must follow the DWEB-NAME in a dweb host.
pub const DOMAIN_PART: &str = "www-dweb";
/// The top level domain segment which must follow DOMAIN_PART in a dweb host.
pub const TLD_PART: &str = "au";

// First byte of the optional version segment of a dweb host ('v<VERSION>')
const VERSION_CHAR: u8 = b'v';
// Marker appended to the memorable part of a 'fixed version' DWEB-NAME. Ordinary DWEB-NAMEs
// must not have a memorable part ending with this, to avoid clashing with fixed names.
const FIXED_WEBNAME_SEPARATOR: &str = "-f";

/// DwebHost corresponds to the HOST part of a dweb URL and encapsulates the component
/// subdomain and domain parts which are used to lookup the content for a version of a
/// website.
///
/// decode_dweb_host() creates these from a host string, storing both the host string and
/// the DWEB-NAME converted to ASCII lowercase.
///
pub struct DwebHost {
    /// The complete host string: `[v<VERSION>.]<DWEB-NAME>.www-dweb.au`
    pub dweb_host_string: String,
    /// The DWEB-NAME segment of the host, without the version, domain or TLD segments
    pub dweb_name: String,
    /// None implies most recent version (highest number)
    pub version: Option<u64>,

    #[cfg(feature = "fixed-dweb-hosts")]
    // Development build feature for non-versioned Tree references
    // Set by decode_dweb_host() when the host has no version segment and the DWEB-NAME
    // contains FIXED_WEBNAME_SEPARATOR followed by a hyphen
    pub is_fixed_dweb_host: bool,
}

/// Make a valid DWEB-NAME for a dweb URL
///
/// See validate_dweb_name() for more.
pub fn make_dweb_name(memorable_part: &String, history_address: HistoryAddress) -> Result<String> {
    if memorable_part.len() == 0 {
        return Err(eyre!(
            "The memorable part of a DWEB-NAME must include at least one alphabetic character"
        ));
    }

    if !memorable_part.as_bytes()[0].is_ascii_alphabetic() {
        return Err(eyre!(
            "The memorable part of a DWEB-NAME must begin with an alphabetic character"
        ));
    }

    if !memorable_part.len() > MEMORABLE_PART_LEN {
        return Err(eyre!(
            "The memorable part of a DWEB-NAME cannot exceed {MEMORABLE_PART_LEN} characters"
        ));
    }

    if memorable_part.contains("--") {
        return Err(eyre!(
            "The memorable part of a DWEB-NAME cannot contain consecutive hyphens"
        ));
    }

    if !memorable_part[1..]
        .chars()
        .all(|c| c.is_alphanumeric() || c == '-')
    {
        return Err(eyre!(
            "The memorable part of a DWEB-NAME can only contain alphanumeric characters and hyphens"
        ));
    }

    // Prevent clash with 'fixed version' web names
    if !memorable_part.ends_with(FIXED_WEBNAME_SEPARATOR) {
        return Err(eyre!(
            "The memorable part of a DWEB-NAME cannot end with '{FIXED_WEBNAME_SEPARATOR}'"
        ));
    }

    let history_part = format!("{}", history_address.to_hex());
    Ok(
        memorable_part[..MEMORABLE_PART_LEN].to_string()
            + "-"
            + &history_part[..DISAMBIGUATION_LEN],
    )
}

/// Create a version part ("v[VERSION]") for a www-dweb URL
///
/// Version zero has no version part, so gives an empty string.
pub fn make_version_part(version: u64) -> String {
    match version {
        0 => String::new(),
        number => format!("v{number}"),
    }
}

#[cfg(feature = "fixed-dweb-hosts")]
use autonomi::client::files::archive_public::ArchiveAddress;

/// Make a 'fixed version' DWEB-NAME which refers to an archive rather than a history
///
/// The name is the memorable part, FIXED_WEBNAME_SEPARATOR, a hyphen, and the hex encoded
/// `archive_address`, all converted to ASCII lowercase.
///
/// # Errors
///
/// Returns an error if the memorable part:
/// - is empty or does not begin with an ASCII letter
/// - is longer than MEMORABLE_PART_LEN less the length of FIXED_WEBNAME_SEPARATOR
/// - contains anything other than ASCII letters, digits and single hyphens
/// - ends with a hyphen or with FIXED_WEBNAME_SEPARATOR
///
/// or if the resulting name is rejected by validate_dweb_name().
#[cfg(feature = "fixed-dweb-hosts")]
pub fn make_fixed_dweb_name(
    memorable_part: &String,
    archive_address: ArchiveAddress,
) -> Result<String> {
    if memorable_part.is_empty() {
        return Err(eyre!(
            "The memorable part of a DWEB-NAME must include at least one alphabetic character"
        ));
    }

    if !memorable_part.as_bytes()[0].is_ascii_alphabetic() {
        return Err(eyre!(
            "The memorable part of a DWEB-NAME must begin with an alphabetic character"
        ));
    }

    // Note: '!' on a usize is a bitwise NOT, so the length must be compared directly
    const FIXED_MEMORABLE_PART_LEN: usize = MEMORABLE_PART_LEN - FIXED_WEBNAME_SEPARATOR.len();
    if memorable_part.len() > FIXED_MEMORABLE_PART_LEN {
        return Err(eyre!(
            "The memorable part of a 'fixed' version DWEB-NAME cannot exceed {FIXED_MEMORABLE_PART_LEN} characters"
        ));
    }

    if memorable_part.contains("--") {
        return Err(eyre!(
            "The memorable part of a DWEB-NAME cannot contain consecutive hyphens"
        ));
    }

    // Host names are ASCII only, so don't accept Unicode letters or digits
    if !memorable_part
        .chars()
        .all(|c| c.is_ascii_alphanumeric() || c == '-')
    {
        return Err(eyre!(
            "The memorable part of a DWEB-NAME can only contain alphanumeric characters and hyphens"
        ));
    }

    // A trailing hyphen would be followed by the separator, giving '--' in the name
    if memorable_part.ends_with('-') {
        return Err(eyre!(
            "The memorable part of a DWEB-NAME cannot end with a hyphen"
        ));
    }

    // The separator is appended below, so it must not already be present at the end
    if memorable_part.ends_with(FIXED_WEBNAME_SEPARATOR) {
        return Err(eyre!(
            "The memorable part of a DWEB-NAME cannot end with '{FIXED_WEBNAME_SEPARATOR}'"
        ));
    }

    // The length was checked above, so the whole memorable part is used (slicing it to
    // MEMORABLE_PART_LEN would panic for anything shorter)
    let directory_part = archive_address.to_hex();
    let web_name =
        format!("{memorable_part}{FIXED_WEBNAME_SEPARATOR}-{directory_part}").to_ascii_lowercase();

    // Only return names which will be accepted when decoding a host
    validate_dweb_name(&web_name)
}

/// Decode a dweb host string
///
/// Returns a DwebHost which includes the validated web name string, DWEB-NAME and VERSION (if present).
/// The host string and DWEB-NAME are returned converted to ASCII lowercase.
///
/// For example, 'v2.awesome-f834.www-dweb.au' would return
///
/// ```text
/// Ok(DwebHost{
///     dweb_host_string: "v2.awesome-f834.www-dweb.au",
///     dweb_name: "awesome-f834",
///     version: Some(2)
/// })
/// ```
///
/// # Errors
///
/// Returns an error if the host:
/// - is empty, or does not have three or four segments separated by '.'
/// - has four segments but the first is not 'v' followed by decimal digits giving a
///   version of 1 or more which fits in a u64
/// - has a DWEB-NAME which is rejected by validate_dweb_name()
/// - does not end with the dweb domain and TLD
///
/// # Examples
///
/// ```
/// use crate::dweb::web::name;
/// assert!(name::decode_dweb_host("v2.awesome-f834.www-dweb.au").is_ok());
/// assert!(name::decode_dweb_host("awesome-f834.www-dweb.au").is_ok());
/// assert!(name::decode_dweb_host("awesome99-f834.www-dweb.au").is_ok());
/// assert!(name::decode_dweb_host("awe-some-f834.www-dweb.au").is_ok());
/// assert!(name::decode_dweb_host("awe-99some-f834.www-dweb.au").is_ok());
///
/// assert!(name::decode_dweb_host("9awesome-f834.www-dweb.au").is_err());
/// assert!(name::decode_dweb_host("awe=some-f834.www-dweb.au").is_err());
/// assert!(name::decode_dweb_host("awe--some-f834.www-dweb.au").is_err());
/// assert!(name::decode_dweb_host(&String::from("v.awesome-f834.www-dweb.au").as_str()).is_err());
///
/// ```
//
// Note: for --features=fixed-dweb-hosts, this will also decode fixed web names which are
// differentiated by a DWEB-NAME containing the String::from(FIXED_WEBNAME_SEPARATOR) + "-";
pub fn decode_dweb_host(dweb_host: &str) -> Result<DwebHost> {
    println!("DEBUG decode_dweb_host({dweb_host})...");
    if dweb_host.is_empty() {
        return Err(eyre!("Dweb host cannot be zero length"));
    }

    // Three segments means there is no version, four means the first is 'v<VERSION>'
    let segments: Vec<&str> = dweb_host.split('.').collect();
    let (version_segment, dweb_name, domain_part, tld_part) = match segments.as_slice() {
        &[dweb_name, domain_part, tld_part] => (None, dweb_name, domain_part, tld_part),
        &[version_segment, dweb_name, domain_part, tld_part] => {
            (Some(version_segment), dweb_name, domain_part, tld_part)
        }
        _ => {
            return Err(eyre!(
                "Dweb host must contain three or four segments, each separated by '.'"
            ));
        }
    };

    let version = match version_segment {
        None => None,
        Some(segment) => {
            let digits = match segment.strip_prefix(VERSION_CHAR as char) {
                Some(digits) => digits,
                None => {
                    return Err(eyre!(
                        "Dweb host contains four segments (separated by '.') so first must start with 'v'"
                    ));
                }
            };

            // Check for digits before parsing because parsing a u64 accepts a leading '+'
            if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
                return Err(eyre!(
                    "VERSION must be an integer in web name: '{dweb_host}'"
                ));
            }

            match digits.parse::<u64>() {
                Ok(version) if version > 0 => Some(version),
                Ok(version) => {
                    return Err(eyre!("Invalid version {version}, lowest version is 1"));
                }
                // All digits, so this is a value too large for a u64
                Err(_) => {
                    return Err(eyre!(
                        "VERSION must be an integer in web name: '{dweb_host}'"
                    ));
                }
            }
        }
    };

    validate_dweb_name(dweb_name)?;

    if domain_part != DOMAIN_PART || tld_part != TLD_PART {
        return Err(eyre!(
            "Dweb host does not end with '{DOMAIN_PART}.{TLD_PART}' after the DWEB-NAME"
        ));
    }

    // A fixed name never has a version segment, and is marked by the separator plus a hyphen
    #[cfg(feature = "fixed-dweb-hosts")]
    let is_fixed_dweb_host = {
        let fixed_dweb_host_tag = String::from(FIXED_WEBNAME_SEPARATOR) + "-";
        version.is_none() && dweb_name.contains(&fixed_dweb_host_tag)
    };

    println!("DEBUG returning DwebHost: version: {version:?}, dweb_name: '{dweb_name}'");

    Ok(DwebHost {
        dweb_host_string: dweb_host.to_ascii_lowercase(),
        dweb_name: dweb_name.to_ascii_lowercase(),
        version,

        #[cfg(feature = "fixed-dweb-hosts")]
        is_fixed_dweb_host,
    })
}

/// Validate a DWEB-NAME string.
///
/// The part of a DWEB-NAME up to but excluding the final hyphen is known as the 'memorable part'.
///
/// The memorable_part must start with at least two alphabetic characters. This is to allow it to
/// be distinguished from a version parameter, which is a 'v' or 'V' followed by an integer (u64),
/// which is useful in apps, for parsing links where the version is an optional part of the URL path.
///
/// Following the first two alphabetic characters are a number of alphanumeric characters which may
/// be separated by single hyphens, up to a total length for the memorable part of MEMORABLE_PART_LEN.
/// Note that the length is not checked by this function.
///
/// Only ASCII letters and digits are accepted, as required for a subdomain of a host name.
///
/// Returns a copy of the name if it is valid, or an error describing the first problem found.
///
pub fn validate_dweb_name(dweb_name: &str) -> Result<String> {
    let name_bytes = dweb_name.as_bytes();
    if name_bytes.len() < 2
        || !name_bytes[0].is_ascii_alphabetic()
        || !name_bytes[1].is_ascii_alphabetic()
    {
        return Err(eyre!(
            "DWEB-NAME must start with at least two alphabetic characters"
        ));
    }

    // Test the final byte rather than slicing the str at len() - 1, which would panic
    // if the name ends with a multi-byte character
    if !name_bytes[name_bytes.len() - 1].is_ascii_alphanumeric() {
        return Err(eyre!("DWEB-NAME must end with an alphanumeric character"));
    }

    if !dweb_name
        .chars()
        .all(|c| c.is_ascii_alphanumeric() || c == '-')
    {
        return Err(eyre!(
            "DWEB-NAME can only contain letters, numbers (and non-consecutive hyphens)"
        ));
    }

    if dweb_name.contains("--") {
        return Err(eyre!("DWEB-NAME cannot contain '--'"));
    }

    Ok(dweb_name.to_string())
}

/// Register a DWEB-NAME for a history address provided as a string
///
/// The address string is converted to a HistoryAddress and passed to register_name(),
/// with the outcome of the registration printed as a DEBUG message.
///
/// Returns an error if the address string cannot be converted or if register_name() fails.
pub fn register_name_from_string(dweb_name: &str, history_address_str: &str) -> Result<()> {
    let history_address = crate::helpers::convert::str_to_history_address(history_address_str)
        .map_err(|e| {
            eyre!("Failed to register name due for INVALID history address string - {e}")
        })?;

    let outcome = register_name(dweb_name, history_address);
    match &outcome {
        Ok(_) => {
            println!("DEBUG Registered built-in DWEB-NAME: {dweb_name} -> {history_address_str}")
        }
        Err(e) => println!("DEBUG failed to register built-in DWEB-NAME '{dweb_name}' - {e}"),
    }

    outcome
}

/// Register a DWEB-NAME programmatically so it can be used in the browser address bar
///
/// The name is checked with validate_dweb_name() and then added to HISTORY_NAMES unless
/// it is already present. Registering a name again for the same address succeeds without
/// changing anything.
///
/// Returns an error if the name is invalid, if it is already registered for a different
/// address, or if the name cache cannot be locked.
pub fn register_name(dweb_name: &str, history_address: HistoryAddress) -> Result<()> {
    if let Err(e) = validate_dweb_name(&dweb_name) {
        return Err(eyre!("Invalid DWEB-NAME '{dweb_name}' - {e}"));
    }

    let mut names = match HISTORY_NAMES.lock() {
        Ok(guard) => guard,
        Err(e) => {
            return Err(eyre!("Failed to access dweb name cache - {e}"));
        }
    };

    match names.get(dweb_name) {
        // Already registered, which is only a problem if it is for a different address
        Some(registered_address) => {
            if history_address != *registered_address {
                let msg = format!(
                    "DWEB-NAME '{dweb_name}' already in use for HISTORY-ADDRESS '{}'",
                    registered_address.to_hex()
                );
                println!("{msg}");
                return Err(eyre!(msg));
            }
        }
        None => {
            names.insert(String::from(dweb_name), history_address);
        }
    }

    Ok(())
}

// Register builtin history addresses so they can be used immediately in browser (and CLI/app if supported in cli_options.rs)
//
// When is_local is true only the local 'awesome' site is registered, otherwise the set of
// public sites. Registration errors are deliberately ignored.
pub fn register_builtin_names(is_local: bool) {
    use crate::generated_rs::{builtins_local, builtins_public};

    if is_local {
        let _ = register_name_from_string("awesome", builtins_local::AWESOME_SITE_HISTORY_LOCAL);
        return;
    }

    // Each entry is a DWEB-NAME with the history address string to register it for
    let public_builtins: [(&str, &str); 6] = [
        (
            "atlas",
            "99e3b8df52814b379e216caf797426071000905a2cd93a9f5e90eef2b32517a9ec1ef0bfe27d79360014fd97639ac612",
        ),
        ("awesome", builtins_public::AWESOME_SITE_HISTORY_PUBLIC),
        (
            "billboard",
            "b6da6740bc5394f9ac0e6a6fa5a42f7f587d3aeaa48fd23ae9a45bef95b571a32429b0353148aa9e04f17cd6da57d179",
        ),
        (
            "friends",
            "a447871043968be2be1628584026cad30b824009a30eab43db3ee6dd8c0990051c27160cc8d1662da763d57c41c091f6",
        ),
        // Mainnet History is at: a27b3fdb495870ace8f91005223998dc675c8e1bceb50bac66c993bb720a013c9f83d7a46e6d0daecbb3530d5249e587
        // v1 Archive: 40ea2e530a60645363ae561c8a50c165f79d8a034c4458f68f1b848c11386e45
        (
            "scratchchat",
            "94592af349cafd579d2ed77da1679306caca7aff9564ecf0f0a5b2eccb76aeb94925a1c9939f392e891378f8c492f4a0",
        ),
        (
            "toast",
            "95be239165b7016b7f6dada20134438e038d0456bff04ec37943e95742726854225aa03faeed4e7bbd96f5383a8f9448",
        ),
    ];

    for (dweb_name, history_address_str) in public_builtins {
        let _ = register_name_from_string(dweb_name, history_address_str);
    }
}
use serde::{Deserialize, Serialize};
// use serde_json::Result;

// One entry from the dweb name cache in a form which can be serialised.
// recognised_dwebnames() returns one of these for each name in HISTORY_NAMES.
#[derive(Serialize, Deserialize, utoipa::ToSchema)]
pub struct RecognisedName {
    // The key of the entry in the name cache (register_name() uses the DWEB-NAME as the key)
    pub key: String,
    // The result of to_hex() on the address stored for the key
    pub history_address: String,
}

/// Return a Vec with one entry per recognised name in the form of RecognisedName struct
///
/// Entries are in the order given by iterating HISTORY_NAMES. Returns an error if the
/// name cache cannot be locked.
// TODO provide ways to sort this
pub fn recognised_dwebnames() -> Result<Vec<RecognisedName>> {
    let names = match HISTORY_NAMES.lock() {
        Ok(guard) => guard,
        Err(e) => {
            return Err(eyre!("Failed to access dweb name cache - {e}"));
        }
    };

    let names_vec: Vec<RecognisedName> = names
        .iter()
        .map(|(dweb_name, history_address)| RecognisedName {
            key: dweb_name.to_string(),
            history_address: history_address.to_hex(),
        })
        .collect();

    Ok(names_vec)
}

// Every one of these hosts is malformed in some way so must be rejected when decoded
#[test]
fn check_malformed_web_name() {
    use crate::web::name;

    let malformed_hosts = [
        "awe=some-f834.www-dweb.au",
        "awe@some-f834.www-dweb.au",
        "awe--some-f834.www-dweb.au",
        "awesom-f-e-f834.www-dweb.ant",
        "awesome-f834-.www-dweb.au",
        "awesome-f834.ww-dweb.au",
        "awesome-f834.www-dweb.ant",
        "awesome-f834.www-dweb.au.com",
        "v2.9awesome-f834.www-dweb.au",
        "v0.awesome-f834.www-dweb.au",
        "v2nd.awesome-f834.www-dweb.au",
    ];

    for host in malformed_hosts {
        assert!(name::decode_dweb_host(host).is_err());
    }
}