1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
//! Traits to access a CRI (or CRI reference) using accessor methods.
//!
//! While a CRI *could* be implemented with type state (where a full CRI has a .pull() that gives a
//! scheme and a tail), this makes for needlessly complex and verbose code.
//!
//! The hope with the sketched API is that when used on, say, a CRI that's a plain memory slice (or
//! pointer to a known-wellformed CRI, as they are self-delimiting), for which calling `.path()`
//! would incur a call to `.host_and_pathindex()` and in turn to `.scheme_and_hostindex()`, that a
//! program that calls `.scheme()`, `.host()` and `.path()` in succession would be understood by the
//! compiler to remove the costly nested calls and use the already-available results. Analysis of a
//! compiled program will be required to see whether that is actually done.
//!
//! This may need some help from `const`ification and `#[inline]` annotations.
//!
//! Compared to a type-stated version, this is easier to use when not trying to squeeze the last bit
//! of performance, but may be harder to use when squeezing (for there might be some manual labor
//! involved to ensure that, for example, nothing too register-heavy is done between iterating over
//! the path and asking for the query).

use crate::traits;
use crate::characterclasses::{PATH_UE, QUERY_UE, FRAGMENT_UE, HOST_UE};

mod resolved;
pub use resolved::RuntimeResolved;

/// The different values the `discard` component of a CRI reference can have
///
/// Equality on this type is total (the only payload is a `u8`), so it implements `Eq` in addition
/// to `PartialEq`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Discard {
    /// Discard all existing path components.
    ///
    /// Corresponds to `true` in the serialization.
    All,
    /// Discard as many path components as indicated in the argument.
    Some(u8),
}

/// Commonalities between CriRef and Cri
///
/// This interface comes with no error handling, as it assumes that the underlying object *is* a
/// CRI. When processing a received unchecked CRI, see [AllegedCri] for an example of how to handle
/// that.
///
/// ## Invariants
///
/// The accessor based interface shown here does not inherently map some of the invariants that the
/// model is built on, and that are enforced by the CDDL syntax. All implementations are expected
/// to adhere to them, but (the trait being safe to implement), users can not declare code
/// unreachable based on them, and are expected to panic if they run into unexpected situations.
/// (Most users will likely not observe the malbehavior, as for example they won't even query the
/// port once they find no host). Likewise, implementations may panic if components are accessed
/// that can not be present as per other components' output.
///
/// For implementers:
///
/// * The host (and port) is only accessed if it is indicated that they are available.
///
/// For users:
///
/// * There can only be Some port if there is a host.
pub trait CriBase {
    /// Type of the scheme; it is handed out by the `scheme()` accessors of [CriRef] and [Cri].
    type Scheme<'a>: traits::Scheme where Self: 'a;
    /// Type of the value returned by [`host()`](CriBase::host).
    type Host<'a>: traits::Host where Self: 'a;

    /// Type of a single path segment, as yielded by [`PathIter`](CriBase::PathIter).
    type PathItem<'a>: traits::TextOrPet<PATH_UE> where Self: 'a;
    /// Type of a single query item, as yielded by [`QueryIter`](CriBase::QueryIter).
    type QueryItem<'a>: traits::TextOrPet<QUERY_UE> where Self: 'a;
    /// Type of the fragment returned by [`fragment()`](CriBase::fragment).
    type FragmentItem<'a>: traits::TextOrPet<FRAGMENT_UE> where Self: 'a;
    /// Type of the user information returned by [`userinfo()`](CriBase::userinfo).
    ///
    /// Note that this is bounded on the `HOST_UE` character class.
    type UserInfoItem<'a>: traits::TextOrPet<HOST_UE> where Self: 'a;

    /// Iterator returned by [`path()`](CriBase::path); it knows its exact length.
    type PathIter<'a>: Iterator<Item=Self::PathItem<'a>> + ExactSizeIterator where Self: 'a;
    /// Iterator returned by [`query()`](CriBase::query).
    type QueryIter<'a>: Iterator<Item=Self::QueryItem<'a>> where Self: 'a;

    /// Iterate over the path segments.
    fn path(&self) -> Self::PathIter<'_>;
    /// Iterate over the query items.
    fn query(&self) -> Self::QueryIter<'_>;
    /// The fragment, if one is set.
    fn fragment(&self) -> Option<Self::FragmentItem<'_>>;

    /// The user information of the authority, if one is set.
    ///
    /// The formatters in this module only ask for it when the authority is
    /// `Authority::HostPort`, and write it followed by `@` in front of the host.
    fn userinfo(&self) -> Option<Self::UserInfoItem<'_>>;
    /// The host of the authority.
    ///
    /// As per the invariants above, this is only to be called when a host is indicated to be
    /// available (the formatters in this module do so when the authority is
    /// `Authority::HostPort`); implementations may panic otherwise.
    fn host(&self) -> Self::Host<'_>;
    /// The port of the authority, if one is set.
    ///
    /// As per the invariants above, there can only be a port if there is a host.
    fn port(&self) -> Option<u16>;
}

/// A CRI reference (which may be full or relative)
///
/// There is no error handling in this interface: it assumes that the underlying object *is* a
/// CRI. When processing a received unchecked CRI, see [AllegedCri] for an example of how to handle
/// that.
///
/// Some CriRef implementations may inherently not support a scheme or a host -- for example, the
/// relative CriRef implied by CoAP Location-* options can only ever express CRI references with
/// discard=True and no scheme or host. Such implementations would use the Never (`!`) type for
/// Scheme and Host.
///
/// ## Invariants
///
/// On top of those of CriBase, a CriRef has one more invariant that its interface does not
/// express:
///
/// For users:
///
/// * If [Discard] is not [Discard::All], then scheme, host and port are expected to be None.
pub trait CriRef: CriBase {
    /// How many path components of the base this reference discards.
    fn discard(&self) -> Discard;
    /// The scheme of the CRI reference, if one is set.
    fn scheme(&self) -> Option<Self::Scheme<'_>>;
    /// The type of authority of the CRI reference
    ///
    /// This may be absent if the scheme is absent (indicating that the base URI's authority is
    /// left as is).
    fn authority(&self) -> Option<traits::Authority>;

    /// Attempt to express the CRI into something that will probably pass for a URI reference.
    ///
    /// Not every CRI reference is expressible as a URI reference. In particular:
    ///
    /// * discard=0 but path present: These append a path segment (eg. http://example.com/foo +
    ///   append bar = http://example.com/foo/bar)
    ///
    ///   In this case, the "→/" character sequence (which is not a valid URI) is produced.
    ///
    /// Any error reported by the writer is passed on to the caller.
    ///
    /// This panics if a scheme is present but no authority type is given.
    fn format_uri_ref_like(&self, w: &mut impl core::fmt::Write) -> core::fmt::Result {
        use traits::*;

        // Tracks what, if anything, has to be written in front of the next path segment.
        #[derive(PartialEq, Debug)]
        enum Pending {
            // Nothing is pending; path segments can be written right away
            Not,
            // A →/ is pending (and is only written if a path segment actually follows)
            DiscardZero,
            // The remaining variants all stand for a pending slash, with different causes:
            InsidePath,
            BecauseOfAuthority,
            BecauseOfScheme, // but only if an authority is set
            BecauseOfDiscardAll, // expressed a bit differently, as it is unknown what this is resolved against
        }

        let mut pending = match self.discard() {
            // Nothing can be written yet: what follows may be a harmless '?' / '#', or a path
            // (which is the inexpressible case).
            Discard::Some(0) => Pending::DiscardZero,
            Discard::Some(1) => {
                // Not the sharpest criterion for when a `./` is necessary, but a sufficient one,
                // and notably one that is consistent with the test vectors.
                let first_has_colon = self
                    .path()
                    .next()
                    .map_or(false, |p| p.contains_unescaped(':'));
                if first_has_colon {
                    write!(w, "./")?;
                }
                Pending::Not
            }
            Discard::Some(levels) => {
                // One "../" per discarded component beyond the first
                for _ in 1..levels {
                    write!(w, "../")?;
                }
                Pending::Not
            }
            // Nothing can be written yet: a '/' goes before the path unless this turns out to be
            // the scheme-and-no-host case, which is only known further down.
            Discard::All => Pending::BecauseOfDiscardAll,
        };

        if let Some(scheme) = self.scheme() {
            write!(w, "{}:", scheme.to_text_scheme())?;
            pending = Pending::BecauseOfScheme;
        }

        match self.authority() {
            None => {
                assert!(pending != Pending::BecauseOfScheme, "If a scheme was given, there needs to be some authority");
            }
            Some(Authority::NoAuthoritySlashless) => {
                pending = Pending::Not;
            }
            Some(Authority::NoAuthoritySlashStart) => {
                // FIXME: Or BecauseOfAuthority? Might make a difference when the path list is empty.
                pending = Pending::BecauseOfScheme;
            }
            Some(Authority::HostPort) => {
                write!(w, "//")?;
                if let Some(userinfo) = self.userinfo() {
                    write!(w, "{}@", userinfo.to_uri_component())?;
                }
                self.host().format_uri_host(w)?;
                if let Some(port) = self.port() {
                    write!(w, ":{}", port)?;
                }
                pending = Pending::BecauseOfAuthority;
            }
        }

        for segment in self.path() {
            match pending {
                Pending::DiscardZero => write!(w, "→/")?,
                Pending::InsidePath
                | Pending::BecauseOfAuthority
                | Pending::BecauseOfScheme
                | Pending::BecauseOfDiscardAll => write!(w, "/")?,
                Pending::Not => {}
            }
            pending = Pending::InsidePath;
            write!(w, "{}", segment.to_uri_component())?;
        }

        // The first query item is introduced by '?', all later ones by '&'
        let mut query_separator = "?";
        for item in self.query() {
            write!(w, "{}{}", query_separator, item.to_uri_component())?;
            query_separator = "&";
        }

        if let Some(fragment) = self.fragment() {
            write!(w, "#{}", fragment.to_uri_component())?;
        }
        Ok(())
    }

    /// Like [`format_uri_ref_like()`](CriRef::format_uri_ref_like), but returning the output as
    /// a newly allocated string.
    fn render_uri_ref_like(&self) -> String {
        let mut rendered = String::new();
        self.format_uri_ref_like(&mut rendered).expect("Strings accept all writes");
        rendered
    }
}

/// A Cri (a full one, not a CRI reference)
///
/// Unlike a CriRef, this has a scheme unconditionally, and no applicable value for discard.
pub trait Cri: CriBase {
    /// The scheme of the CRI
    ///
    /// This is always present in a (full) CRI.
    fn scheme(&self) -> Self::Scheme<'_>;
    /// The type of authority of the CRI
    ///
    /// There is always a value for this in a (full) CRI, although some variants indicate that
    /// no authority is actually present.
    fn authority(&self) -> traits::Authority;

    /// Write the corresponding URI into a writer
    ///
    /// The components are written in the order scheme, authority (user information, host and
    /// port, only for `Authority::HostPort`), path, query and fragment.
    ///
    /// # Errors
    ///
    /// Any error reported by the writer is passed on to the caller; nothing further is written
    /// after the first failed write.
    fn format_uri(&self, w: &mut impl core::fmt::Write) -> core::fmt::Result {
        use traits::*;

        write!(w, "{}:", self.scheme().to_text_scheme())?;
        // Whether a '/' has to be written in front of the next path segment
        let mut separator_slash_pending = match self.authority() {
            Authority::HostPort => {
                write!(w, "//")?;
                if let Some(userinfo) = self.userinfo() {
                    // The result needs to be propagated here just like for every other write
                    write!(w, "{}@", userinfo.to_uri_component())?;
                }
                self.host().format_uri_host(w)?;
                if let Some(port) = self.port() {
                    write!(w, ":{}", port)?;
                }
                true
            }
            Authority::NoAuthoritySlashStart => true,
            Authority::NoAuthoritySlashless => false,
        };
        for p in self.path() {
            if separator_slash_pending {
                write!(w, "/")?;
            }
            // All segments after the first are separated by a slash in any case
            separator_slash_pending = true;
            write!(w, "{}", p.to_uri_component())?;
        }
        // The first query item is introduced by '?', all later ones by '&'
        let mut is_first_query = true;
        for q in self.query() {
            write!(w, "{}{}", if is_first_query { "?" } else { "&" }, q.to_uri_component())?;
            is_first_query = false;
        }
        if let Some(f) = self.fragment() {
            write!(w, "#{}", f.to_uri_component())?;
        }
        Ok(())
    }

    /// Write the corresponding URI into a string
    fn render_uri(&self) -> String {
        let mut s = String::new();
        self.format_uri(&mut s).expect("Strings accept all writes");
        s
    }

    /// Resolve a reference against this base
    ///
    /// This produces a runtime resolution -- containing just the pointers, and determining any
    /// attributes at runtime.
    ///
    /// Concrete types might get more optimized versions of this; in particular, some might modify
    /// a base in place when following a link.
    fn resolve<R: CriRef>(&self, reference: R) -> RuntimeResolved<Self, R> {
        RuntimeResolved { base: self, reference }
    }

    /// Compare for equality
    ///
    /// This returns true if two CRIs are sure to be equal in the CRI normalization model, and is
    /// equivalent to URI equality after syntax based normalization. No scheme based normalization
    /// is performed. It relies on some of the CRI well-formedness requirements to be met (no PET
    /// on characters that don't need it, no initial empty path segments on NoAuthoritySlashless
    /// URNs).
    ///
    /// User information, host and port are only compared if the (equal) authority type is
    /// `Authority::HostPort`.
    fn equals(&self, other: &impl Cri) -> bool {
        use crate::traits::{TextOrPet, Scheme, Host};

        /// Compare two optional components: equal if both are absent, or both are present and
        /// equal.
        fn equal_option_pet<const U: crate::characterclasses::AsciiSet>(a: Option<impl TextOrPet<U>>, b: Option<impl TextOrPet<U>>) -> bool{
            // FIXME that's way too verbose for what should be a plain comparison operator but
            // can't be because we can't blanket implement PartialEq over a trait. Maybe
            // encapsulate return values in an own type rather than an Option?
            match (a, b) {
                (None, None) => true,
                (Some(_), None) | (None, Some(_)) => false,
                (Some(s), Some(o)) => s.equals(&o),
            }
        }

        if !self.scheme().equals(other.scheme()) || self.authority() != other.authority() {
            return false;
        }
        // The authority types are known to be equal at this point, so checking one side suffices
        if self.authority() == traits::Authority::HostPort
            && (!equal_option_pet(self.userinfo(), other.userinfo()) || !self.host().equals(&other.host()) || self.port() != other.port())
        {
            return false;
        }
        TextOrPet::iter_equals(self.path(), other.path())
            && TextOrPet::iter_equals(self.query(), other.query())
            && equal_option_pet(self.fragment(), other.fragment())
    }
}

// /// A (full, i.e. non-relative) CRI of whose wellformedness is not determined from the start
// ///
// /// An AllegedCri can be consumed as a CRI in the typical scheme / host_port / path / query /
// /// fragment fashion, and will not err out in the course of that (the [Cri] trait not providing
// /// fallible access). The data produced will represent a CRI (as per the trait's type constraints),
// /// but if the CRI provided at construction is not well-formed, it will be arbitrary.
// ///
// /// After 
// pub struct AllegedCri<'a> {
//     data: &'a[u8],
//     // To be tested: We *could* place the already obtained cursor positions in memo fields here.
//     // That probably does not actually force the compiler to keep it (as long as all use of a local
//     // struct is inlined, I wouldn't know what forces the compiler to have this on the stack in
//     // full). Both versions rely on some advanced optimizations in the compiler -- if the memos are
//     // in the struct, it'll need to inspect the program flow and see that they are only accessed in
//     // sequence and thus that it suffices to keep the currently used one in a register or on the
//     // stack. If there are no memos, the compiler needs to see instead that there is an expensive
//     // computation done twice over the immutable struct, and that the result can be stored
//     // inbetween.
// 
//     // Under the same assumptions, even this could be calculated on demand.
//     verdict: core::cell::Cell<Verdict>,
// }
// 
// impl<'a> AllegedCri<'a> {
//     fn new(data: &'a [u8]) -> Self {
//         AllegedCri {
//             data,
//             verdict: core::cell::Cell::new(Verdict::Undecided)
//         }
//     }
// 
//     pub fn wellformed_so_far(&self) -> bool {
//         self.verdict.get() != Verdict::Erroneous
//     }
// 
//     pub fn wellformed(&self) -> bool {
//         let _ = self.fragment();
//         self.wellformed_so_far()
//     }
// }
// 
// #[derive(PartialEq)]
// enum Verdict {
//     Undecided,
//     OK,
//     Erroneous,
// }