cliw 0.1.1

Command Line In Web
Documentation
//! Slice a URL into command line arguments.
//!
//! This functionality is feature gated.  To enable getting [`UrlArgs`] on wasm
//! you must enable the "urlargs" feature.
//!

use percent_encoding::percent_decode_str;

/// Parser state: which kind of arg is coming next.
#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
enum State {
    /// Reading the command: the full path up to the queries, which begin at '?'.
    #[default]
    Command,
    /// Reading query strings, which are delimited by a '&'.
    Query,
    /// Finished: a hash '#' was found or there are no more queries.
    Done,
}

/// Slice a URL into command line arguments.
///
/// `UrlArgs` is an [`Iterator`] that hands out the pieces of a url one at a time:
///
/// * The first arg is the url path: everything before the first '?' or '#'.
/// * The rest of the args, if any, are the query strings.  They start after
///   the first '?' character and are delimited by '&'.
/// * Anything after a hash '#' is discarded.
///
/// Every arg, including the first, is percent [`decoded`] before it is
/// returned.  Byte sequences that do not decode to valid UTF-8 are replaced
/// with U+FFFD.
///
/// ## Example
/// ```rust
///     let url = "http:///www.example.org/index.html?--first&second&third#discard".to_string();
///     let mut args : cliw::url_args::UrlArgs = url.into();
///
///     assert_eq!(args.next().unwrap(),"http:///www.example.org/index.html");
///     assert_eq!(args.next().unwrap(),"--first");
///     assert_eq!(args.next().unwrap(),"second");
///     assert_eq!(args.next().unwrap(),"third");
///     assert_eq!(args.next(),None);
/// ```
///
/// [`decoded`]: http://docs.rs/percent_encoding/percent_decode_str
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Debug)]
pub struct UrlArgs {
    /// The complete url that is being sliced into args.
    url: String,
    /// Byte offset into `url` at which the next arg begins.
    next_arg: usize,
    /// Which kind of arg is read next, or whether iteration has finished.
    state: State,
}

/// Construct [`UrlArgs`] from the current webpage.
impl UrlArgs {
    /// Return args from the webpage url
    ///
    /// When running on wasm with the "urlargs" feature set, read the webpage url from the
    /// [`location.href`](https://developer.mozilla.org/en-US/docs/Web/API/Location/href)
    /// property and split it into command line arguments:
    ///
    /// * The first arg is the url path.
    /// * The rest of the args, if any, are the full [`decoded`] query strings.
    ///   The query strings start at a '?' character and are delimited by '&'.
    /// * Anything after a hash '#' is discarded.
    ///
    /// Notes:
    ///
    /// If `web_sys` can not access the window or the `location.href`, the
    /// error message is used in place of the url, so the iterator returned
    /// will have just one arg containing that message.
    ///
    /// When compiled to native or without the "urlargs" feature set this function uses an empty
    /// string "" for the url.  The iterator returned then yields a single
    /// empty arg (the url path) and no query args.
    /// ## Example
    /// If the webpage URL is <http:///www.example.org/index.html?--first&second&third#discard>
    /// ```rust
    ///     let mut args = cliw::url_args::UrlArgs::new();
    ///
    /// #    let url = "http:///www.example.org/index.html?--first&second&third#discard".to_string();
    /// #    args = url.into();
    ///     assert_eq!(args.next().unwrap(),"http:///www.example.org/index.html");
    ///     assert_eq!(args.next().unwrap(),"--first");
    ///     assert_eq!(args.next().unwrap(),"second");
    ///     assert_eq!(args.next().unwrap(),"third");
    ///     assert_eq!(args.next(),None);
    /// ```
    ///
    /// [`decoded`]: http://docs.rs/percent_encoding/percent_decode_str
    #[must_use]
    pub fn new() -> Self {
        // Native builds, and builds without the "urlargs" feature, have no
        // webpage to ask, so the url is the empty string.
        #[cfg(not(all(target_arch = "wasm32", feature = "urlargs")))]
        {
            Self::from("")
        }
        // On wasm with the "urlargs" feature the webpage url is parsed.  When
        // the url can not be read, the error text stands in for it.
        #[cfg(all(target_arch = "wasm32", feature = "urlargs"))]
        {
            let href: String = match web_sys::window() {
                None => "Unable to access `Window`".into(),
                Some(window) => window
                    .location()
                    .href()
                    .unwrap_or_else(|_| "Unable to access `Location`".into()),
            };
            Self::from(href)
        }
    }
}

/// Walk the URL, yielding one command line argument per call.
impl Iterator for UrlArgs {
    type Item = std::ffi::OsString;

    /// Get the next argument from url.
    ///
    /// * The first arg is the url path.
    /// * The rest of the args, if any, are the full [`decoded`] query strings.
    ///   The query strings start at a '?' character and are delimited by '&'.
    /// * Anything after a hash '#' is discarded.
    ///
    /// Empty segments are not skipped: they are returned as empty args.
    /// Once the end of the url or a '#' has been reached, every further call
    /// returns `None`.
    /// ## Example
    /// ```rust
    ///     let url = "hostname/index.html?foo%20%3Cbar%3E&second&third#discard";
    ///     let mut args : cliw::url_args::UrlArgs = url.into();
    ///
    ///     assert_eq!(args.next().unwrap(),"hostname/index.html");
    ///     assert_eq!(args.next().unwrap(),"foo <bar>");
    ///     assert_eq!(args.next().unwrap(),"second");
    ///     assert_eq!(args.next().unwrap(),"third");
    ///     assert_eq!(args.next(),None);
    /// ```
    ///
    /// [`decoded`]: http://docs.rs/percent_encoding/percent_decode_str
    fn next(&mut self) -> Option<Self::Item> {
        // The current state decides which character closes the arg being read.
        let separator = match self.state {
            State::Done => return None,
            State::Command => '?',
            State::Query => '&',
        };

        let start = self.next_arg;
        let rest = &self.url[start..];

        let raw = match rest.find(|c: char| c == '#' || c == separator) {
            Some(offset) => {
                // Both possible terminators are one byte wide, so stepping a
                // single byte past the match lands on the start of the next arg.
                self.next_arg = start + offset + 1;
                // A '#' ends the iteration; the separator means queries follow.
                self.state = if rest[offset..].starts_with('#') {
                    State::Done
                } else {
                    State::Query
                };
                &rest[..offset]
            }
            None => {
                // No terminator left: the remainder of the url is the last arg.
                self.next_arg = self.url.len();
                self.state = State::Done;
                rest
            }
        };

        let decoded = percent_decode_str(raw).decode_utf8_lossy();
        Some(std::ffi::OsString::from(decoded.into_owned()))
    }
}

/// Build a [`UrlArgs`] from a borrowed url.
///
/// The url is copied into an owned `String`; parsing starts at the
/// beginning of the url.
/// ## Example
/// ```rust
///     let url = "http:///www.example.org/index.html?--first&second&third#discard";
///     let mut args = cliw::url_args::UrlArgs::from(url);
/// ```
impl From<&str> for UrlArgs {
    fn from(url: &str) -> Self {
        Self::from(url.to_owned())
    }
}

/// Build a [`UrlArgs`] that takes ownership of the url `String`.
///
/// Parsing starts at the beginning of the url, in the default state.
/// ## Example
/// ```rust
///     let url = "http:///www.example.org/index.html?--first&second&third#discard".to_string();
///     let mut args = cliw::url_args::UrlArgs::from(url);
/// ```
impl From<String> for UrlArgs {
    fn from(url: String) -> Self {
        // Nothing has been consumed yet: begin at byte 0 in the initial state.
        let next_arg = 0;
        let state = State::default();
        Self {
            url,
            next_arg,
            state,
        }
    }
}

impl Default for UrlArgs {
    /// Build a [`UrlArgs`] over the empty url "".
    ///
    /// Iterating it yields a single empty arg and no query args.
    fn default() -> Self {
        Self::from("")
    }
}

#[cfg(test)]
mod tests {
    use crate::url_args::UrlArgs;
    use std::ffi::OsString;

    /// Parse `url` and check that it yields exactly the `expected` args, in order.
    fn check(url: &str, expected: &[&str]) {
        let actual: Vec<OsString> = UrlArgs::from(url).collect();
        let expected: Vec<OsString> = expected.iter().copied().map(OsString::from).collect();
        assert_eq!(actual, expected);
    }

    /// A url without queries gives only the path; the hash is dropped.
    #[test]
    fn with_hash() {
        check(
            "http:///www.example.org/index.html#hash",
            &["http:///www.example.org/index.html"],
        );
    }

    /// Queries are split on '&' and the trailing hash is dropped.
    #[test]
    fn simple_query() {
        check(
            "http:///www.example.org/index.html?--first&second&third#hash",
            &[
                "http:///www.example.org/index.html",
                "--first",
                "second",
                "third",
            ],
        );
    }

    /// Percent escapes are decoded in the path as well as in the queries.
    #[test]
    fn percent_encoded() {
        check(
            "http:///www.exam%20ple.org/index.html?foo%20%3Cbar%3E",
            &["http:///www.exam ple.org/index.html", "foo <bar>"],
        );
    }

    /// Only '?', '&' and '#' are special; a '&' before the first '?' is not.
    #[test]
    fn strange_chars() {
        check(
            "(http\"///www&example&org*index\\html?first&second&third)#hash",
            &[
                "(http\"///www&example&org*index\\html",
                "first",
                "second",
                "third)",
            ],
        );
    }

    /// Escapes that do not form valid UTF-8 become the replacement char 0xFFFD.
    #[test]
    fn invalid() {
        check(
            "http:///www.example.org/index.html?foo%f0%3Cbar%3E&second&third#hash",
            &[
                "http:///www.example.org/index.html",
                "foo\u{FFFD}<bar>",
                "second",
                "third",
            ],
        );
    }

    /// Empty segments are returned as empty args rather than skipped.
    #[test]
    fn nulls() {
        check("?&&#", &["", "", "", ""]);
        check("#?&&", &[""]);
        check("", &[""]);
    }
}