gecliht 0.2.0

A disparate collection of text manipulation and formatting algorithms.
Documentation
//! Chapter Ordering compares strings as if they were describing 
//! chapters or sections in a book. 
//!
//! For example, "4a" comes before "4c", and "a_4" comes before "a_10".
//!
//! The idea for this module comes from Aubrey Jaffer's chapter-ordering
//! functions within slib: <https://people.csail.mit.edu/jaffer/slib/Chapter-Ordering.html>

use std::cmp;
use std::cmp::Ordering;

/// Compares two strings in 'chapter order', returning the order between them.
///
/// The strings are ordered by finding the first non-matching run of
/// characters, either capital, lowercase or numeric. Numeric runs are
/// parsed into their equivalent number before comparison; when the first
/// difference falls inside a number, the whole number is compared, so
/// "a_15" comes before "a_103".
/// For runs of letters (and for numbers that do not fit in a `u32`) the
/// longer run is the greater, and runs of equal length use string order.
/// Where a 'chapter-like' identifier cannot be found, then the normal
/// string order will be used. When one string is a prefix of the other,
/// the shorter string comes first.
///
/// Positions are counted in characters rather than bytes, so non-ASCII
/// text in either string is handled without panicking.
///
/// # Examples
///
/// ```
/// assert_eq!(gecliht::chap_cmp("a_4", "a_10"), std::cmp::Ordering::Less);
/// assert_eq!(gecliht::chap_cmp("a_15", "a_103"), std::cmp::Ordering::Less);
/// assert_eq!(gecliht::chap_cmp("Section 1.2.1", "Section 1.1.2"), std::cmp::Ordering::Greater);
/// assert_eq!(gecliht::chap_cmp("Appendix D", "Appendix A"), std::cmp::Ordering::Greater);
/// assert_eq!(gecliht::chap_cmp("Appendix(W)", "Appendix(AA)"), std::cmp::Ordering::Less);
/// ```
///
pub fn chap_cmp (string1: &str, string2: &str) -> Ordering {
    // 1. find first point of difference, as a character index
    //    (gather_from counts characters, and byte slicing could split a
    //    multi-byte character)
    let chars1: Vec<char> = string1.chars().collect();
    let chars2: Vec<char> = string2.chars().collect();
    let shared = cmp::min(chars1.len(), chars2.len());

    let mut point = match (0..shared).find(|&i| chars1[i] != chars2[i]) {
        Some(point) => point,
        // no difference in the shared part: one is a prefix of the other
        None => return string1.len().cmp(&string2.len()),
    };

    // 2. if the difference is inside a number, step back to where that
    //    number starts: comparing only the tails would rank "15" above
    //    "103" (5 > 03). Everything before `point` is common to both
    //    strings, so looking at string1 alone is enough.
    if chars1[point].is_ascii_digit() || chars2[point].is_ascii_digit() {
        while point > 0 && chars1[point - 1].is_ascii_digit() {
            point -= 1;
        }
    }

    // 3. collect letters of same type from point onwards, for both strings
    match (gather_from(string1, point), gather_from(string2, point)) {
        (Some(chap1), Some(chap2)) => {
            // 4. if we have numbers, parse and compare
            if chap1.starts_with(|c: char| c.is_ascii_digit()) {
                if let (Ok(num1), Ok(num2)) = (chap1.parse::<u32>(), chap2.parse::<u32>()) {
                    return num1.cmp(&num2);
                }
            }
            // non-numbers and unparsable numbers end up here
            // -- longer runs are always greater; equal lengths use string order
            chap1.len().cmp(&chap2.len()).then_with(|| chap1.cmp(&chap2))
        },
        // no chapter-like run at the point of difference: default comparison
        _ => string1.cmp(string2),
    }
}

/// Given a string, returns the next string according to 'chapter order'.
///
/// As there is no string to compare with, we look from the end of the string
/// for a number or letter, and advance that to the next value. Numbers are
/// incremented; letters step through the alphabet, carrying to the left and
/// growing by one letter once every letter has wrapped ("Z" becomes "AA").
/// If none are found, then the string is returned unchanged.
///
/// A number that fits in a `u32` is re-rendered from its value, so leading
/// zeros are dropped ("a_09" becomes "a_10"). Larger numbers are incremented
/// digit by digit and keep their width unless a carry adds a digit.
///
/// # Examples
///
/// ```
/// assert_eq!(gecliht::chap_next("a_9"), "a_10");
/// assert_eq!(gecliht::chap_next("Section 1.2.1"), "Section 1.2.2");
/// assert_eq!(gecliht::chap_next("Appendix b"), "Appendix c");
/// assert_eq!(gecliht::chap_next("Appendix(Z)"), "Appendix(AA)");
/// assert_eq!(gecliht::chap_next("Appendix ABC"), "Appendix ABD");
/// assert_eq!(gecliht::chap_next("---"), "---");
/// ```
///
pub fn chap_next (string: &str) -> String {
    let mut result = string.to_string();

    // nothing chapter-like anywhere: hand the input back untouched
    let (posn, chap) = match gather_backwards(string) {
        Some(found) => found,
        None => return result,
    };
    // an empty run leaves nothing to advance
    if chap.is_empty() {
        return result;
    }

    let new_val = if chap.contains(|c: char| c.is_ascii_digit()) {
        increment_number(&chap)
    } else if chap.contains(|c: char| c.is_ascii_uppercase()) {
        increment_letters(&chap)
    } else {
        // must be lowercase - use same code as above, but upper->use->lower
        increment_letters(&chap.to_uppercase()).to_lowercase()
    };

    // posn is used as a byte offset; chap is ASCII so its len is its byte width
    result.replace_range(posn..(posn + chap.len()), &new_val);
    result
}

// 'Adds 1' to a run of uppercase letters, i.e. A -> B, AZ -> BA, Z -> AA.
// Letters to the left of the last one changed are kept as they are.
fn increment_letters (letters: &str) -> String {
    let mut chars: Vec<char> = letters.chars().collect();
    let mut carry = true;

    for c in chars.iter_mut().rev() {
        // a 'Z' wraps round to 'A' and passes the increment to its left
        carry = *c == 'Z';
        *c = next_char(*c);
        if !carry {
            break;
        }
    }
    if carry {
        // every letter wrapped to 'A', so the run gains one more 'A'
        chars.push('A');
    }

    chars.into_iter().collect()
}

// 'Adds 1' to a run of ASCII digits.
fn increment_number (digits: &str) -> String {
    // usual case: parse, add one, print (leading zeros are not kept)
    if let Some(next) = digits.parse::<u32>().ok().and_then(|num| num.checked_add(1)) {
        return next.to_string();
    }

    // too large for a u32: add one to the decimal text itself
    let mut out: Vec<u8> = digits.bytes().collect();
    let mut carry = true;
    for d in out.iter_mut().rev() {
        if *d == b'9' {
            *d = b'0';
        } else {
            *d += 1;
            carry = false;
            break;
        }
    }
    if carry {
        out.insert(0, b'1');
    }

    out.into_iter().map(char::from).collect()
}

// Steps an uppercase ASCII letter to the one after it, with 'Z' wrapping
// round to 'A'. Any other character is handed back unchanged.
fn next_char (c: char) -> char {
    match c {
        'Z' => 'A',
        // 'A'..='Y' are consecutive ASCII codes, so the successor is one code up
        'A'..='Y' => char::from(c as u8 + 1),
        _ => c,
    }
}

// Collects the run of same-kind characters starting at `point`, where
// `point` counts characters (not bytes) from the start of `string`.
// If character at point is uppercase, collect uppercase letters
// Else if character is lowercase, collect lowercase
// Else if character is numeric, collect numbers
// Else return None
// None is also returned when `point` is at or beyond the end of the string.
fn gather_from (string: &str, point: usize) -> Option<String> {
    // `nth` yields None past the end, which `?` turns into our None
    let first = string.chars().nth(point)?;

    // the kind of the first character decides what the rest of the run must be
    let same_kind: fn(&char) -> bool = match first {
        'A'..='Z' => char::is_ascii_uppercase,
        'a'..='z' => char::is_ascii_lowercase,
        '0'..='9' => char::is_ascii_digit,
        _ => return None,
    };

    Some(string.chars().skip(point).take_while(same_kind).collect())
}

// As with gather_from, except we start from end of string: trailing
// characters that are not ASCII letters or digits are skipped, then the run
// of same-kind characters (uppercase, lowercase or numeric) ending there is
// collected.
// Returns a tuple giving the byte offset at which the run starts, and the
// run itself; the offset can be used directly to slice or replace within
// `string`. Returns None if the string holds no ASCII letter or digit
// (which includes the empty string).
fn gather_backwards (string: &str) -> Option<(usize,String)> {
    // Scanning bytes is safe here: ASCII bytes never occur inside a
    // multi-byte UTF-8 character, so every match is a whole character.
    let bytes = string.as_bytes();

    // find last 'chapter-like' character; `end` is one past it
    let end = bytes.iter().rposition(|b| b.is_ascii_alphanumeric())? + 1;

    // the kind of that character decides what the rest of the run must be
    let last = bytes[end - 1];
    let in_run: fn(&u8) -> bool = if last.is_ascii_uppercase() {
        u8::is_ascii_uppercase
    } else if last.is_ascii_lowercase() {
        u8::is_ascii_lowercase
    } else {
        u8::is_ascii_digit
    };

    // the run starts just after the nearest character of another kind,
    // or at the very start of the string when there is none
    let start = bytes[..end]
        .iter()
        .rposition(|b| !in_run(b))
        .map_or(0, |before| before + 1);

    Some((start, string[start..end].to_string()))
}

#[cfg(test)]
mod tests {
    use super::*;

    // Each case is (input, starting point, expected run).
    #[test]
    fn test_gather_from () {
        let cases = [
            ("ABC", 0, Some("ABC")),
            ("Section ABc", 8, Some("AB")),
            ("Section ABc", 4, Some("ion")),
            ("Sect_19 ", 4, None),
            ("Sect_19 ", 5, Some("19")),
        ];

        for &(input, point, expected) in cases.iter() {
            assert_eq!(
                gather_from(input, point),
                expected.map(String::from),
                "gather_from({:?}, {})", input, point
            );
        }
    }

    // Each case is (left, right, expected ordering of left against right).
    #[test]
    fn test_chap_cmp () {
        let cases = [
            ("a_4", "a_4", Ordering::Equal),
            ("a_4", "a_10", Ordering::Less),
            ("Section 1.2.1", "Section 1.1.2", Ordering::Greater),
            ("Appendix D", "Appendix A", Ordering::Greater),
            ("Appendix W", "Appendix AA", Ordering::Less),
            ("Result^{3.99}", "Result^{4}", Ordering::Less),
            ("a 4", "a_10", Ordering::Less), // str::cmp
        ];

        for &(left, right, expected) in cases.iter() {
            assert_eq!(
                chap_cmp(left, right),
                expected,
                "chap_cmp({:?}, {:?})", left, right
            );
        }
    }

    // Each case is (input, expected successor).
    #[test]
    fn test_chap_next () {
        let cases = [
            ("a_9", "a_10"),
            ("Section 1.2.1", "Section 1.2.2"),
            ("Appendix B", "Appendix C"),
            ("Appendix b", "Appendix c"),
            ("Appendix(Z)", "Appendix(AA)"),
            ("Appendix(AZ)", "Appendix(BA)"),
            ("Appendix(zz)", "Appendix(aaa)"),
            ("Appendix(ZZ)", "Appendix(AAA)"),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(chap_next(input), expected, "chap_next({:?})", input);
        }
    }
}