cli-justify 0.1.20

A CLI text justify tool
Documentation
use crate::text_utils::{
  is_ascii_numeric, leading_whitespace_width,
  split_trailing_numeric_token_with_min_gap,
};

use super::layout_signals::{
  looks_like_command_prompt_line, looks_like_toc_entry,
};

/// Punctuation stripped from both ends of a word before the heading
/// heuristics inspect it: quotes, brackets, and sentence punctuation.
const TOKEN_TRIM_CHARS: [char; 14] = [
  '"', '\'', '(', ')', '[', ']', '{', '}', ',', '.', ';', ':', '!', '?',
];

/// Returns `word` with every leading and trailing character listed in
/// `TOKEN_TRIM_CHARS` removed. Interior characters are never touched, so
/// `"(a.b)"` becomes `"a.b"`.
fn trim_heading_token(word: &str) -> &str {
  word.trim_matches(|ch: char| TOKEN_TRIM_CHARS.contains(&ch))
}

/// Heuristically detects a short heading that was centered by padding it with
/// leading whitespace.
///
/// Returns `true` only when every condition holds:
/// - the line is not blank and its first visible character is not lowercase;
/// - `leading_whitespace_width(line)` reports at least 16;
/// - the text has at most 6 whitespace-separated words;
/// - the text has at most 40 characters;
/// - the text does not end with `.`, `,`, `;`, or `:`;
/// - `looks_like_toc_entry` does not claim the text.
pub(super) fn is_centered_short_heading(line: &str) -> bool {
  let trimmed = line.trim();
  // A missing first character means the line was blank.
  let Some(first) = trimmed.chars().next() else {
    return false;
  };
  if first.is_lowercase() {
    return false;
  }

  let leading_ws = leading_whitespace_width(line);
  let word_count = trimmed.split_whitespace().count();
  // Measure in characters rather than bytes: `str::len` would count a
  // multi-byte character several times and reject non-ASCII headings that
  // are visually well under the limit.
  let char_count = trimmed.chars().count();

  leading_ws >= 16
    && word_count <= 6
    && char_count <= 40
    && !trimmed.ends_with(['.', ',', ';', ':'])
    && !looks_like_toc_entry(trimmed)
}

/// Returns `true` when `word` is one of the small function words (articles,
/// prepositions, conjunctions, auxiliary and modal verbs) listed below.
///
/// The comparison is exact and case-sensitive; every table entry is
/// lowercase, so callers are expected to lowercase the word first.
fn is_heading_connector_word(word: &str) -> bool {
  const CONNECTOR_WORDS: &[&str] = &[
    // Articles, conjunctions, and prepositions.
    "a", "an", "and", "as", "at", "but", "by", "for", "from", "in", "into",
    "of", "on", "or", "the", "to", "via", "vs", "vs.", "with", "without",
    // Forms of "to be".
    "is", "are", "was", "were", "be", "been", "being",
    // Auxiliary verbs.
    "do", "does", "did", "has", "have", "had",
    // Modal verbs.
    "can", "could", "shall", "should", "will", "would", "may", "might",
    "must",
    // Conditionals and comparatives.
    "if", "than", "then",
  ];

  CONNECTOR_WORDS.contains(&word)
}

/// Returns `true` when `token` looks like the number part of a label such as
/// `3`, `2.1:`, `4-5)` or `1/2.`.
///
/// Leading and trailing `:`, `.` and `)` are ignored. What remains must
/// consist solely of ASCII digits and the separators `.`, `-`, `/`, and must
/// contain at least one digit, so bare separators such as `-` or `/` are not
/// mistaken for a number.
fn is_numbered_label_token(token: &str) -> bool {
  let core = token.trim_matches([':', '.', ')']);
  let only_number_chars = core
    .chars()
    .all(|ch| ch.is_ascii_digit() || matches!(ch, '.' | '-' | '/'));

  // `any` is false for an empty string, so this also rejects tokens that
  // were nothing but trimmed punctuation.
  only_number_chars && core.chars().any(|ch| ch.is_ascii_digit())
}

/// Returns `true` when the first two whitespace-separated words of `trimmed`
/// form a caption-style label: an all-caps ASCII word of 5 to 12 letters
/// followed by a token that `is_numbered_label_token` accepts (for example
/// `CHAPTER 12`).
///
/// Lines with fewer than two words never match. Any words after the second
/// are ignored.
fn looks_like_labeled_caption_line(trimmed: &str) -> bool {
  let mut tokens = trimmed.split_whitespace();
  let (Some(label), Some(number)) = (tokens.next(), tokens.next()) else {
    return false;
  };

  // The label is pure ASCII when the second test passes, so its byte length
  // equals its letter count.
  let label_is_caps_word = (5..=12).contains(&label.len())
    && label.chars().all(|ch| ch.is_ascii_uppercase());

  label_is_caps_word && is_numbered_label_token(number)
}

/// Returns `true` when `line` starts with a caption-style label (as judged by
/// `looks_like_labeled_caption_line`) and carries at least one more word
/// after the label and its number.
pub(super) fn looks_like_numbered_label_heading(line: &str) -> bool {
  let trimmed = line.trim();
  if trimmed.is_empty() {
    return false;
  }

  // A third word means there is heading text beyond the "LABEL number" pair.
  let has_text_after_number = trimmed.split_whitespace().nth(2).is_some();
  looks_like_labeled_caption_line(trimmed) && has_text_after_number
}

/// Decides whether a single word is acceptable inside a title-case heading.
///
/// After stripping surrounding punctuation with `trim_heading_token`, the
/// word is accepted when it is any of:
/// - something `is_ascii_numeric` accepts;
/// - a connector word (compared after ASCII-lowercasing);
/// - a short acronym: at most 6 bytes, at least one alphabetic character,
///   and made only of uppercase letters, ASCII digits, or `&`;
/// - a capitalised word: an uppercase first character followed only by
///   lowercase letters, ASCII digits, or one of `'`, `-`, `’`, `/`, `&`.
///
/// A word that is empty after trimming is rejected.
fn looks_like_title_case_heading_word(word: &str) -> bool {
  let token = trim_heading_token(word);
  let Some(first) = token.chars().next() else {
    return false;
  };

  if is_ascii_numeric(token)
    || is_heading_connector_word(&token.to_ascii_lowercase())
  {
    return true;
  }

  let is_short_acronym = token.len() <= 6
    && token.chars().any(char::is_alphabetic)
    && token
      .chars()
      .all(|ch| ch.is_uppercase() || ch.is_ascii_digit() || ch == '&');
  if is_short_acronym {
    return true;
  }

  first.is_uppercase()
    && token.chars().skip(1).all(|ch| {
      ch.is_lowercase()
        || ch.is_ascii_digit()
        || matches!(ch, '\'' | '-' | '’' | '/' | '&')
    })
}

/// Heuristically detects a one-word section heading such as `Introduction`.
///
/// Returns `true` only when every condition holds:
/// - `leading_whitespace_width(line)` reports at most 6;
/// - the line contains exactly one whitespace-separated word;
/// - the word is 3 to 24 characters long;
/// - it starts with an uppercase letter and continues only with lowercase
///   letters, `-`, `'`, or `’`;
/// - at least 3 of its characters are letters. Hyphens and apostrophes do
///   not count, so a word like `A--` is rejected.
pub(super) fn looks_like_single_word_section_heading(line: &str) -> bool {
  if leading_whitespace_width(line) > 6 {
    return false;
  }

  // Exactly one word: the first `next()` must yield it, the second nothing.
  let mut words = line.split_whitespace();
  let (Some(only_word), None) = (words.next(), words.next()) else {
    return false;
  };

  if !(3..=24).contains(&only_word.chars().count()) {
    return false;
  }

  let mut chars = only_word.chars();
  if !chars.next().is_some_and(char::is_uppercase) {
    return false;
  }

  // The leading capital is the first letter. Joining punctuation is allowed
  // in the word but must not count towards the letter minimum; otherwise
  // the final check could never fail once the length check has passed.
  let mut letters = 1usize;
  for ch in chars {
    if ch.is_lowercase() {
      letters += 1;
    } else if !matches!(ch, '-' | '\'' | '’') {
      return false;
    }
  }
  letters >= 3
}

/// Heuristically detects a multi-word, title-case section heading that sits
/// at or near the left margin.
///
/// The line is rejected when any of the following is true:
/// - it is blank, or its first visible character is neither an uppercase
///   letter nor an ASCII digit;
/// - `leading_whitespace_width(line)` reports more than 6;
/// - it has fewer than 2 or more than 10 words, or fewer than 8 or more than
///   88 characters;
/// - it ends with `.`, `,`, or `;`;
/// - it contains `://`, contains a run of two or more spaces, is claimed by
///   `looks_like_command_prompt_line`, or is claimed by
///   `looks_like_labeled_caption_line`;
/// - any word is empty after punctuation trimming, or is neither a
///   number-like token, a connector word, nor a title-case word.
///
/// Number-like tokens and connector words are tolerated but not counted; at
/// least 2 remaining title-case words are required.
pub(super) fn looks_like_left_aligned_section_heading(line: &str) -> bool {
  let trimmed = line.trim();
  // A missing first character also covers the blank-line case.
  let Some(first) = trimmed.chars().next() else {
    return false;
  };
  if !first.is_uppercase() && !first.is_ascii_digit() {
    return false;
  }

  if leading_whitespace_width(line) > 6 {
    return false;
  }

  let word_count = trimmed.split_whitespace().count();
  if !(2..=10).contains(&word_count) {
    return false;
  }

  let char_count = trimmed.chars().count();
  if !(8..=88).contains(&char_count) {
    return false;
  }

  if trimmed.ends_with(['.', ',', ';']) {
    return false;
  }

  // A double space already matches every longer run of spaces, so a separate
  // triple-space test would be redundant.
  if trimmed.contains("://")
    || looks_like_command_prompt_line(trimmed)
    || trimmed.contains("  ")
    || looks_like_labeled_caption_line(trimmed)
  {
    return false;
  }

  let mut meaningful_words = 0usize;
  for word in trimmed.split_whitespace() {
    let token = trim_heading_token(word);
    if token.is_empty() {
      // The word was nothing but punctuation.
      return false;
    }
    // Section numbers like `2`, `3.1`, or `4-5` are allowed but not counted.
    if token.chars().all(|ch| ch.is_ascii_digit() || ch == '.' || ch == '-') {
      continue;
    }
    // Connector words are allowed but not counted either.
    if is_heading_connector_word(&token.to_ascii_lowercase()) {
      continue;
    }
    meaningful_words += 1;
    if !looks_like_title_case_heading_word(token) {
      return false;
    }
  }

  meaningful_words >= 2
}

/// Heuristically detects a row of column-aligned text.
///
/// A line with fewer than 3 words never matches. Otherwise the decision
/// depends on how many internal runs of 5 or more spaces it contains:
/// - none: not a multi-column row;
/// - exactly one: a match only when `leading_whitespace_width(line)` reports
///   at most 3;
/// - two or more: always a match.
pub(super) fn looks_like_multi_column_row(line: &str) -> bool {
  if line.split_whitespace().nth(2).is_none() {
    return false;
  }

  match count_internal_gaps_of_width(line, 5) {
    0 => false,
    1 => leading_whitespace_width(line) <= 3,
    // Tables, callouts, and code-aligned rows can be indented further but
    // still carry multiple wide internal gaps that we want to preserve
    // verbatim.
    _ => true,
  }
}

/// Counts the runs of at least `threshold` consecutive space characters that
/// sit between two non-space characters of `line`.
///
/// Leading spaces and tabs are skipped first, and a trailing run of spaces is
/// never counted because no character follows it. Only `' '` extends a run;
/// every other character, including a tab, ends the current run.
fn count_internal_gaps_of_width(line: &str, threshold: usize) -> usize {
  // After this trim the text cannot start with a space, so every run of
  // spaces seen below is preceded by some non-space character.
  let body = line.trim_start_matches([' ', '\t']);

  let mut gaps = 0usize;
  let mut pending_spaces = 0usize;
  for ch in body.chars() {
    match ch {
      ' ' => pending_spaces += 1,
      _ => {
        // A run is only judged once the character that closes it arrives.
        if pending_spaces >= threshold {
          gaps += 1;
        }
        pending_spaces = 0;
      }
    }
  }
  gaps
}

/// Returns `true` when `trimmed` splits into a short label plus a trailing
/// page number.
///
/// The split is delegated to `split_trailing_numeric_token_with_min_gap`,
/// called with a minimum gap of 4; its exact splitting rules are defined
/// elsewhere. A match requires that the helper returns a number, that the
/// label is non-empty, and that the label has at most 8 words and at most 64
/// characters.
fn has_wide_gap_before_page_number(trimmed: &str) -> bool {
  let (label, page_number) =
    split_trailing_numeric_token_with_min_gap(trimmed, 4);
  if page_number.is_none() || label.is_empty() {
    return false;
  }

  label.split_whitespace().count() <= 8 && label.chars().count() <= 64
}

/// Heuristically detects a running page header or footer line.
///
/// A blank line never matches. Otherwise the line matches when any of the
/// following holds:
/// - the whole trimmed line is accepted by `is_ascii_numeric`;
/// - `leading_whitespace_width(line)` reports at least 16 and the line is a
///   "short numeric suffix" line: at most 4 words, the last of which is
///   accepted by `is_ascii_numeric`;
/// - `has_wide_gap_before_page_number` accepts the trimmed line;
/// - it is a short numeric suffix line of at most 40 characters whose first
///   word is entirely alphabetic.
pub(super) fn looks_like_page_header_or_footer(line: &str) -> bool {
  let trimmed = line.trim();
  if trimmed.is_empty() {
    return false;
  }

  if is_ascii_numeric(trimmed) {
    return true;
  }

  let tokens: Vec<&str> = trimmed.split_whitespace().collect();
  let short_numeric_suffix = tokens.len() <= 4
    && tokens.last().is_some_and(|token| is_ascii_numeric(token));

  let leading_ws = leading_whitespace_width(line);
  if leading_ws >= 16 && short_numeric_suffix {
    return true;
  }

  if has_wide_gap_before_page_number(trimmed) {
    return true;
  }

  // Measure in characters rather than bytes: `str::len` would count a
  // multi-byte character several times and reject non-ASCII running titles
  // that are visually within the limit.
  short_numeric_suffix
    && trimmed.chars().count() <= 40
    && tokens
      .first()
      .is_some_and(|token| token.chars().all(|ch| ch.is_alphabetic()))
}