use std::cmp::Ordering;
use uucore::{
CharByte, IntoCharByteIterator,
i18n::{
UEncoding,
collator::{AlternateHandling, CollatorOptions, locale_cmp, try_init_collator},
get_locale_encoding,
},
};
use crate::syntax_tree::{MaybeNonUtf8Str, MaybeNonUtf8String};
/// Orders `a` relative to `b` using the locale collator.
///
/// Before comparing, an attempt is made to initialise the collator with
/// alternate handling set to [`AlternateHandling::Shifted`]. The outcome of
/// that attempt is deliberately discarded.
pub(crate) fn locale_comparison(a: &MaybeNonUtf8Str, b: &MaybeNonUtf8Str) -> Ordering {
    let mut collator_options = CollatorOptions::default();
    collator_options.alternate_handling = Some(AlternateHandling::Shifted);

    // NOTE(review): the result is ignored — presumably initialisation is a
    // no-op once a collator already exists; confirm against `try_init_collator`.
    let _ = try_init_collator(collator_options);

    locale_cmp(a, b)
}
/// Returns the 1-based position of the first unit of `left` that also occurs
/// anywhere in `right`, or `0` when no unit of `left` occurs in `right`.
///
/// What counts as a "unit" depends on `encoding`:
/// * [`UEncoding::Utf8`]: the items yielded by `iter_char_bytes()`.
/// * [`UEncoding::Ascii`]: individual bytes.
fn index_with_locale(
    left: &MaybeNonUtf8Str,
    right: &MaybeNonUtf8Str,
    encoding: UEncoding,
) -> usize {
    // Zero-based offset of the first matching unit, if any.
    let first_hit = match encoding {
        UEncoding::Utf8 => left.iter_char_bytes().position(|candidate| {
            right.iter_char_bytes().any(|member| member == candidate)
        }),
        UEncoding::Ascii => left.iter().position(|candidate| right.contains(candidate)),
    };

    // Callers expect 1-based positions, with 0 reserved for "not found".
    match first_hit {
        Some(zero_based) => zero_based + 1,
        None => 0,
    }
}
/// Same as [`index_with_locale`], using the encoding reported by
/// `get_locale_encoding()`.
///
/// Returns the 1-based position of the first unit of `left` found in `right`,
/// or `0` if there is none.
pub(crate) fn locale_aware_index(left: &MaybeNonUtf8Str, right: &MaybeNonUtf8Str) -> usize {
    let encoding = get_locale_encoding();
    index_with_locale(left, right, encoding)
}
/// Returns the length of `input` in the units of the current locale encoding.
///
/// * [`UEncoding::Utf8`]: the number of items yielded by `iter_char_bytes()`,
///   i.e. the same units that the index and substring helpers in this module
///   count in. Input that is only partially valid UTF-8 is therefore measured
///   consistently with them, instead of falling back to the raw byte length
///   for the whole string as soon as a single byte fails to decode.
/// * [`UEncoding::Ascii`]: the number of bytes.
pub(crate) fn locale_aware_length(input: &MaybeNonUtf8Str) -> usize {
    match get_locale_encoding() {
        // NOTE(review): assumes `iter_char_bytes()` yields exactly one item per
        // decoded character and one per undecodable byte, as the `CharByte`
        // variants suggest — confirm against uucore.
        UEncoding::Utf8 => input.iter_char_bytes().count(),
        UEncoding::Ascii => input.len(),
    }
}
/// Extracts at most `len` units from `s`, after skipping the first `pos` units.
///
/// `pos` is a zero-based offset. What counts as a "unit" depends on `encoding`:
/// * [`UEncoding::Utf8`]: the items yielded by `iter_char_bytes()`; decoded
///   characters are re-encoded as UTF-8 and raw bytes are copied through.
/// * [`UEncoding::Ascii`]: individual bytes.
///
/// If `pos` is past the end of `s` the result is empty; if fewer than `len`
/// units remain, all remaining units are returned.
fn substr_with_locale(
    s: MaybeNonUtf8String,
    pos: usize,
    len: usize,
    encoding: UEncoding,
) -> MaybeNonUtf8String {
    match encoding {
        UEncoding::Utf8 => {
            // The output is carved out of `s`, so it can never need more bytes
            // than `s` holds. Clamp the reservation: `len` is caller-supplied
            // and may be arbitrarily large, and reserving it verbatim can
            // panic (capacity overflow) or abort on allocation failure.
            let mut string = MaybeNonUtf8String::with_capacity(len.min(s.len()));
            let mut buf = [0; 4];
            for cb in s.iter_char_bytes().skip(pos).take(len) {
                match cb {
                    CharByte::Char(c) => {
                        string.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
                    }
                    CharByte::Byte(b) => string.push(b),
                }
            }
            string
        }
        UEncoding::Ascii => s.into_iter().skip(pos).take(len).collect(),
    }
}
/// Same as [`substr_with_locale`], using the encoding reported by
/// `get_locale_encoding()`.
///
/// Skips the first `pos` units of `s` and returns at most `len` of the units
/// that follow.
pub(crate) fn locale_aware_substr(
    s: MaybeNonUtf8String,
    pos: usize,
    len: usize,
) -> MaybeNonUtf8String {
    let encoding = get_locale_encoding();
    substr_with_locale(s, pos, len, encoding)
}