mod buckets;
pub mod buffer_management;
mod inducing;
mod text_analysis;
mod util;
use crate::{Character, IndexStorage};
use buffer_management::{BufferConfig, BufferRequestMode, BufferStack, Buffers};
use num_traits::NumCast;
use std::cmp;
use text_analysis::TextMetadata;
pub fn suffix_array_induced_sort<C: Character, I: IndexStorage>(
text: &[C],
max_char: C,
main_buffer: &mut [I],
extra_buffers: &mut BufferStack<I>,
) {
assert!(text.len() < <usize as NumCast>::from(I::max_value()).unwrap());
assert!(text.len() < usize::MAX);
if text.is_empty() {
return;
}
let num_buckets = max_char.rank() + 1;
let buffer_config = BufferConfig::calculate::<I>(text.len(), main_buffer.len(), num_buckets);
let Buffers {
remaining_main_buffer_without_persistent_buffers,
is_s_type_buffer,
persistent_bucket_start_indices_buffer,
maybe_working_bucket_indices_buffer,
} = buffer_management::instantiate_or_recover_buffers(
buffer_config,
main_buffer,
extra_buffers,
num_buckets,
BufferRequestMode::Instatiate,
);
let (final_remaining_main_buffer, working_bucket_indices_buffer) =
if buffer_config.working_bucket_buffer_in_main_buffer {
remaining_main_buffer_without_persistent_buffers
.split_at_mut(remaining_main_buffer_without_persistent_buffers.len() - num_buckets)
} else {
(
&mut remaining_main_buffer_without_persistent_buffers[..],
maybe_working_bucket_indices_buffer.unwrap(),
)
};
let suffix_array_buffer = &mut final_remaining_main_buffer[..text.len()];
let text_metadata = text_analysis::scan_for_counts_and_s_l_types(
text,
persistent_bucket_start_indices_buffer,
is_s_type_buffer,
);
buckets::counts_into_bucket_start_indices(persistent_bucket_start_indices_buffer);
let num_lms_chars = buckets::place_text_order_lms_indices_into_buckets(
suffix_array_buffer,
persistent_bucket_start_indices_buffer,
working_bucket_indices_buffer,
text,
&text_metadata,
);
inducing::induce_to_sort_lms_substrings(
suffix_array_buffer,
persistent_bucket_start_indices_buffer,
working_bucket_indices_buffer,
&text_metadata,
text,
);
let (front, _, lms_indices) =
util::split_off_same_front_and_back_mut(suffix_array_buffer, num_lms_chars.as_());
front.copy_from_slice(lms_indices);
let num_different_names = create_reduced_text(
num_lms_chars,
remaining_main_buffer_without_persistent_buffers,
&text_metadata,
text,
);
let (_, first_char_rank) = text_metadata.into_parts();
let (main_buffer_for_recursion, reduced_text) =
remaining_main_buffer_without_persistent_buffers.split_at_mut(
remaining_main_buffer_without_persistent_buffers.len() - num_lms_chars.as_(),
);
buffer_management::setup_for_recursion(buffer_config, extra_buffers);
if num_different_names == num_lms_chars {
directly_construct_suffix_array(reduced_text, main_buffer_for_recursion);
} else {
main_buffer_for_recursion[..num_lms_chars.as_()].fill(I::max_value());
suffix_array_induced_sort(
reduced_text,
num_different_names - I::one(),
main_buffer_for_recursion,
extra_buffers,
);
};
let Buffers {
remaining_main_buffer_without_persistent_buffers,
is_s_type_buffer,
persistent_bucket_start_indices_buffer,
maybe_working_bucket_indices_buffer,
} = buffer_management::instantiate_or_recover_buffers(
buffer_config,
main_buffer,
extra_buffers,
num_buckets,
BufferRequestMode::Recover,
);
let text_metadata =
TextMetadata::from_filled_buffer_and_parts(is_s_type_buffer, first_char_rank);
let (main_buffer_for_recursion, backtransformation_table) =
remaining_main_buffer_without_persistent_buffers.split_at_mut(
remaining_main_buffer_without_persistent_buffers.len() - num_lms_chars.as_(),
);
create_backtransformation_table(backtransformation_table, &text_metadata, text.len());
backtransform_into_original_text_lms_indices(
&mut main_buffer_for_recursion[..num_lms_chars.as_()],
backtransformation_table,
);
let (final_remaining_main_buffer, working_bucket_indices_buffer) =
if buffer_config.working_bucket_buffer_in_main_buffer {
remaining_main_buffer_without_persistent_buffers
.split_at_mut(remaining_main_buffer_without_persistent_buffers.len() - num_buckets)
} else {
(
remaining_main_buffer_without_persistent_buffers,
maybe_working_bucket_indices_buffer.unwrap(),
)
};
let suffix_array_buffer = &mut final_remaining_main_buffer[..text.len()];
buckets::place_sorted_lms_indices_into_buckets(
suffix_array_buffer,
num_lms_chars,
persistent_bucket_start_indices_buffer,
working_bucket_indices_buffer,
text,
);
inducing::induce_to_finalize_suffix_array(
suffix_array_buffer,
persistent_bucket_start_indices_buffer,
working_bucket_indices_buffer,
&text_metadata,
text,
);
buffer_management::clean_up_extra_buffers(buffer_config, extra_buffers);
}
fn create_reduced_text<C: Character, I: IndexStorage>(
num_lms_chars: I,
remaining_main_buffer_without_persistent_buffers: &mut [I],
text_metadata: &TextMetadata<I>,
text: &[C],
) -> I {
if num_lms_chars == I::zero() {
return I::zero();
}
let (sorted_lms_substring_indices, _, reduced_text_placement_buffer) =
util::split_off_front_and_back_mut(
remaining_main_buffer_without_persistent_buffers,
num_lms_chars.as_(),
text.len().div_ceil(2),
);
reduced_text_placement_buffer.fill(I::max_value());
let mut current_name = I::zero();
for index_of_sorted_lms_substring_indices in 0..sorted_lms_substring_indices.len() - 1 {
let curr_lms_substring_index =
sorted_lms_substring_indices[index_of_sorted_lms_substring_indices];
let placement_index = curr_lms_substring_index >> 1;
reduced_text_placement_buffer[placement_index.as_()] = current_name;
let next_lms_substring_index =
sorted_lms_substring_indices[index_of_sorted_lms_substring_indices + 1];
if lms_substrings_are_unequal(
curr_lms_substring_index,
next_lms_substring_index,
text_metadata,
text,
) {
current_name = current_name + I::one();
}
}
let last_placement_index = *sorted_lms_substring_indices.last().unwrap() >> 1;
reduced_text_placement_buffer[last_placement_index.as_()] = current_name;
let mut write_index = reduced_text_placement_buffer.len() - 1;
for read_index in (0..reduced_text_placement_buffer.len()).rev() {
let maybe_lms_substring_name = reduced_text_placement_buffer[read_index];
if maybe_lms_substring_name != I::max_value() {
reduced_text_placement_buffer[write_index] = maybe_lms_substring_name;
write_index -= 1;
}
}
current_name + I::one()
}
fn directly_construct_suffix_array<I: IndexStorage>(
reduced_text: &mut [I],
main_buffer_for_recursion: &mut [I],
) {
for (reduced_text_suffix_index, &reduced_text_char) in reduced_text.iter().enumerate() {
main_buffer_for_recursion[reduced_text_char.as_()] =
<I as NumCast>::from(reduced_text_suffix_index).unwrap();
}
}
fn create_backtransformation_table<I: IndexStorage>(
backtransformation_table: &mut [I],
text_metadata: &TextMetadata<I>,
text_len: usize,
) {
let mut write_index = 0;
for text_index in num::range(I::one(), <I as NumCast>::from(text_len - 1).unwrap()) {
if text_metadata.is_lms_type(text_index) {
backtransformation_table[write_index] = text_index;
write_index += 1;
}
}
}
fn backtransform_into_original_text_lms_indices<I: IndexStorage>(
reduced_text_suffix_array_buffer: &mut [I],
backtransformation_table: &[I],
) {
for reduced_text_index in reduced_text_suffix_array_buffer {
*reduced_text_index = backtransformation_table[reduced_text_index.as_()];
}
}
fn lms_substrings_are_unequal<C: Character, I: IndexStorage>(
first_lms_substring_index: I,
second_lms_substring_index: I,
text_metadata: &TextMetadata<I>,
text: &[C],
) -> bool {
let mut smaller_lms_substring_index =
cmp::min(first_lms_substring_index, second_lms_substring_index);
let mut larger_lms_substring_index =
cmp::max(first_lms_substring_index, second_lms_substring_index);
if text[smaller_lms_substring_index.as_()] != text[larger_lms_substring_index.as_()] {
return true;
}
let text_len = <I as NumCast>::from(text.len()).unwrap();
smaller_lms_substring_index = smaller_lms_substring_index + I::one();
larger_lms_substring_index = larger_lms_substring_index + I::one();
loop {
if text[smaller_lms_substring_index.as_()] != text[larger_lms_substring_index.as_()] {
return true;
}
let first_substring_is_over = text_metadata.is_lms_type(smaller_lms_substring_index);
let second_substring_is_over = text_metadata.is_lms_type(larger_lms_substring_index);
if first_substring_is_over || second_substring_is_over {
return first_substring_is_over != second_substring_is_over;
}
smaller_lms_substring_index = smaller_lms_substring_index + I::one();
larger_lms_substring_index = larger_lms_substring_index + I::one();
if larger_lms_substring_index == text_len {
return true;
}
}
}