use std::sync::{atomic::{AtomicUsize, Ordering}, Arc};
use polars::prelude::{AnyValue, TimeUnit};
use quick_xml::{events::{attributes::{Attribute, Attributes}, Event}, name::QName};
use rayon::prelude::*;
use crate::{cellvalue::{Cell,CellFormat}, wb::DateSystem};
/// Largest column number `col2num` accepts (inclusive upper bound of its range check).
const XL_MAX_COL: u32 = 16384;
/// Smallest column number `col2num` accepts (inclusive lower bound of its range check).
const XL_MIN_COL: u32 = 1;
/// Returns the value of the first attribute in `attrs` whose key equals `which`.
///
/// Attributes are examined in document order and the search stops at the
/// first match; `None` is returned when no attribute carries that key.
///
/// # Panics
///
/// Panics if an attribute visited before the match cannot be parsed, or if
/// the matching value is not valid UTF-8.
#[inline]
pub fn get_attribute(attrs: Attributes, which: &[u8]) -> Option<String> {
    let wanted = QName(which);
    attrs
        .map(|parsed| parsed.unwrap())
        .find(|candidate| candidate.key == wanted)
        .map(|hit| attr_value(&hit))
}
/// Copies the raw bytes of an attribute value into an owned `String`.
///
/// The bytes are taken as-is; no XML unescaping is applied here.
///
/// # Panics
///
/// Panics if the value is not valid UTF-8.
#[inline]
pub fn attr_value(a: &Attribute) -> String {
    let raw_bytes: Vec<u8> = a.value.clone().into_owned();
    String::from_utf8(raw_bytes).unwrap()
}
/// Extracts the bottom-right corner of a range reference such as `"A1:C10"`.
///
/// Returns `(row, col)` of the cell after the `:` — `(10, 3)` for the example
/// above — with the column converted by `col2num`.
///
/// `(0, 0)` is returned when the reference contains no `:` and also when the
/// part after the `:` is not `<letters><digits>` with a valid column and a
/// row that fits in `u32`. Malformed input therefore never panics.
pub fn used_area(used_area_range: &str) -> (u32, u32) {
    // Only the corner after the first ':' matters; `split_once` works on byte
    // offsets, so non-ASCII text before the colon cannot misplace the slice.
    let Some((_, bottom_right)) = used_area_range.split_once(':') else {
        return (0, 0);
    };

    // The column letters run up to the first non-alphabetic character; the
    // remainder has to be the row number.
    let letters_len = bottom_right
        .find(|c: char| !c.is_ascii_alphabetic())
        .unwrap_or(bottom_right.len());
    let (letters, digits) = bottom_right.split_at(letters_len);

    match (col2num(letters), digits.parse::<u32>()) {
        (Some(col), Ok(row)) => (row, col),
        _ => (0, 0),
    }
}
/// Converts column letters (`"A"`, `"z"`, `"XFD"`, …) to a 1-based column number.
///
/// Letters are matched case-insensitively and must all be ASCII `A`–`Z`.
/// Returns `None` for an empty string, for any other character, and for a
/// result outside `XL_MIN_COL..=XL_MAX_COL`.
#[inline]
pub fn col2num(letter: &str) -> Option<u32> {
    let mut num: u32 = 0;
    for byte in letter.bytes() {
        // ASCII-only on purpose: Unicode upper-casing would turn e.g. 'ß'
        // into "SS" and let it through as a valid column.
        if !byte.is_ascii_alphabetic() {
            return None;
        }
        num = num * 26 + u32::from(byte.to_ascii_uppercase() - b'A') + 1;
        // Bail out as soon as the limit is passed so long inputs can never
        // overflow the accumulator.
        if num > XL_MAX_COL {
            return None;
        }
    }
    (num >= XL_MIN_COL).then_some(num)
}
/// Parses a cell reference such as `"B12"` into `(row, col)`.
///
/// ASCII letters (either case) accumulate into a 1-based base-26 column
/// number and ASCII digits accumulate into the row number, so `"B12"` yields
/// `(12, 2)`. A component that is absent stays `0`.
///
/// # Errors
///
/// Returns `Err` when the column or row number overflows `usize`, or when the
/// reference contains a character that is neither an ASCII letter nor an
/// ASCII digit (for example `$` or `:`).
pub fn reference2pos(reference: &str) -> Result<(usize, usize), String> {
    let mut col: usize = 0;
    let mut row: usize = 0;
    for c in reference.chars() {
        if c.is_ascii_alphabetic() {
            let col_value = c.to_ascii_uppercase() as usize - 'A' as usize + 1;
            col = col
                .checked_mul(26)
                .and_then(|x| x.checked_add(col_value))
                .ok_or_else(|| "Column index overflow".to_string())?;
        } else if let Some(digit) = c.to_digit(10) {
            row = row
                .checked_mul(10)
                .and_then(|x| x.checked_add(digit as usize))
                .ok_or_else(|| "Row index overflow".to_string())?;
        } else {
            // Report bad input through the Result instead of panicking.
            return Err(format!(
                "Invalid character {:?} in cell reference {:?}",
                c, reference
            ));
        }
    }
    Ok((row, col))
}
/// Closing tag of a `row` element.
// NOTE(review): presumably meant to be passed as the `chunk_end` terminator of
// `XmlChunker::new` — confirm at the call sites.
pub const ROW_END: &str = "</row>";
/// Splits a byte buffer into consecutive text chunks that each end right
/// after an occurrence of a terminator string.
pub struct XmlChunker<'a> {
    /// Complete input buffer being split.
    data: &'a [u8],
    /// Byte offset into `data` at which the next chunk starts.
    cursor: AtomicUsize,
    /// Target chunk length in bytes (`data.len() / chunks`).
    chunk_size: usize,
    /// Terminator every chunk has to end with.
    chunk_end: &'a str,
}

impl<'a> XmlChunker<'a> {
    /// Creates a chunker that aims for roughly `chunks` pieces of `data`,
    /// each cut immediately after an occurrence of `chunk_end`.
    ///
    /// A `chunks` value of `0` is treated as `1`.
    pub fn new(data: &'a [u8], chunks: usize, chunk_end: &'a str) -> Self {
        Self {
            data,
            cursor: AtomicUsize::new(0),
            // `max(1)` keeps a zero chunk count from dividing by zero.
            chunk_size: data.len() / chunks.max(1),
            chunk_end,
        }
    }

    /// Returns the next chunk and advances the cursor past it.
    ///
    /// The cut is placed after the last terminator that ends within
    /// `chunk_size` bytes of the chunk start. When that window holds no
    /// terminator (a single record is longer than `chunk_size`), the chunk is
    /// extended to the next terminator instead. `None` is returned once no
    /// further terminator exists; bytes after the final terminator are never
    /// emitted.
    ///
    /// Bytes that are not valid UTF-8 are replaced with U+FFFD rather than
    /// discarding the chunk.
    ///
    /// The cursor is read and written in two separate atomic operations, so
    /// concurrent callers could be handed the same chunk.
    fn next_chunk(&self) -> Option<String> {
        let start = self.cursor.load(Ordering::Acquire);
        let total = self.data.len();
        if start >= total {
            return None;
        }
        let marker = self.chunk_end.as_bytes();
        let target = start.saturating_add(self.chunk_size).min(total);

        // True when a terminator lying entirely inside the current chunk ends
        // at byte offset `i`.
        let ends_at = |i: usize| match i.checked_sub(marker.len()) {
            Some(from) => from >= start && &self.data[from..i] == marker,
            None => false,
        };

        // Prefer the last terminator inside the target window (inclusive of
        // its end); otherwise grow the chunk up to the next terminator.
        let cut = (start + 1..=target)
            .rev()
            .find(|&i| ends_at(i))
            .or_else(|| (target + 1..=total).find(|&i| ends_at(i)))?;

        self.cursor.store(cut, Ordering::Release);
        Some(String::from_utf8_lossy(&self.data[start..cut]).into_owned())
    }

    /// Collects every remaining chunk, in order, starting at the current cursor.
    pub fn chunks(&self) -> Vec<String> {
        std::iter::from_fn(|| self.next_chunk()).collect()
    }
}
pub fn get_cell_data_from_chunk<'a>(
chunk: &str,
share_strings:Arc<Vec<String>>,
styles: Arc<Vec<CellFormat>>,
date_system: Arc<DateSystem>
) -> Option<Vec<Cell<'a>>> {
let mut cells: Vec<Cell<'a>> = Vec::with_capacity(2000);
let mut reader = quick_xml::Reader::from_str(chunk);
let mut buf = Vec::with_capacity(1024);
let mut in_value = false;
let mut is_string = false;
let mut is_inlinstring = false;
let mut each_col_count =0;
let mut cell_row = 0;
let mut cell_col = 0;
let mut cell_format = CellFormat::Number;
let mut cell_value = AnyValue::Null;
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) if e.name().as_ref() == b"c" => {
e.attributes().for_each(|a| {
let a = a.unwrap();
match a.key.as_ref() {
b"r" => {
let reference = attr_value(&a);
if let Ok((row, col)) = reference2pos(reference.as_ref()) {
cell_row = row;
cell_col = col;
}
}
b"t" => {
let t = attr_value(&a);
if t == "s" || t == "str" || t == "inlineStr" {
is_string = true;
}
if t =="inlineStr"{
is_inlinstring = true;
}
}
b"s" => {
let s = attr_value(&a);
cell_format = *styles.get(s.parse::<usize>().unwrap()).unwrap();
}
_ => {}
}
});
}
Ok(Event::End(ref e)) if e.name().as_ref() == b"c" => {
}
Ok(Event::End(ref e)) if e.name().as_ref() == b"row" => {
}
Ok(Event::Start(ref e)) if e.name().as_ref() == b"v"||e.name().as_ref() == b"t" => {
in_value = true;
}
Ok(Event::Text(ref e)) if in_value => {
in_value = false;
let raw_value = &e.unescape().unwrap()[..];
if is_string {
if is_inlinstring{
cell_value = AnyValue::StringOwned(raw_value.to_string().into());
cells.push(Cell{value:cell_value,pos:(cell_row,cell_col)});
}else{
let s = share_strings.get(raw_value.parse::<usize>().unwrap())
.map(|x| x.clone())
.unwrap_or_default();
cell_value = AnyValue::StringOwned(s.into());
cells.push(Cell{value:cell_value,pos:(cell_row,cell_col)});
}
is_string = false;
is_inlinstring = false;
} else {
let num = raw_value.parse::<f64>().unwrap();
match cell_format {
CellFormat::Number => {
cell_value = AnyValue::Float64(num);
cells.push(Cell{value:cell_value,pos:(cell_row,cell_col)});
}
CellFormat::DateTime => {
let gap_days = match *date_system {
DateSystem::V1900 => 25569,
DateSystem::V1904 => 24109,
};
cell_value = AnyValue::Date(num as i32 - gap_days);
cells.push(Cell{value:cell_value,pos:(cell_row,cell_col)});
}
CellFormat::TimeDelta => {
let gap_days = match *date_system {
DateSystem::V1900 => 25569,
DateSystem::V1904 => 24109,
};
let milliseconds = ((num - gap_days as f64) * 86400000.0) as i64;
cell_value = AnyValue::Datetime(milliseconds, TimeUnit::Milliseconds, None);
cells.push(Cell{value:cell_value,pos:(cell_row,cell_col)});
}
}
cell_format = CellFormat::Number;
}
}
Ok(Event::Eof) => break,
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
_ => (),
}
buf.clear();
}
Some(cells)
}