1use crate::series::{SeriesTrait, Series};
2use crate::dtype::{DataType, DataTypeTrait};
3use crate::error::{AxionError, AxionResult};
4use super::groupby::GroupBy;
5use std::collections::{HashMap, HashSet};
6use std::fmt::{self, Debug};
7use std::cmp::Ordering;
8use rayon::prelude::*;
9use crate::io::csv::WriteCsvOptions;
10use csv;
11use std::io::Write;
12use std::fs::File;
13use std::path::Path;
14
/// A column-oriented table: an ordered list of series plus cached
/// bookkeeping (row count and name -> dtype map) about them.
#[derive(Clone)]
pub struct DataFrame {
    /// Number of rows. The constructors and `add_column` reject any column
    /// whose length differs from this value.
    height: usize,
    /// The columns, in insertion order.
    ///
    /// NOTE(review): this field is public, so writing to it directly bypasses
    /// the `height` / `schema` bookkeeping performed by the methods of this
    /// type — prefer the accessor and mutator methods.
    pub columns: Vec<Box<dyn SeriesTrait>>,
    /// Column name -> data type, maintained alongside `columns` by the
    /// methods that add, drop, or rename columns.
    schema: HashMap<String, DataType>,
}
53
54impl DataFrame {
55 pub fn new(columns: Vec<Box<dyn SeriesTrait>>) -> AxionResult<Self> {
80 let height = columns.first().map_or(0, |col| col.len());
81 let mut schema = HashMap::with_capacity(columns.len());
82
83 for col in &columns {
84 if col.len() != height {
85 return Err(AxionError::MismatchedLengths {
86 expected: height,
87 found: col.len(),
88 name: col.name().to_string(),
89 });
90 }
91 if schema.insert(col.name().to_string(), col.dtype()).is_some() {
92 return Err(AxionError::DuplicateColumnName(col.name().to_string()));
93 }
94 }
95
96 Ok(DataFrame { height, columns, schema })
97 }
98
99 pub fn new_empty() -> Self {
112 DataFrame {
113 height: 0,
114 columns: Vec::new(),
115 schema: HashMap::new(),
116 }
117 }
118
119 pub fn shape(&self) -> (usize, usize) {
132 (self.height, self.columns.len())
133 }
134
135 pub fn height(&self) -> usize {
137 self.height
138 }
139
140 pub fn width(&self) -> usize {
142 self.columns.len()
143 }
144
145 pub fn columns_names(&self) -> Vec<&str> {
151 self.columns.iter().map(|col| col.name()).collect()
152 }
153
154 pub fn dtypes(&self) -> Vec<DataType> {
160 self.columns.iter().map(|col| col.dtype()).collect()
161 }
162
163 pub fn schema(&self) -> &HashMap<String, DataType> {
165 &self.schema
166 }
167
168 pub fn column(&self, name: &str) -> AxionResult<&dyn SeriesTrait> {
178 self.columns
179 .iter()
180 .find(|col| col.name() == name)
181 .map(|col| col.as_ref())
182 .ok_or_else(|| AxionError::ColumnNotFound(name.to_string()))
183 }
184
185 pub fn column_mut<'a>(&'a mut self, name: &str) -> AxionResult<&'a mut dyn SeriesTrait> {
187 self.columns
188 .iter_mut()
189 .find(|col| col.name() == name)
190 .map(|col| col.as_mut() as &mut dyn SeriesTrait)
191 .ok_or_else(|| AxionError::ColumnNotFound(name.to_string()))
192 }
193
194 pub fn column_at(&self, index: usize) -> AxionResult<&dyn SeriesTrait> {
200 self.columns
201 .get(index)
202 .map(|col| col.as_ref())
203 .ok_or_else(|| AxionError::ColumnNotFound(format!("index {}", index)))
204 }
205
206 pub fn column_at_mut(&mut self, index: usize) -> AxionResult<&mut dyn SeriesTrait> {
208 self.columns
209 .get_mut(index)
210 .map(|col| col.as_mut() as &mut dyn SeriesTrait)
211 .ok_or_else(|| AxionError::ColumnNotFound(format!("index {}", index)))
212 }
213
214 pub fn add_column(&mut self, series: Box<dyn SeriesTrait>) -> AxionResult<()> {
232 if self.columns.is_empty() && self.height == 0 {
233 self.height = series.len();
234 } else if series.len() != self.height {
235 return Err(AxionError::MismatchedLengths {
236 expected: self.height,
237 found: series.len(),
238 name: series.name().to_string(),
239 });
240 }
241
242 if self.schema.contains_key(series.name()) {
243 return Err(AxionError::DuplicateColumnName(series.name().to_string()));
244 }
245
246 self.schema.insert(series.name().to_string(), series.dtype());
247 self.columns.push(series);
248 Ok(())
249 }
250
251 pub fn drop_column(&mut self, name: &str) -> AxionResult<Box<dyn SeriesTrait>> {
265 let position = self.columns.iter().position(|col| col.name() == name);
266
267 if let Some(pos) = position {
268 self.schema.remove(name);
269 let removed_col = self.columns.remove(pos);
270 if self.columns.is_empty() {
271 self.height = 0;
272 }
273 Ok(removed_col)
274 } else {
275 Err(AxionError::ColumnNotFound(name.to_string()))
276 }
277 }
278
279 pub fn rename_column(&mut self, old_name: &str, new_name: &str) -> AxionResult<()> {
291 if old_name == new_name {
292 return Ok(());
293 }
294
295 if !self.schema.contains_key(old_name) {
296 return Err(AxionError::ColumnNotFound(old_name.to_string()));
297 }
298
299 if self.schema.contains_key(new_name) {
300 return Err(AxionError::DuplicateColumnName(new_name.to_string()));
301 }
302
303 let dtype = self.schema.remove(old_name).unwrap();
304 self.schema.insert(new_name.to_string(), dtype);
305
306 for col in self.columns.iter_mut() {
307 if col.name() == old_name {
308 col.rename(new_name);
309 break;
310 }
311 }
312 Ok(())
313 }
314
315 pub fn downcast_column<T>(&self, name: &str) -> AxionResult<&Series<T>>
329 where
330 T: DataTypeTrait + 'static,
331 Series<T>: 'static,
332 {
333 let series_trait = self.column(name)?;
334 series_trait
335 .as_any()
336 .downcast_ref::<Series<T>>()
337 .ok_or_else(|| AxionError::TypeMismatch {
338 expected: T::DTYPE,
339 found: series_trait.dtype(),
340 name: name.to_string(),
341 })
342 }
343
344 pub fn is_empty(&self) -> bool {
350 self.height == 0 || self.columns.is_empty()
351 }
352
353 pub fn head(&self, n: usize) -> DataFrame {
363 let n = std::cmp::min(n, self.height);
364 if n == self.height {
365 return self.clone();
366 }
367 let new_columns = self.columns.iter().map(|col| col.slice(0, n)).collect();
368 DataFrame::new(new_columns).unwrap_or_else(|_| {
369 DataFrame::new(vec![]).unwrap()
370 })
371 }
372
373 pub fn tail(&self, n: usize) -> DataFrame {
379 let n = std::cmp::min(n, self.height);
380 if n == self.height {
381 return self.clone();
382 }
383 let offset = self.height - n;
384 let new_columns = self.columns.iter().map(|col| col.slice(offset, n)).collect();
385 DataFrame::new(new_columns).unwrap_or_else(|_| {
386 DataFrame::new(vec![]).unwrap()
387 })
388 }
389
390 pub fn select(&self, names: &[&str]) -> AxionResult<DataFrame> {
400 let mut new_columns = Vec::with_capacity(names.len());
401 for name in names {
402 let col = self.column(name)?;
403 new_columns.push(col.clone_box());
404 }
405 DataFrame::new(new_columns)
406 }
407
408 pub fn drop(&self, name_to_drop: &str) -> AxionResult<DataFrame> {
414 if !self.schema.contains_key(name_to_drop) {
415 return Err(AxionError::ColumnNotFound(name_to_drop.to_string()));
416 }
417
418 let new_columns = self.columns
419 .iter()
420 .filter(|col| col.name() != name_to_drop)
421 .map(|col| col.clone_box())
422 .collect();
423
424 DataFrame::new(new_columns)
425 }
426
427 pub fn filter(&self, mask: &Series<bool>) -> AxionResult<DataFrame> {
441 if mask.len() != self.height {
442 return Err(AxionError::MismatchedLengths {
443 expected: self.height,
444 found: mask.len(),
445 name: "过滤掩码".to_string(),
446 });
447 }
448
449 let mut filtered_columns = Vec::with_capacity(self.columns.len());
450 for col in &self.columns {
451 let filtered_col = col.filter(mask)?;
452 filtered_columns.push(filtered_col);
453 }
454
455 DataFrame::new(filtered_columns)
456 }
457
458 pub fn par_filter(&self, mask: &Series<bool>) -> AxionResult<DataFrame> {
471 if mask.len() != self.height {
472 return Err(AxionError::MismatchedLengths {
473 expected: self.height,
474 found: mask.len(),
475 name: "过滤掩码".to_string(),
476 });
477 }
478 if self.is_empty() {
479 return Ok(self.clone());
480 }
481 if mask.is_empty() && self.height > 0 {
482 return Err(AxionError::MismatchedLengths {
483 expected: self.height,
484 found: mask.len(),
485 name: "非空DataFrame的过滤掩码".to_string(),
486 });
487 }
488 if mask.is_empty() && self.height == 0 {
489 return Ok(self.clone());
490 }
491
492 let new_columns_results: Vec<AxionResult<Box<dyn SeriesTrait>>> = self
493 .columns
494 .par_iter()
495 .map(|col| col.filter(mask))
496 .collect();
497
498 let mut new_columns = Vec::with_capacity(new_columns_results.len());
499 for result in new_columns_results {
500 new_columns.push(result?);
501 }
502
503 DataFrame::new(new_columns)
504 }
505
506 pub fn inner_join(
520 &self,
521 right: &DataFrame,
522 left_on: &str,
523 right_on: &str,
524 ) -> AxionResult<DataFrame> {
525 let left_key_col: &Series<String> = self.downcast_column(left_on).map_err(|e| match e {
526 AxionError::ColumnNotFound(_) => AxionError::ColumnNotFound(format!("左侧连接键列 '{}'", left_on)),
527 AxionError::TypeMismatch { expected: _, found, name } => AxionError::JoinKeyTypeError {
528 side: "左侧".to_string(),
529 name,
530 expected: DataType::String,
531 found,
532 },
533 other => other,
534 })?;
535 let right_key_col: &Series<String> = right.downcast_column(right_on).map_err(|e| match e {
536 AxionError::ColumnNotFound(_) => AxionError::ColumnNotFound(format!("右侧连接键列 '{}'", right_on)),
537 AxionError::TypeMismatch { expected: _, found, name } => AxionError::JoinKeyTypeError {
538 side: "右侧".to_string(),
539 name,
540 expected: DataType::String,
541 found,
542 },
543 other => other,
544 })?;
545
546 let mut right_indices_map: HashMap<&Option<String>, Vec<usize>> = HashMap::new();
547 for (idx, opt_key) in right_key_col.data_internal().iter().enumerate() {
548 right_indices_map.entry(opt_key).or_default().push(idx);
549 }
550
551 let mut join_indices: Vec<(usize, usize)> = Vec::new();
552 for (left_idx, left_opt_key) in left_key_col.data_internal().iter().enumerate() {
553 if let Some(right_indices) = right_indices_map.get(left_opt_key) {
554 for &right_idx in right_indices {
555 join_indices.push((left_idx, right_idx));
556 }
557 }
558 }
559
560 let (left_result_indices, right_result_indices): (Vec<usize>, Vec<usize>) =
561 join_indices.into_iter().unzip();
562
563 let mut result_columns: Vec<Box<dyn SeriesTrait>> =
564 Vec::with_capacity(self.width() + right.width() - 1);
565 let mut left_column_names: HashSet<String> = HashSet::with_capacity(self.width());
566
567 for col in &self.columns {
568 let taken_left_col = col.take_indices(&left_result_indices)?;
569 left_column_names.insert(taken_left_col.name().to_string());
570 result_columns.push(taken_left_col);
571 }
572
573 for col in &right.columns {
574 if col.name() != right_on {
575 let original_right_name = col.name();
576 let mut taken_right_col = col.take_indices(&right_result_indices)?;
577
578 if left_column_names.contains(original_right_name) {
579 let new_name = format!("{}_right", original_right_name);
580 taken_right_col.rename(&new_name);
581 result_columns.push(taken_right_col);
582 } else {
583 result_columns.push(taken_right_col);
584 }
585 }
586 }
587
588 DataFrame::new(result_columns)
589 }
590
591 pub fn left_join(
595 &self,
596 right: &DataFrame,
597 left_on: &str,
598 right_on: &str,
599 ) -> AxionResult<DataFrame> {
600 let left_key_col: &Series<String> = self
601 .downcast_column(left_on)
602 .map_err(|e| match e {
603 AxionError::ColumnNotFound(_) => AxionError::ColumnNotFound(format!("left key column '{}'", left_on)),
604 AxionError::TypeMismatch { expected: _, found, name } => AxionError::JoinKeyTypeError {
605 side: "left".to_string(), name, expected: DataType::String, found,
606 },
607 other => other,
608 })?;
609
610 let right_key_col: &Series<String> = right
611 .downcast_column(right_on)
612 .map_err(|e| match e {
613 AxionError::ColumnNotFound(_) => AxionError::ColumnNotFound(format!("right key column '{}'", right_on)),
614 AxionError::TypeMismatch { expected: _, found, name } => AxionError::JoinKeyTypeError {
615 side: "right".to_string(), name, expected: DataType::String, found,
616 },
617 other => other,
618 })?;
619
620 let mut right_indices_map: HashMap<&Option<String>, Vec<usize>> = HashMap::new();
621 for (idx, opt_key) in right_key_col.data_internal().iter().enumerate() {
622 right_indices_map.entry(opt_key).or_default().push(idx);
623 }
624
625 let mut join_indices: Vec<(usize, Option<usize>)> = Vec::new();
626 for (left_idx, left_opt_key) in left_key_col.data_internal().iter().enumerate() {
627 if let Some(right_indices) = right_indices_map.get(left_opt_key) {
628 for &right_idx in right_indices {
629 join_indices.push((left_idx, Some(right_idx)));
630 }
631 } else {
632 join_indices.push((left_idx, None));
633 }
634 }
635
636 let (left_result_indices, right_result_indices): (Vec<usize>, Vec<Option<usize>>) =
637 join_indices.into_iter().unzip();
638
639 let mut result_columns: Vec<Box<dyn SeriesTrait>> =
640 Vec::with_capacity(self.width() + right.width() - 1);
641 let mut left_column_names: HashSet<String> = HashSet::with_capacity(self.width());
642
643 for col in &self.columns {
644 let taken_left_col = col.take_indices(&left_result_indices)?;
645 left_column_names.insert(taken_left_col.name().to_string());
646 result_columns.push(taken_left_col);
647 }
648
649 for col in &right.columns {
650 if col.name() != right_on {
651 let original_right_name = col.name();
652 let mut taken_right_col = col.take_indices_option(&right_result_indices)?;
653
654 if left_column_names.contains(original_right_name) {
655 let new_name = format!("{}_right", original_right_name);
656 taken_right_col.rename(&new_name);
657 result_columns.push(taken_right_col);
658 } else {
659 result_columns.push(taken_right_col);
660 }
661 }
662 }
663
664 DataFrame::new(result_columns)
665 }
666
667 pub fn right_join(
671 &self,
672 right: &DataFrame,
673 left_on: &str,
674 right_on: &str,
675 ) -> AxionResult<DataFrame> {
676 let left_key_col: &Series<String> = self
677 .downcast_column(left_on)
678 .map_err(|e| match e {
679 AxionError::ColumnNotFound(_) => AxionError::ColumnNotFound(format!("left key column '{}'", left_on)),
680 AxionError::TypeMismatch { expected: _, found, name } => AxionError::JoinKeyTypeError {
681 side: "left".to_string(), name, expected: DataType::String, found,
682 },
683 other => other,
684 })?;
685
686 let right_key_col: &Series<String> = right
687 .downcast_column(right_on)
688 .map_err(|e| match e {
689 AxionError::ColumnNotFound(_) => AxionError::ColumnNotFound(format!("right key column '{}'", right_on)),
690 AxionError::TypeMismatch { expected: _, found, name } => AxionError::JoinKeyTypeError {
691 side: "right".to_string(), name, expected: DataType::String, found,
692 },
693 other => other,
694 })?;
695
696 let mut left_indices_map: HashMap<&Option<String>, Vec<usize>> = HashMap::new();
697 for (idx, opt_key) in left_key_col.data_internal().iter().enumerate() {
698 left_indices_map.entry(opt_key).or_default().push(idx);
699 }
700
701 let mut join_indices: Vec<(Option<usize>, usize)> = Vec::new();
702 for (right_idx, right_opt_key) in right_key_col.data_internal().iter().enumerate() {
703 if let Some(left_indices) = left_indices_map.get(right_opt_key) {
704 for &left_idx in left_indices {
705 join_indices.push((Some(left_idx), right_idx));
706 }
707 } else {
708 join_indices.push((None, right_idx));
709 }
710 }
711
712 let (left_result_indices, right_result_indices): (Vec<Option<usize>>, Vec<usize>) =
713 join_indices.into_iter().unzip();
714
715 let mut result_columns: Vec<Box<dyn SeriesTrait>> =
716 Vec::with_capacity(self.width() + right.width() - 1);
717 let mut right_column_names: HashSet<String> = HashSet::with_capacity(right.width());
718
719 for col in &right.columns {
720 let taken_right_col = col.take_indices(&right_result_indices)?;
721 right_column_names.insert(taken_right_col.name().to_string());
722 result_columns.push(taken_right_col);
723 }
724
725 for col in &self.columns {
726 if col.name() != left_on {
727 let original_left_name = col.name();
728 let mut taken_left_col = col.take_indices_option(&left_result_indices)?;
729
730 if right_column_names.contains(original_left_name) {
731 let new_name = format!("{}_left", original_left_name);
732 taken_left_col.rename(&new_name);
733 result_columns.push(taken_left_col);
734 } else {
735 result_columns.push(taken_left_col);
736 }
737 }
738 }
739
740 DataFrame::new(result_columns)
741 }
742
743 pub fn outer_join(
747 &self,
748 right: &DataFrame,
749 left_on: &str,
750 right_on: &str,
751 ) -> AxionResult<DataFrame> {
752 let left_key_col: &Series<String> = self
753 .downcast_column(left_on)
754 .map_err(|e| match e {
755 AxionError::ColumnNotFound(_) => AxionError::ColumnNotFound(format!("left key column '{}'", left_on)),
756 AxionError::TypeMismatch { expected: _, found, name } => AxionError::JoinKeyTypeError {
757 side: "left".to_string(),
758 name,
759 expected: DataType::String,
760 found,
761 },
762 other => other,
763 })?;
764 let right_key_col: &Series<String> = right
765 .downcast_column(right_on)
766 .map_err(|e| match e {
767 AxionError::ColumnNotFound(_) => AxionError::ColumnNotFound(format!("right key column '{}'", right_on)),
768 AxionError::TypeMismatch { expected: _, found, name } => AxionError::JoinKeyTypeError {
769 side: "right".to_string(),
770 name,
771 expected: DataType::String,
772 found,
773 },
774 other => other,
775 })?;
776
777 let mut right_indices_map: HashMap<&Option<String>, Vec<usize>> = HashMap::new();
778 for (idx, opt_key) in right_key_col.data_internal().iter().enumerate() {
779 right_indices_map.entry(opt_key).or_default().push(idx);
780 }
781
782 let mut join_indices: Vec<(Option<usize>, Option<usize>)> = Vec::new();
783 let mut used_right_indices: HashSet<usize> = HashSet::new();
784
785 for (left_idx, left_opt_key) in left_key_col.data_internal().iter().enumerate() {
786 if let Some(right_indices) = right_indices_map.get(left_opt_key) {
787 for &right_idx in right_indices {
788 join_indices.push((Some(left_idx), Some(right_idx)));
789 used_right_indices.insert(right_idx);
790 }
791 } else {
792 join_indices.push((Some(left_idx), None));
793 }
794 }
795
796 for (right_idx, _right_opt_key) in right_key_col.data_internal().iter().enumerate() {
797 if !used_right_indices.contains(&right_idx) {
798 join_indices.push((None, Some(right_idx)));
799 }
800 }
801
802 let (left_result_indices, right_result_indices): (Vec<Option<usize>>, Vec<Option<usize>>) =
803 join_indices.into_iter().unzip();
804
805 let mut result_columns: Vec<Box<dyn SeriesTrait>> =
806 Vec::with_capacity(self.width() + right.width() - 1);
807 let mut left_column_names: HashSet<String> = HashSet::with_capacity(self.width());
808
809 for col in &self.columns {
810 let taken_left_col = col.take_indices_option(&left_result_indices)?;
811 left_column_names.insert(taken_left_col.name().to_string());
812 result_columns.push(taken_left_col);
813 }
814
815 for col in &right.columns {
816 if col.name() != right_on {
817 let original_right_name = col.name();
818 let mut taken_right_col = col.take_indices_option(&right_result_indices)?;
819
820 if left_column_names.contains(original_right_name) {
821 let new_name = format!("{}_right", original_right_name);
822 taken_right_col.rename(&new_name);
823 result_columns.push(taken_right_col);
824 } else {
825 result_columns.push(taken_right_col);
826 }
827 }
828 }
829
830 DataFrame::new(result_columns)
831 }
832
833 pub fn groupby<'a>(&'a self, keys: &[&str]) -> AxionResult<GroupBy<'a>> {
850 let key_strings: Vec<String> = keys.iter().map(|s| s.to_string()).collect();
851 GroupBy::new(self, key_strings)
852 }
853
854 pub fn sort(&self, by: &[&str], descending: &[bool]) -> AxionResult<DataFrame> {
877 if by.is_empty() {
878 return Ok(self.clone());
879 }
880 if by.len() != descending.len() {
881 return Err(AxionError::InvalidArgument(
882 "排序键数量和降序标志数量必须匹配".to_string(),
883 ));
884 }
885
886 let mut sort_key_columns: Vec<&dyn SeriesTrait> = Vec::with_capacity(by.len());
887 for key_name in by {
888 let col = self.column(key_name)?;
889 if let DataType::List(_) = col.dtype() {
890 return Err(AxionError::UnsupportedOperation(format!(
891 "列 '{}' 的 List 类型不支持排序", key_name
892 )));
893 }
894 sort_key_columns.push(col);
895 }
896
897 let height = self.height();
898 let mut indices: Vec<usize> = (0..height).collect();
899
900 indices.sort_unstable_by(|&a_idx, &b_idx| {
901 for (i, key_col) in sort_key_columns.iter().enumerate() {
902 let order = key_col.compare_row(a_idx, b_idx);
903 let current_order = if descending[i] { order.reverse() } else { order };
904
905 if current_order != Ordering::Equal {
906 return current_order;
907 }
908 }
909 Ordering::Equal
910 });
911
912 let mut sorted_columns: Vec<Box<dyn SeriesTrait>> = Vec::with_capacity(self.columns.len());
913 for col in &self.columns {
914 let sorted_col = col.take_indices(&indices)?;
915 sorted_columns.push(sorted_col);
916 }
917
918 DataFrame::new(sorted_columns)
919 }
920
921 pub fn to_csv(&self, filepath: impl AsRef<Path>, options: Option<WriteCsvOptions>) -> AxionResult<()> {
949 let path_ref = filepath.as_ref();
950 let mut file_writer = File::create(path_ref)
951 .map_err(|e| AxionError::IoError(format!("无法创建或打开文件 {:?}: {}", path_ref, e)))?;
952 self.to_csv_writer(&mut file_writer, options)
953 }
954
955 pub fn to_csv_writer<W: Write>(&self, writer: &mut W, options: Option<WriteCsvOptions>) -> AxionResult<()> {
964 let opts = options.unwrap_or_default();
965
966 let mut csv_builder = csv::WriterBuilder::new();
967 csv_builder.delimiter(opts.delimiter);
968
969 csv_builder.quote_style(match opts.quote_style {
970 crate::io::csv::QuoteStyle::Always => csv::QuoteStyle::Always,
971 crate::io::csv::QuoteStyle::Necessary => csv::QuoteStyle::Necessary,
972 crate::io::csv::QuoteStyle::Never => csv::QuoteStyle::Never,
973 crate::io::csv::QuoteStyle::NonNumeric => csv::QuoteStyle::NonNumeric,
974 });
975
976 if opts.line_terminator == "\r\n" {
977 csv_builder.terminator(csv::Terminator::CRLF);
978 } else if opts.line_terminator == "\n" {
979 csv_builder.terminator(csv::Terminator::Any(b'\n'));
980 } else if opts.line_terminator.len() == 1 {
981 csv_builder.terminator(csv::Terminator::Any(opts.line_terminator.as_bytes()[0]));
982 } else {
983 return Err(AxionError::CsvError(format!(
984 "不支持的行终止符: {:?}",
985 opts.line_terminator
986 )));
987 }
988
989 let mut csv_writer = csv_builder.from_writer(writer);
990
991 if opts.has_header && self.width() > 0 {
992 if let Err(e) = csv_writer.write_record(self.columns_names()) {
993 return Err(AxionError::from(e));
994 }
995 }
996
997 if self.width() > 0 {
998 let mut record_buffer: Vec<String> = Vec::with_capacity(self.width());
999 for row_idx in 0..self.height() {
1000 record_buffer.clear();
1001 for col_idx in 0..self.width() {
1002 let series = self.column_at(col_idx)?;
1003 let value_to_write: String;
1004
1005 if series.is_null_at(row_idx) {
1006 value_to_write = opts.na_rep.clone();
1007 } else {
1008 match series.get_str(row_idx) {
1009 Some(s_val) => {
1010 value_to_write = s_val;
1011 }
1012 None => {
1013 return Err(AxionError::InternalError(format!(
1014 "无法获取位置 ({}, {}) 的字符串表示,列名: '{}'",
1015 row_idx, col_idx, series.name()
1016 )));
1017 }
1018 }
1019 }
1020 record_buffer.push(value_to_write);
1021 }
1022 if let Err(e) = csv_writer.write_record(&record_buffer) {
1023 return Err(AxionError::from(e));
1024 }
1025 }
1026 }
1027
1028 if let Err(e) = csv_writer.flush() {
1029 return Err(AxionError::from(e));
1030 }
1031
1032 Ok(())
1033 }
1034}
1035
1036impl PartialEq for DataFrame {
1037 fn eq(&self, other: &Self) -> bool {
1038 if self.shape() != other.shape() {
1039 return false;
1040 }
1041
1042 if self.columns_names() != other.columns_names() {
1043 return false;
1044 }
1045
1046 for col_name in self.columns_names() {
1047 let self_col = self.column(col_name).unwrap();
1048 let other_col = other.column(col_name).unwrap();
1049
1050 if format!("{:?}", self_col) != format!("{:?}", other_col) {
1051 return false;
1052 }
1053 }
1054
1055 true
1056 }
1057}
1058
1059impl Debug for DataFrame {
1060 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1061 f.debug_struct("DataFrame")
1062 .field("height", &self.height)
1063 .field("columns_count", &self.columns.len())
1064 .field("schema", &self.schema)
1065 .finish()
1066 }
1067}
1068
1069impl fmt::Display for DataFrame {
1070 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1071 if self.is_empty() {
1072 return writeln!(f, "DataFrame (0x0)");
1073 }
1074
1075 const MAX_ROWS_TO_PRINT: usize = 10;
1076 const MIN_COL_WIDTH: usize = 5;
1077 const NULL_STR: &str = "null";
1078
1079 let height = self.height();
1080 let width = self.width();
1081 let num_rows_to_print = std::cmp::min(height, MAX_ROWS_TO_PRINT);
1082
1083 let col_names = self.columns_names();
1084 let dtypes: Vec<String> = self.dtypes().iter().map(|dt| format!("{:?}", dt)).collect();
1085
1086 let mut col_widths: Vec<usize> = Vec::with_capacity(width);
1087 for i in 0..width {
1088 let name_len = col_names[i].len();
1089 let type_len = dtypes[i].len();
1090 let mut max_data_len = MIN_COL_WIDTH;
1091
1092 for row_idx in 0..num_rows_to_print {
1093 if let Some(val_str) = self.columns[i].get_str(row_idx) {
1094 max_data_len = std::cmp::max(max_data_len, val_str.len());
1095 } else {
1096 max_data_len = std::cmp::max(max_data_len, NULL_STR.len());
1097 }
1098 }
1099 col_widths.push(std::cmp::max(MIN_COL_WIDTH, std::cmp::max(name_len, std::cmp::max(type_len, max_data_len))));
1100 }
1101
1102 write!(f, "+")?;
1103 for w in &col_widths { write!(f, "{:-<width$}+", "", width = w + 2)?; }
1104 writeln!(f)?;
1105
1106 write!(f, "|")?;
1107 for (i, name) in col_names.iter().enumerate() {
1108 write!(f, " {:<width$} |", name, width = col_widths[i])?;
1109 }
1110 writeln!(f)?;
1111
1112 write!(f, "|")?;
1113 for w in &col_widths { write!(f, "{:-<width$}|", "", width = w + 2)?; }
1114 writeln!(f)?;
1115
1116 write!(f, "|")?;
1117 for (i, dtype_str) in dtypes.iter().enumerate() {
1118 write!(f, " {:<width$} |", dtype_str, width = col_widths[i])?;
1119 }
1120 writeln!(f)?;
1121
1122 write!(f, "+")?;
1123 for w in &col_widths { write!(f, "{:=<width$}+", "", width = w + 2)?; }
1124 writeln!(f)?;
1125
1126 for row_idx in 0..num_rows_to_print {
1127 write!(f, "|")?;
1128 for (col_idx, col) in self.columns.iter().enumerate() {
1129 let val_str = col.get_str(row_idx).unwrap_or_else(|| NULL_STR.to_string());
1130 write!(f, " {:<width$} |", val_str, width = col_widths[col_idx])?;
1131 }
1132 writeln!(f)?;
1133 write!(f, "+")?;
1134 for w in &col_widths { write!(f, "{:-<width$}+", "", width = w + 2)?; }
1135 writeln!(f)?;
1136 }
1137
1138 if height > num_rows_to_print {
1139 writeln!(f, "... (还有 {} 行)", height - num_rows_to_print)?;
1140 }
1141
1142 Ok(())
1143 }
1144}
1145
1146