Skip to main content

textalyzer/
types.rs

1extern crate clap;
2extern crate memmap2;
3
4use self::clap::Subcommand;
5use serde::Serialize;
6
7#[derive(Subcommand)]
8pub enum Command {
9  /// Prints a histogram of word frequency in a file
10  Histogram {
11    filepath: String,
12    /// Output the histogram data as JSON
13    #[clap(long)]
14    json: bool,
15  },
16  /// Prints duplicated sections in all files at the given paths
17  Duplication {
18    /// Paths to files or directories to scan for duplicates
19    paths: Vec<String>,
20    /// Minimum number of non-empty lines to be considered a duplication
21    #[clap(long, default_value = "3")]
22    min_lines: usize,
23    /// Only show the file paths with duplications, not the duplicated content
24    #[clap(long)]
25    files_only: bool,
26    /// Output the duplication data as JSON
27    #[clap(long)]
28    json: bool,
29  },
30  /// Analyzes and prints a histogram of line lengths in source files
31  LineLength {
32    /// Paths to files or directories to scan
33    paths: Vec<String>,
34    /// Output the histogram data as JSON
35    #[clap(long)]
36    json: bool,
37  },
38}
39
40pub struct Config {
41  pub command: Command,
42}
43
44#[derive(Debug)]
45pub struct FileEntry {
46  pub name: String,
47  pub content: MappedContent,
48}
49
50#[derive(Debug)]
51pub enum MappedContent {
52  Mapped(memmap2::Mmap),
53  String(String),
54}
55
56// Implement methods for MappedContent for easier use
57impl MappedContent {
58  // Get content as a string slice
59  pub fn as_str(&self) -> Option<&str> {
60    match self {
61      MappedContent::Mapped(mmap) => std::str::from_utf8(mmap).ok(),
62      MappedContent::String(s) => Some(s),
63    }
64  }
65
66  // Get content as a string
67  pub fn to_string(&self) -> Option<String> {
68    self.as_str().map(String::from)
69  }
70}
71
72// Implement PartialEq to compare with strings
73impl PartialEq<str> for MappedContent {
74  fn eq(&self, other: &str) -> bool {
75    match self.as_str() {
76      Some(s) => s == other,
77      None => false,
78    }
79  }
80}
81
82impl PartialEq<&str> for MappedContent {
83  fn eq(&self, other: &&str) -> bool {
84    match self.as_str() {
85      Some(s) => s == *other,
86      None => false,
87    }
88  }
89}
90
91impl PartialEq<String> for MappedContent {
92  fn eq(&self, other: &String) -> bool {
93    match self.as_str() {
94      Some(s) => s == other,
95      None => false,
96    }
97  }
98}
99
/// A single line of text together with the file and line number it is
/// associated with.
///
/// All fields are totally comparable and cheaply duplicable, so the type
/// derives `Eq` and `Clone` in addition to `PartialEq` and `Debug`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LineEntry {
  /// Name of the file this line belongs to.
  pub file_name: String,
  /// Line number within the file (numbering base is set by the producer).
  pub line_number: u32,
  /// Text of the line.
  pub content: String,
}
106
107// Helper type for JSON serialization of frequency maps
108#[derive(Serialize)]
109pub struct FrequencyItem {
110  pub word: String,
111  pub count: i32,
112}
113
114// Helper type for JSON serialization of line length histograms
115#[derive(Serialize)]
116pub struct LineLengthItem {
117  pub length: usize,
118  pub count: usize,
119}
120
121/// JSON-friendly location for a duplication occurrence
122#[derive(Serialize)]
123pub struct DuplicationLocation {
124  /// Path to the file containing the duplication
125  pub path: String,
126  /// Line number where the duplication occurs
127  pub line: u32,
128}
129
130/// JSON-friendly representation of a duplicated block
131#[derive(Serialize)]
132pub struct DuplicationItem {
133  /// The duplicated content block
134  pub content: String,
135  /// All file locations where this block appears
136  pub locations: Vec<DuplicationLocation>,
137}