hawk_data/
arg.rs

1use std::path::PathBuf;
2
3use clap::Parser;
4
5use crate::Error;
6
7/// hawk - Modern data analysis tool for structured data (JSON, YAML, CSV)
8///
9/// hawk combines the simplicity of awk with the power of pandas for data exploration.
10/// Perfect for analyzing JSON APIs, YAML configs, and CSV datasets.
11#[derive(Debug, Parser)]
12#[command(name = "hawk")]
13#[command(version = "0.2.2")]
14#[command(about = "Modern data analysis tool for structured data and text files")]
15#[command(long_about = "
16hawk is a command-line data analysis tool that brings pandas-like functionality
17to your terminal. It supports JSON, YAML, CSV, and plain text formats with automatic
18detection, powerful filtering, grouping, aggregation, and string manipulation capabilities.
19
20EXAMPLES:
21# Basic field access
22    hawk ‘.users[0].name’ data.json
23    hawk ‘.users.name’ data.csv
24
25
26# Text processing (NEW in v0.2.0!)
27    hawk '. | select(. | contains(\"ERROR\"))' app.log
28    hawk '. | map(. | trim | upper)' data.txt
29    hawk '. | map(. | substring(0, 19))' access.log
30
31# String operations
32    hawk '. | map(. | replace(\"old\", \"new\"))' text.txt
33    hawk '. | map(. | split(\",\") | join(\" | \"))' csv_lines.txt
34
35# Filtering and aggregation
36    hawk '.users[] | select(.age > 30)' data.yaml
37    hawk '.sales | group_by(.region) | avg(.amount)' sales.csv
38
39# Statistical analysis (NEW!)
40    hawk '. | unique | sort' numbers.txt
41    hawk '.scores[] | median(.value)' scores.json
42    hawk '.data[] | stddev(.measurement)' sensor_data.csv
43
44# Complex pipelines
45    hawk '. | select(. | contains(\"WARN\")) | map(. | substring(11, 8)) | unique' app.log
46    hawk '.users[] | map(.email | lower | trim) | select(. | ends_with(\".com\"))' users.csv
47
48# Data exploration
49    hawk '. | info' data.json
50    hawk '.users | count' data.csv
51    hawk '. | length' any_file.txt
52
53
54SUPPORTED FORMATS:
55    JSON, YAML, CSV, Plain Text (automatically detected)
56
57QUERY SYNTAX:
58    # Field Access
59    .field                    - Access field
60    .array[0]                 - Access array element
61    .array[]                  - Access all array elements
62
63
64# Text Processing (NEW!)
65    . | map(. | upper)        - Convert to uppercase
66    . | map(. | lower)        - Convert to lowercase
67    . | map(. | trim)         - Remove whitespace
68    . | map(. | length)       - Get string length
69    . | map(. | reverse)      - Reverse string
70
71# String Manipulation
72    . | map(. | replace(\"a\", \"b\"))  - Replace text
73    . | map(. | substring(0, 5))      - Extract substring
74    . | map(. | split(\",\"))          - Split by delimiter
75    .array[] | join(\", \")            - Join array elements
76
77# String Filtering
78    . | select(. | contains(\"text\"))     - Contains pattern
79    . | select(. | starts_with(\"pre\"))   - Starts with pattern
80    . | select(. | ends_with(\"suf\"))     - Ends with pattern
81
82# Statistical Functions (NEW!)
83    . | unique                - Remove duplicates
84    . | sort                  - Sort values
85    . | median                - Calculate median
86    . | stddev                - Calculate standard deviation
87    . | length                - Get array/text length
88
89# Filtering & Aggregation
90    . | select(.field > 10)   - Filter data
91    . | group_by(.category)   - Group data
92    . | count/sum/avg/min/max - Aggregate functions
93
94# Data Transformation
95    . | map(.field | operation) - Transform data with string operations
96
97
98OUTPUT FORMATS:
99    –format table           - Colored table output (default for structured data)
100    –format json            - JSON output with syntax highlighting
101    –format list            - Simple list output
102    –format auto            - Smart format detection (default)
103
104COLORED OUTPUT:
105    Automatic color detection (TTY), respects NO_COLOR environment variable
106")]
107
108pub struct Args {
109    /// JSONPath-style query to execute
110    ///
111    /// Examples:
112    ///
113    ///   .users[0].name              - Get first user's name
114    ///
115    ///   .users | select(.age > 30)  - Filter users by age
116    ///
117    ///   . | group_by(.department)   - Group by department
118    pub query: String,
119
120    /// Input file path (JSON, YAML, or CSV)
121    ///
122    /// If not provided, reads from stdin.
123    /// File format is automatically detected.
124    pub path: Option<PathBuf>,
125
126    /// Output format
127    ///
128    ///    auto: Smart detection (table for arrays, list for values, json for complex)
129    ///
130    ///    table: Force tabular output
131    ///
132    ///    json: Force JSON output
133    ///
134    ///    list: Force list output
135    ///
136    ///    csv: Force CSV output
137    #[arg(long, default_value = "auto")]
138    #[arg(value_parser = ["auto", "table", "json", "list", "csv"])]
139    pub format: String,
140
141    #[arg(long, short)]
142    #[arg(help = "Force text format (skip auto-detection)")]
143    pub text: bool,
144}
145
/// Output rendering mode selected on the command line.
///
/// Each variant corresponds to one of the names accepted by the `--format`
/// option (`auto`, `json`, `table`, `list`, `csv`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OutputFormat {
    /// Let the tool choose a rendering based on the shape of the result.
    Auto,
    /// Render the result as JSON.
    Json,
    /// Render the result as a table.
    Table,
    /// Render the result as a simple list.
    List,
    /// Render the result as CSV.
    Csv,
}
154
155impl std::str::FromStr for OutputFormat {
156    type Err = Error;
157
158    fn from_str(s: &str) -> Result<Self, Self::Err> {
159        match s.to_lowercase().as_str() {
160            "auto" => Ok(OutputFormat::Auto),
161            "json" => Ok(OutputFormat::Json),
162            "table" => Ok(OutputFormat::Table),
163            "list" => Ok(OutputFormat::List),
164            "csv" => Ok(OutputFormat::Csv),
165            _ => Err(Error::InvalidFormat(format!(
166                "Invalid format: {}. Valid options: auto, json, table, list",
167                s
168            ))),
169        }
170    }
171}