wp_parse_api/
lib.rs

1//! Minimal Plugin API for WPL parsers.
2//! This crate defines core types for plugin development while maintaining
3//! compatibility with the wp-lang ecosystem.
4
5use std::fmt::{Display, Formatter};
6use std::sync::Arc;
7
8use bytes::Bytes;
9use wp_model_core::model::DataRecord;
10
11mod error;
12pub use error::{WparseError, WparseReason, WparseResult};
13#[allow(deprecated)]
14pub use error::{WplParseError, WplParseReason, WplParseResult};
15// Re-export necessary types from wp-lang that we still need
16
/// Result type for plugin parsing operations.
///
/// On success, returns a tuple of `(DataRecord, remaining_raw)`.
/// On failure, returns a `WparseError` (the old name `WplParseError` is still
/// available, but deprecated).
pub type DataResult = Result<(DataRecord, RawData), WparseError>;
22
/// Raw payload exchanged with parsers and pipeline processors.
///
/// Each variant wraps a different buffer type; `as_bytes` gives a uniform
/// borrowed view regardless of which variant is held.
#[derive(Debug, Clone)]
pub enum RawData {
    /// Payload held as an owned `String`.
    String(String),
    /// Payload held as a `bytes::Bytes` buffer.
    Bytes(Bytes),
    /// Payload held as a reference-counted `Vec<u8>`.
    ArcBytes(Arc<Vec<u8>>),
}
29
30impl RawData {
31    pub fn from_string<T: Into<String>>(value: T) -> RawData {
32        RawData::String(value.into())
33    }
34
35    pub fn from_arc_bytes(data: Arc<Vec<u8>>) -> Self {
36        RawData::ArcBytes(data)
37    }
38
39    /// 辅助构造:从 `Arc<[u8]>` 构建。该接口用于兼容旧版(0.4.6 之前)`ArcBytes` 表示,
40    /// 会额外复制一次数据,建议尽快迁移到 `Arc<Vec<u8>>`。
41    pub fn from_arc_slice(data: Arc<[u8]>) -> Self {
42        RawData::ArcBytes(Arc::new(data.as_ref().to_vec()))
43    }
44
45    // 统一的数据访问接口
46    pub fn as_bytes(&self) -> &[u8] {
47        match self {
48            RawData::String(s) => s.as_bytes(),
49            RawData::Bytes(b) => b.as_ref(),
50            RawData::ArcBytes(arc) => arc.as_slice(),
51        }
52    }
53
54    // 向后兼容的 Bytes 转换(仅在需要时,始终复制)
55    pub fn to_bytes(&self) -> Bytes {
56        match self {
57            RawData::String(s) => Bytes::copy_from_slice(s.as_bytes()),
58            RawData::Bytes(b) => b.clone(),
59            RawData::ArcBytes(arc) => Bytes::copy_from_slice(arc.as_slice()),
60        }
61    }
62
63    /// 按需取得 Bytes,消耗自身以在 `String`/`Bytes` 分支复用缓冲区。
64    pub fn into_bytes(self) -> Bytes {
65        match self {
66            RawData::String(s) => Bytes::from(s),
67            RawData::Bytes(b) => b,
68            RawData::ArcBytes(arc) => match Arc::try_unwrap(arc) {
69                Ok(vec) => Bytes::from(vec),
70                Err(shared) => Bytes::copy_from_slice(shared.as_slice()),
71            },
72        }
73    }
74
75    // 零拷贝检测
76    pub fn is_zero_copy(&self) -> bool {
77        matches!(self, RawData::ArcBytes(_))
78    }
79
80    pub fn len(&self) -> usize {
81        self.as_bytes().len()
82    }
83
84    pub fn is_empty(&self) -> bool {
85        match self {
86            RawData::String(value) => value.is_empty(),
87            RawData::Bytes(value) => value.is_empty(),
88            RawData::ArcBytes(arc) => arc.is_empty(),
89        }
90    }
91}
92
93impl Display for RawData {
94    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
95        match self {
96            RawData::String(value) => f.write_str(value),
97            // 安全转换:尽量显示为 UTF-8;不可解码时使用替代字符
98            RawData::Bytes(value) => f.write_str(&String::from_utf8_lossy(value)),
99            RawData::ArcBytes(arc) => f.write_str(&String::from_utf8_lossy(arc.as_slice())),
100        }
101    }
102}
103
/// Trait for pipeline data processing operations.
///
/// This trait defines the interface for components that process `RawData`
/// within a data pipeline, transforming it from one format to another
/// (e.g., base64 decoding, hex decoding, string unescaping, etc.).
///
/// Pipeline processors are executed in sequence as part of a data processing
/// pipeline, with the output of one processor becoming the input of the next.
pub trait PipeProcessor {
    /// Process the input data and return the transformed result.
    ///
    /// The input is taken by value, so an implementation owns `data` for the
    /// duration of the call.
    ///
    /// # Arguments
    /// * `data` - The input data to be processed
    ///
    /// # Returns
    /// The processed data in the appropriate output format
    ///
    /// # Errors
    /// Failure is reported through `WparseResult` (re-exported from the
    /// `error` module); the specific conditions depend on the implementor.
    fn process(&self, data: RawData) -> WparseResult<RawData>;

    /// Get the name/identifier of this pipeline processor.
    ///
    /// # Returns
    /// A `'static` string slice representing the processor name
    fn name(&self) -> &'static str;
}
128
/// Reference-counted handle to a `PipeProcessor` trait object that is also
/// `Send + Sync`.
pub type PipeHold = Arc<dyn PipeProcessor + Send + Sync>;
130
#[cfg(test)]
mod tests {
    use super::RawData;
    use bytes::Bytes;
    use std::sync::Arc;

    /// Checks the byte view, length, zero-copy flag and `Bytes` conversions
    /// for each `RawData` variant.
    #[test]
    fn rawdata_as_bytes_and_len_cover_all_variants() {
        // String-backed payload.
        {
            let from_text = RawData::from_string("hello");
            assert_eq!(from_text.as_bytes(), b"hello");
            assert_eq!(from_text.len(), 5);
            assert!(!from_text.is_zero_copy());
        }

        // `Bytes`-backed payload.
        {
            let from_bytes = RawData::Bytes(Bytes::from_static(b"bin"));
            assert_eq!(from_bytes.as_bytes(), b"bin");
            assert_eq!(from_bytes.len(), 3);
            assert_eq!(from_bytes.to_bytes(), Bytes::from_static(b"bin"));
            assert!(!from_bytes.is_zero_copy());
        }

        // Shared `Arc<Vec<u8>>` payload; the test keeps its own handle alive.
        {
            let shared = Arc::new(vec![1u8, 2, 3, 4]);
            let from_shared = RawData::from_arc_bytes(Arc::clone(&shared));
            assert_eq!(from_shared.as_bytes(), shared.as_slice());
            assert_eq!(from_shared.len(), 4);
            assert!(from_shared.is_zero_copy());
            let copied = from_shared.to_bytes();
            assert_eq!(copied.as_ref(), &[1, 2, 3, 4]);
        }

        // Uniquely owned `Arc` payload consumed by `into_bytes`.
        {
            let sole_owner = RawData::from_arc_bytes(Arc::new(vec![5u8, 6, 7]));
            let moved = sole_owner.into_bytes();
            assert_eq!(moved.as_ref(), &[5, 6, 7]);
        }
    }

    /// Checks `is_empty` for an empty value of every variant and for one
    /// non-empty value.
    #[test]
    fn rawdata_is_empty_handles_all_variants() {
        let empty_text = RawData::from_string("");
        let empty_bytes = RawData::Bytes(Bytes::new());
        let empty_shared = RawData::from_arc_bytes(Arc::new(vec![]));
        let non_empty = RawData::from_string("x");

        assert!(empty_text.is_empty());
        assert!(empty_bytes.is_empty());
        assert!(empty_shared.is_empty());
        assert!(!non_empty.is_empty());
    }
}