Skip to main content

perl_lsp_input_validation/
lib.rs

1#![warn(missing_docs)]
2//! Input validation and sanitization utilities for production hardening.
3
4use anyhow::{Result, anyhow};
5use perl_path_security::validate_workspace_path;
6use std::ffi::OsStr;
7use std::path::{Path, PathBuf};
8
/// Largest content size, in bytes, accepted for parsing: 10 MiB.
const MAX_FILE_SIZE: usize = 10 * (1 << 20);

/// Longest path, in bytes, accepted by path validation.
const MAX_PATH_LENGTH: usize = 4 * 1024;

/// File extensions (without the leading dot) treated as Perl sources.
const ALLOWED_EXTENSIONS: &[&str] = &["pl", "pm", "t", "pod"];
17
18/// Validates and sanitizes a file path to prevent path traversal attacks.
19pub fn validate_file_path<P: AsRef<Path>>(path: P, workspace_root: &Path) -> Result<PathBuf> {
20    let path = path.as_ref();
21
22    if path.to_string_lossy().len() > MAX_PATH_LENGTH {
23        return Err(anyhow!("Path too long: {}", path.display()));
24    }
25
26    let validated = validate_workspace_path(path, workspace_root)
27        .map_err(|error| anyhow!("Invalid workspace path {}: {error}", path.display()))?;
28
29    if let Some(extension) = validated.extension().and_then(OsStr::to_str)
30        && !ALLOWED_EXTENSIONS.contains(&extension)
31    {
32        return Err(anyhow!(
33            "File extension '{}' not allowed. Allowed: {:?}",
34            extension,
35            ALLOWED_EXTENSIONS
36        ));
37    }
38
39    Ok(validated)
40}
41
42/// Validates file content before parsing to prevent resource exhaustion.
43pub fn validate_file_content(content: &str, file_path: &Path) -> Result<()> {
44    if content.len() > MAX_FILE_SIZE {
45        return Err(anyhow!(
46            "File {} too large: {} bytes (max: {})",
47            file_path.display(),
48            content.len(),
49            MAX_FILE_SIZE
50        ));
51    }
52
53    if content.contains('\0') {
54        return Err(anyhow!("File {} contains null bytes", file_path.display()));
55    }
56
57    for (index, line) in content.lines().enumerate() {
58        if line.len() > 100_000 {
59            return Err(anyhow!(
60                "Line {} in file {} is too long: {} characters",
61                index + 1,
62                file_path.display(),
63                line.len()
64            ));
65        }
66    }
67
68    let suspicious_patterns = ["<script", "javascript:", "data:text/html", "<?php", "<%"];
69    let lowercase = content.to_lowercase();
70    for pattern in suspicious_patterns {
71        if lowercase.contains(pattern) {
72            return Err(anyhow!(
73                "File {} contains suspicious pattern: {}",
74                file_path.display(),
75                pattern
76            ));
77        }
78    }
79
80    Ok(())
81}
82
83/// Validates LSP request parameters to ensure they're safe.
84pub fn validate_lsp_request(method: &str, params: &serde_json::Value) -> Result<()> {
85    if method.len() > 100 || !method.chars().all(|c| c.is_alphanumeric() || c == '/' || c == '$') {
86        return Err(anyhow!("Invalid LSP method: {}", method));
87    }
88
89    let params_str = serde_json::to_string(params)?;
90    if params_str.len() > 1_000_000 {
91        return Err(anyhow!("LSP parameters too large for method: {}", method));
92    }
93
94    match method {
95        "textDocument/didOpen" | "textDocument/didChange" | "textDocument/didSave" => {
96            validate_text_document_params(params)?;
97        }
98        "workspace/executeCommand" => {
99            validate_execute_command_params(params)?;
100        }
101        _ => {
102            if params_str.contains("javascript:") || params_str.contains("<script") {
103                return Err(anyhow!("Suspicious content in parameters for method: {}", method));
104            }
105        }
106    }
107
108    Ok(())
109}
110
111fn validate_text_document_params(params: &serde_json::Value) -> Result<()> {
112    if let Some(uri) = params
113        .get("textDocument")
114        .and_then(|text_document| text_document.get("uri"))
115        .and_then(serde_json::Value::as_str)
116    {
117        if !uri.starts_with("file://") && !uri.starts_with("untitled:") {
118            return Err(anyhow!("Invalid URI scheme: {}", uri));
119        }
120
121        if uri.len() > 4096 {
122            return Err(anyhow!("URI too long: {}", uri));
123        }
124    }
125
126    if let Some(text) = params
127        .get("textDocument")
128        .and_then(|text_document| text_document.get("text"))
129        .and_then(serde_json::Value::as_str)
130    {
131        validate_file_content(text, Path::new("<lsp_input>"))?;
132    }
133
134    Ok(())
135}
136
137fn validate_execute_command_params(params: &serde_json::Value) -> Result<()> {
138    if let Some(command) = params.get("command").and_then(serde_json::Value::as_str) {
139        let allowed_commands = [
140            "perl.runCritic",
141            "perl.formatDocument",
142            "perl.extractVariable",
143            "perl.extractSubroutine",
144            "perl.optimizeImports",
145        ];
146
147        if !allowed_commands.contains(&command) {
148            return Err(anyhow!("Command not allowed: {}", command));
149        }
150    }
151
152    Ok(())
153}
154
/// Returns a copy of `input` with ASCII control characters stripped.
///
/// Tab, line feed and carriage return are kept. Every other ASCII control
/// character (U+0000 through U+001F, and U+007F) is dropped. Printable ASCII and
/// all code points above U+007F pass through unchanged.
pub fn sanitize_string(input: &str) -> String {
    let mut cleaned = String::with_capacity(input.len());

    for character in input.chars() {
        let keep = match character {
            '\t' | '\n' | '\r' => true,
            other => !other.is_ascii_control(),
        };
        if keep {
            cleaned.push(character);
        }
    }

    cleaned
}
168
169/// Validates workspace root to ensure it's safe.
170pub fn validate_workspace_root(workspace_root: &Path) -> Result<()> {
171    if !workspace_root.exists() {
172        return Err(anyhow!("Workspace root does not exist: {}", workspace_root.display()));
173    }
174
175    if !workspace_root.is_dir() {
176        return Err(anyhow!("Workspace root is not a directory: {}", workspace_root.display()));
177    }
178
179    let path_str = workspace_root.to_string_lossy();
180    if path_str.contains("..") || path_str.contains('~') {
181        return Err(anyhow!("Suspicious workspace root path: {}", workspace_root.display()));
182    }
183
184    Ok(())
185}
186
#[cfg(test)]
mod tests {
    use super::*;
    use perl_tdd_support::must;
    use std::fs;
    use tempfile::TempDir;

    /// A `.pl` file written inside a temporary workspace is expected to validate.
    #[test]
    fn test_validate_file_path_valid() {
        let workspace = must(TempDir::new());
        let script = workspace.path().join("test.pl");
        must(fs::write(&script, "print 'Hello';"));

        assert!(validate_file_path(&script, workspace.path()).is_ok());
    }

    /// A relative path that climbs upward with `..` is expected to be rejected.
    #[test]
    fn test_validate_file_path_traversal() {
        let workspace = must(TempDir::new());

        let outcome = validate_file_path(Path::new("../../etc/passwd"), workspace.path());
        assert!(outcome.is_err());
    }

    /// Short, plain Perl source is expected to pass content validation.
    #[test]
    fn test_validate_file_content_valid() {
        let outcome = validate_file_content("print 'Hello, World!';", Path::new("test.pl"));
        assert!(outcome.is_ok());
    }

    /// Content one byte over `MAX_FILE_SIZE` is expected to be rejected.
    #[test]
    fn test_validate_file_content_too_large() {
        let oversized = "x".repeat(MAX_FILE_SIZE + 1);

        let outcome = validate_file_content(&oversized, Path::new("large.pl"));
        assert!(outcome.is_err());
    }

    /// Content containing a NUL character is expected to be rejected.
    #[test]
    fn test_validate_file_content_null_bytes() {
        let outcome = validate_file_content("print 'Hello';\0", Path::new("null.pl"));
        assert!(outcome.is_err());
    }

    /// The NUL character is stripped while the surrounding text is left untouched.
    #[test]
    fn test_sanitize_string() {
        let sanitized = sanitize_string("Hello\x00World<script>alert('xss')</script>");
        assert_eq!(sanitized, "HelloWorld<script>alert('xss')</script>");
    }

    /// A `didOpen` request with a `file://` URI and plain text is expected to pass.
    #[test]
    fn test_validate_lsp_request_valid() {
        let params = serde_json::json!({
            "textDocument": {
                "uri": "file:///test.pl",
                "text": "print 'Hello';"
            }
        });

        assert!(validate_lsp_request("textDocument/didOpen", &params).is_ok());
    }

    /// A method name containing markup characters is expected to be rejected.
    #[test]
    fn test_validate_lsp_request_invalid_method() {
        let params = serde_json::json!({});

        let outcome = validate_lsp_request("invalid<script>alert('xss')</script>", &params);
        assert!(outcome.is_err());
    }

    /// An allow-listed command is expected to pass `workspace/executeCommand` validation.
    #[test]
    fn test_validate_execute_command_allowed() {
        let params = serde_json::json!({
            "command": "perl.runCritic",
            "arguments": []
        });

        assert!(validate_lsp_request("workspace/executeCommand", &params).is_ok());
    }

    /// A command outside the allow-list is expected to be rejected.
    #[test]
    fn test_validate_execute_command_blocked() {
        let params = serde_json::json!({
            "command": "rm -rf /",
            "arguments": []
        });

        assert!(validate_lsp_request("workspace/executeCommand", &params).is_err());
    }
}