use serde_json::Value;
use std::fs;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
use crate::entities::{GtsConfig, GtsEntity, GtsFile};
use crate::store::GtsReader;
const EXCLUDE_LIST: &[&str] = &["node_modules", "dist", "build"];
const VALID_EXTENSIONS: &[&str] = &[".json", ".jsonc", ".gts", ".yaml", ".yml"];
pub struct GtsFileReader {
paths: Vec<PathBuf>,
cfg: GtsConfig,
files: Vec<PathBuf>,
initialized: bool,
}
impl GtsFileReader {
#[must_use]
pub fn new(path: &[String], cfg: Option<GtsConfig>) -> Self {
let paths = path
.iter()
.map(|p| PathBuf::from(shellexpand::tilde(p).to_string()))
.collect();
GtsFileReader {
paths,
cfg: cfg.unwrap_or_default(),
files: Vec::new(),
initialized: false,
}
}
#[allow(clippy::cognitive_complexity)]
fn collect_files(&mut self) {
let mut seen = std::collections::HashSet::new();
let mut collected = Vec::new();
for path in &self.paths {
let resolved_path = path.canonicalize().unwrap_or_else(|_| path.clone());
if resolved_path.is_file() {
if let Some(ext) = resolved_path.extension() {
let ext_str = ext.to_string_lossy().to_lowercase();
if VALID_EXTENSIONS.contains(&format!(".{ext_str}").as_str()) {
let rp = resolved_path.to_string_lossy().to_string();
if !seen.contains(&rp) {
seen.insert(rp.clone());
tracing::debug!("- discovered file: {:?}", resolved_path);
collected.push(resolved_path.clone());
}
}
}
} else if resolved_path.is_dir() {
for entry in WalkDir::new(&resolved_path)
.follow_links(true)
.into_iter()
.flatten()
{
let path = entry.path();
if path.is_dir()
&& let Some(name) = path.file_name()
&& EXCLUDE_LIST.contains(&name.to_string_lossy().as_ref())
{
continue;
}
if path.is_file()
&& let Some(ext) = path.extension()
{
let ext_str = ext.to_string_lossy().to_lowercase();
if VALID_EXTENSIONS.contains(&format!(".{ext_str}").as_str()) {
let rp = path
.canonicalize()
.unwrap_or_else(|_| path.to_path_buf())
.to_string_lossy()
.to_string();
if !seen.contains(&rp) {
seen.insert(rp.clone());
tracing::debug!("- discovered file: {:?}", path);
collected.push(PathBuf::from(rp));
}
}
}
}
}
}
self.files = collected;
}
fn load_json_file(file_path: &Path) -> Result<Value, Box<dyn std::error::Error>> {
let content = fs::read_to_string(file_path)?;
let extension = file_path
.extension()
.and_then(|e| e.to_str())
.map(str::to_lowercase)
.unwrap_or_default();
let value: Value = match extension.as_str() {
"yaml" | "yml" => {
serde_saphyr::from_str(&content)?
}
_ => {
serde_json::from_str(&content)?
}
};
Ok(value)
}
#[allow(clippy::cognitive_complexity)]
fn process_file(&self, file_path: &Path) -> Vec<GtsEntity> {
let mut entities = Vec::new();
match Self::load_json_file(file_path) {
Ok(content) => {
let json_file = GtsFile::new(
file_path.to_string_lossy().to_string(),
file_path
.file_name()
.unwrap_or_default()
.to_string_lossy()
.to_string(),
content.clone(),
);
if let Some(arr) = content.as_array() {
for (idx, item) in arr.iter().enumerate() {
let entity = GtsEntity::new(
Some(json_file.clone()),
Some(idx),
item,
Some(&self.cfg),
None,
false,
String::new(),
None,
None,
);
if let Some(id) = entity.effective_id() {
tracing::debug!("- discovered entity: {}", id);
entities.push(entity);
} else {
tracing::debug!("- skipped entity from {:?} (no valid ID)", file_path);
}
}
} else {
let entity = GtsEntity::new(
Some(json_file),
None,
&content,
Some(&self.cfg),
None,
false,
String::new(),
None,
None,
);
if let Some(id) = entity.effective_id() {
tracing::debug!("- discovered entity: {}", id);
entities.push(entity);
} else {
tracing::debug!(
"- skipped entity from {:?} (no valid ID found in content: {:?})",
file_path,
content
);
}
}
}
Err(e) => {
tracing::debug!("Failed to parse file {:?}: {}", file_path, e);
}
}
entities
}
}
impl GtsReader for GtsFileReader {
fn iter(&mut self) -> Box<dyn Iterator<Item = GtsEntity> + '_> {
if !self.initialized {
self.collect_files();
self.initialized = true;
}
tracing::debug!(
"Processing {} files from {:?}",
self.files.len(),
self.paths
);
#[allow(clippy::needless_collect)]
let entities: Vec<GtsEntity> = self
.files
.iter()
.flat_map(|file_path| self.process_file(file_path))
.collect();
Box::new(entities.into_iter())
}
fn read_by_id(&self, _entity_id: &str) -> Option<GtsEntity> {
None
}
fn reset(&mut self) {
self.initialized = false;
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::TempDir;
#[test]
fn test_new_with_default_config() {
let paths = vec!["/tmp/test".to_owned()];
let reader = GtsFileReader::new(&paths, None);
assert_eq!(reader.paths.len(), 1);
assert_eq!(reader.files.len(), 0);
assert!(!reader.initialized);
}
#[test]
fn test_new_with_custom_config() {
let paths = vec!["/tmp/test".to_owned()];
let config = GtsConfig::default();
let reader = GtsFileReader::new(&paths, Some(config));
assert_eq!(reader.paths.len(), 1);
assert!(!reader.initialized);
}
#[test]
fn test_new_with_tilde_expansion() {
let paths = vec!["~/test".to_owned()];
let reader = GtsFileReader::new(&paths, None);
assert!(!reader.paths[0].to_string_lossy().contains('~'));
}
#[test]
fn test_new_with_multiple_paths() {
let paths = vec![
"/tmp/test1".to_owned(),
"/tmp/test2".to_owned(),
"/tmp/test3".to_owned(),
];
let reader = GtsFileReader::new(&paths, None);
assert_eq!(reader.paths.len(), 3);
}
#[test]
fn test_collect_files_all_supported_extensions() {
let temp_dir = TempDir::new().unwrap();
fs::write(temp_dir.path().join("test.json"), r#"{"$id": "test1"}"#).unwrap();
fs::write(temp_dir.path().join("test.yaml"), "id: test2").unwrap();
fs::write(temp_dir.path().join("test.yml"), "id: test3").unwrap();
fs::write(temp_dir.path().join("test.gts"), r#"{"$id": "test4"}"#).unwrap();
fs::write(temp_dir.path().join("test.jsonc"), r#"{"$id": "test5"}"#).unwrap();
let paths = vec![temp_dir.path().to_string_lossy().to_string()];
let mut reader = GtsFileReader::new(&paths, None);
reader.collect_files();
assert_eq!(
reader.files.len(),
5,
"Should collect all 5 supported file types"
);
}
#[test]
fn test_collect_files_invalid_extension_ignored() {
let temp_dir = TempDir::new().unwrap();
let file_path = temp_dir.path().join("test.txt");
fs::write(&file_path, "test content").unwrap();
let paths = vec![file_path.to_string_lossy().to_string()];
let mut reader = GtsFileReader::new(&paths, None);
reader.collect_files();
assert_eq!(reader.files.len(), 0);
}
#[test]
fn test_collect_files_directory() {
let temp_dir = TempDir::new().unwrap();
fs::write(temp_dir.path().join("file1.json"), r#"{"$id": "test1"}"#).unwrap();
fs::write(temp_dir.path().join("file2.json"), r#"{"$id": "test2"}"#).unwrap();
fs::write(temp_dir.path().join("file3.txt"), "ignored").unwrap();
let paths = vec![temp_dir.path().to_string_lossy().to_string()];
let mut reader = GtsFileReader::new(&paths, None);
reader.collect_files();
assert_eq!(reader.files.len(), 2);
}
#[test]
fn test_collect_files_excludes_standard_directories() {
let temp_dir = TempDir::new().unwrap();
fs::write(temp_dir.path().join("file1.json"), r#"{"$id": "test1"}"#).unwrap();
let node_modules = temp_dir.path().join("node_modules");
fs::create_dir(&node_modules).unwrap();
fs::write(node_modules.join("ignored1.json"), r#"{"$id": "ignored1"}"#).unwrap();
let dist = temp_dir.path().join("dist");
fs::create_dir(&dist).unwrap();
fs::write(dist.join("ignored2.json"), r#"{"$id": "ignored2"}"#).unwrap();
let build = temp_dir.path().join("build");
fs::create_dir(&build).unwrap();
fs::write(build.join("ignored3.json"), r#"{"$id": "ignored3"}"#).unwrap();
let paths = vec![temp_dir.path().to_string_lossy().to_string()];
let mut reader = GtsFileReader::new(&paths, None);
reader.collect_files();
assert!(
!reader.files.is_empty(),
"Should find at least the main file"
);
let main_file_found = reader.files.iter().any(|f| {
let path_str = f.to_string_lossy();
path_str.ends_with("file1.json")
});
assert!(main_file_found, "Should find the main file");
}
#[test]
fn test_collect_files_case_insensitive_extension() {
let temp_dir = TempDir::new().unwrap();
fs::write(temp_dir.path().join("test.JSON"), r#"{"$id": "test"}"#).unwrap();
fs::write(temp_dir.path().join("test2.YML"), "id: test2").unwrap();
let paths = vec![temp_dir.path().to_string_lossy().to_string()];
let mut reader = GtsFileReader::new(&paths, None);
reader.collect_files();
assert_eq!(reader.files.len(), 2);
}
#[test]
fn test_collect_files_deduplicates() {
let temp_dir = TempDir::new().unwrap();
let file_path = temp_dir.path().join("test.json");
fs::write(&file_path, r#"{"$id": "test"}"#).unwrap();
let path_str = file_path.to_string_lossy().to_string();
let paths = vec![path_str.clone(), path_str];
let mut reader = GtsFileReader::new(&paths, None);
reader.collect_files();
assert_eq!(reader.files.len(), 1);
}
#[test]
fn test_collect_files_nested_directories() {
let temp_dir = TempDir::new().unwrap();
let nested = temp_dir.path().join("level1").join("level2");
fs::create_dir_all(&nested).unwrap();
fs::write(nested.join("nested.json"), r#"{"$id": "nested"}"#).unwrap();
let paths = vec![temp_dir.path().to_string_lossy().to_string()];
let mut reader = GtsFileReader::new(&paths, None);
reader.collect_files();
assert_eq!(reader.files.len(), 1);
}
#[test]
fn test_load_json_file_valid_json() {
let temp_dir = TempDir::new().unwrap();
let file_path = temp_dir.path().join("test.json");
fs::write(&file_path, r#"{"name": "test", "value": 42}"#).unwrap();
let result = GtsFileReader::load_json_file(&file_path);
assert!(result.is_ok());
let json = result.unwrap();
assert_eq!(json["name"], "test");
assert_eq!(json["value"], 42);
}
#[test]
fn test_load_json_file_yaml_extensions() {
let temp_dir = TempDir::new().unwrap();
let yaml_path = temp_dir.path().join("test.yaml");
fs::write(&yaml_path, "name: test\nvalue: 42").unwrap();
let yaml_result = GtsFileReader::load_json_file(&yaml_path);
assert!(yaml_result.is_ok());
assert_eq!(yaml_result.unwrap()["name"], "test");
let yml_path = temp_dir.path().join("test.yml");
fs::write(&yml_path, "name: test2\nvalue: 43").unwrap();
let yaml_result_yml = GtsFileReader::load_json_file(&yml_path);
assert!(yaml_result_yml.is_ok());
assert_eq!(yaml_result_yml.unwrap()["name"], "test2");
}
#[test]
fn test_load_json_file_invalid_json() {
let temp_dir = TempDir::new().unwrap();
let file_path = temp_dir.path().join("test.json");
fs::write(&file_path, "{invalid json}").unwrap();
let result = GtsFileReader::load_json_file(&file_path);
assert!(result.is_err());
}
#[test]
fn test_load_json_file_invalid_yaml() {
let temp_dir = TempDir::new().unwrap();
let file_path = temp_dir.path().join("test.yaml");
fs::write(&file_path, "invalid: yaml: content: [").unwrap();
let result = GtsFileReader::load_json_file(&file_path);
assert!(result.is_err());
}
#[test]
fn test_load_json_file_nonexistent() {
let result = GtsFileReader::load_json_file(Path::new("/nonexistent/file.json"));
assert!(result.is_err());
}
#[test]
fn test_process_file_single_entity() {
let temp_dir = TempDir::new().unwrap();
let file_path = temp_dir.path().join("test.json");
fs::write(&file_path, r#"{"$id": "gts://test/schema"}"#).unwrap();
let reader = GtsFileReader::new(&[], None);
let entities = reader.process_file(&file_path);
assert_eq!(entities.len(), 1);
}
#[test]
fn test_process_file_array_of_entities() {
let temp_dir = TempDir::new().unwrap();
let file_path = temp_dir.path().join("test.json");
fs::write(
&file_path,
r#"[{"$id": "gts://test/schema1"}, {"$id": "gts://test/schema2"}]"#,
)
.unwrap();
let reader = GtsFileReader::new(&[], None);
let entities = reader.process_file(&file_path);
assert_eq!(entities.len(), 2);
}
#[test]
fn test_process_file_entity_without_explicit_gts_id() {
let temp_dir = TempDir::new().unwrap();
let file_path = temp_dir.path().join("test.json");
fs::write(&file_path, r#"{"name": "test"}"#).unwrap();
let reader = GtsFileReader::new(&[], None);
let entities = reader.process_file(&file_path);
assert!(entities.len() <= 1);
}
#[test]
fn test_process_file_invalid_json_returns_empty() {
let temp_dir = TempDir::new().unwrap();
let file_path = temp_dir.path().join("test.json");
fs::write(&file_path, "{invalid}").unwrap();
let reader = GtsFileReader::new(&[], None);
let entities = reader.process_file(&file_path);
assert_eq!(entities.len(), 0);
}
#[test]
fn test_process_file_array_with_some_without_explicit_ids() {
let temp_dir = TempDir::new().unwrap();
let file_path = temp_dir.path().join("test.json");
fs::write(
&file_path,
r#"[{"$id": "gts://test/schema"}, {"name": "no-id"}]"#,
)
.unwrap();
let reader = GtsFileReader::new(&[], None);
let entities = reader.process_file(&file_path);
assert!(
!entities.is_empty(),
"Should find at least the entity with $id"
);
}
#[test]
fn test_iter_initialization_behavior() {
let temp_dir = TempDir::new().unwrap();
fs::write(
temp_dir.path().join("test.json"),
r#"{"$id": "gts://test/schema"}"#,
)
.unwrap();
let paths = vec![temp_dir.path().to_string_lossy().to_string()];
let mut reader = GtsFileReader::new(&paths, None);
assert!(!reader.initialized);
let entities: Vec<_> = reader.iter().collect();
assert!(reader.initialized);
assert_eq!(entities.len(), 1);
let entities2: Vec<_> = reader.iter().collect();
assert!(reader.initialized);
assert_eq!(entities2.len(), 1);
}
#[test]
fn test_iter_with_multiple_files() {
let temp_dir = TempDir::new().unwrap();
fs::write(
temp_dir.path().join("test1.json"),
r#"{"$id": "gts://test/schema1"}"#,
)
.unwrap();
fs::write(
temp_dir.path().join("test2.json"),
r#"{"$id": "gts://test/schema2"}"#,
)
.unwrap();
let paths = vec![temp_dir.path().to_string_lossy().to_string()];
let mut reader = GtsFileReader::new(&paths, None);
let entities: Vec<_> = reader.iter().collect();
assert_eq!(entities.len(), 2);
}
#[test]
fn test_read_by_id_always_returns_none() {
let reader = GtsFileReader::new(&[], None);
let result = reader.read_by_id("gts://test/schema");
assert!(result.is_none());
}
#[test]
fn test_reset_clears_initialized_flag() {
let temp_dir = TempDir::new().unwrap();
fs::write(
temp_dir.path().join("test.json"),
r#"{"$id": "gts://test/schema"}"#,
)
.unwrap();
let paths = vec![temp_dir.path().to_string_lossy().to_string()];
let mut reader = GtsFileReader::new(&paths, None);
let _: Vec<_> = reader.iter().collect();
assert!(reader.initialized);
reader.reset();
assert!(!reader.initialized);
}
#[test]
fn test_reset_allows_reinitialization() {
let temp_dir = TempDir::new().unwrap();
fs::write(
temp_dir.path().join("test.json"),
r#"{"$id": "gts://test/schema"}"#,
)
.unwrap();
let paths = vec![temp_dir.path().to_string_lossy().to_string()];
let mut reader = GtsFileReader::new(&paths, None);
let _: Vec<_> = reader.iter().collect();
reader.reset();
let entities: Vec<_> = reader.iter().collect();
assert_eq!(entities.len(), 1);
assert!(reader.initialized);
}
}