Skip to main content

tsz_cli/
driver_sources.rs

1//! Source file I/O, config helpers, and file reading for the compilation driver.
2
3use super::*;
4
5/// Result of reading a source file - either valid text or binary/unreadable
///
/// Produced by `read_source_file`; each variant carries a `String` payload.
6#[derive(Debug, Clone)]
7pub enum FileReadResult {
8    /// File was decoded as text (valid UTF-8, or UTF-16 identified by its byte-order mark)
9    Text(String),
10    /// File appears to be binary (emit TS1490), but keep best-effort text for parser diagnostics
11    Binary(String),
12    /// File could not be read (I/O error); the payload is the error rendered as a string
13    Error(String),
14}
15
16/// Read a source file, detecting binary files that should emit TS1490.
17///
18/// TypeScript detects binary files by checking for:
19/// - UTF-16 BOM (FE FF for BE, FF FE for LE)
20/// - Non-valid UTF-8 sequences
21/// - Many control bytes (not expected in source files)
22/// - Files with many null bytes
23pub fn read_source_file(path: &Path) -> FileReadResult {
24    // Read as bytes first
25    let bytes = match std::fs::read(path) {
26        Ok(b) => b,
27        Err(e) => return FileReadResult::Error(e.to_string()),
28    };
29
30    // Check for UTF-16 BOM
31    // UTF-16 BE: FE FF
32    // UTF-16 LE: FF FE
33    if bytes.len() >= 2 {
34        if bytes[0] == 0xFE && bytes[1] == 0xFF {
35            // Decode UTF-16 BE
36            let u16_words: Vec<u16> = bytes[2..]
37                .chunks_exact(2)
38                .map(|chunk| {
39                    if chunk.len() == 2 {
40                        u16::from_be_bytes([chunk[0], chunk[1]])
41                    } else {
42                        0
43                    }
44                })
45                .collect();
46            return FileReadResult::Text(String::from_utf16_lossy(&u16_words));
47        } else if bytes[0] == 0xFF && bytes[1] == 0xFE {
48            // Decode UTF-16 LE
49            let u16_words: Vec<u16> = bytes[2..]
50                .chunks_exact(2)
51                .map(|chunk| {
52                    if chunk.len() == 2 {
53                        u16::from_le_bytes([chunk[0], chunk[1]])
54                    } else {
55                        0
56                    }
57                })
58                .collect();
59            return FileReadResult::Text(String::from_utf16_lossy(&u16_words));
60        }
61    }
62
63    // Check for binary indicators
64    if is_binary_file(&bytes) {
65        return FileReadResult::Binary(String::from_utf8_lossy(&bytes).to_string());
66    }
67
68    // Try to decode as UTF-8
69    match String::from_utf8(bytes) {
70        Ok(text) => FileReadResult::Text(text),
71        Err(err) => FileReadResult::Binary(String::from_utf8_lossy(err.as_bytes()).to_string()),
72    }
73}
74
75/// Check if file content appears to be binary (not valid source code).
76///
77/// Matches TypeScript's binary detection:
78/// - UTF-16 BOM at start
79/// - Many consecutive null bytes (embedded binaries, corrupted files)
80/// - Repeated control bytes in first 1024 bytes
81pub(super) fn is_binary_file(bytes: &[u8]) -> bool {
82    if bytes.is_empty() {
83        return false;
84    }
85
86    // Check for many null bytes (binary file indicator)
87    // TypeScript considers files with many nulls as binary
88    let null_count = bytes.iter().take(1024).filter(|&&b| b == 0).count();
89    if null_count > 10 {
90        return true;
91    }
92
93    // Check for consecutive null bytes (UTF-16 or binary)
94    // UTF-16 text will have null bytes between ASCII characters
95    let mut consecutive_nulls = 0;
96    for &byte in bytes.iter().take(512) {
97        if byte == 0 {
98            consecutive_nulls += 1;
99            if consecutive_nulls >= 4 {
100                return true;
101            }
102        } else {
103            consecutive_nulls = 0;
104        }
105    }
106
107    // Check for non-whitespace control bytes (e.g. U+0000/Control-Range from garbled UTF-16 read as UTF-8)
108    let control_count = bytes
109        .iter()
110        .take(1024)
111        .filter(|&&b| {
112            b < 0x20 && b != b'\t' && b != b'\n' && b != b'\r' && b != b'\x0C' && b != b'\x0B'
113        })
114        .count();
115    if control_count >= 4 {
116        return true;
117    }
118
119    false
120}
121
/// One file in the set of sources gathered by `read_source_files`.
122#[derive(Debug, Clone)]
123pub(super) struct SourceEntry {
    /// Path of the file, as passed through `canonicalize_or_owned` by the reader.
124    pub(super) path: PathBuf,
    /// File contents. `None` when the reader reused cached results for this
    /// file instead of reading it; consumers then fall back to reading from disk.
125    pub(super) text: Option<String>,
126    /// If true, this file appears to be binary (emit TS1490)
127    pub(super) is_binary: bool,
128}
129
130pub(super) fn sources_have_no_default_lib(sources: &[SourceEntry]) -> bool {
131    sources.iter().any(source_has_no_default_lib)
132}
133
134pub(super) fn source_has_no_default_lib(source: &SourceEntry) -> bool {
135    if let Some(text) = source.text.as_deref() {
136        return has_no_default_lib_directive(text);
137    }
138    let Ok(text) = std::fs::read_to_string(&source.path) else {
139        return false;
140    };
141    has_no_default_lib_directive(&text)
142}
143
144pub(super) fn has_no_default_lib_directive(source: &str) -> bool {
145    for line in source.lines() {
146        let trimmed = line.trim_start();
147        if !trimmed.starts_with("///") {
148            if trimmed.is_empty() {
149                continue;
150            }
151            break;
152        }
153        if let Some(true) = parse_reference_no_default_lib_value(trimmed) {
154            return true;
155        }
156    }
157    false
158}
159
160pub(super) fn sources_have_no_types_and_symbols(sources: &[SourceEntry]) -> bool {
161    sources.iter().any(source_has_no_types_and_symbols)
162}
163
164pub(super) fn source_has_no_types_and_symbols(source: &SourceEntry) -> bool {
165    if let Some(text) = source.text.as_deref() {
166        return has_no_types_and_symbols_directive(text);
167    }
168    let Ok(text) = std::fs::read_to_string(&source.path) else {
169        return false;
170    };
171    has_no_types_and_symbols_directive(&text)
172}
173
/// Look for a `// @noTypesAndSymbols: <value>` comment directive within the
/// first 32 lines of `source`.
///
/// The directive name is matched case-insensitively and must be followed by a
/// `:`. The first well-formed occurrence decides the result: `true` only when
/// its value (the first token delimited by whitespace, `,` or `;`) equals
/// `true`, ignoring ASCII case. Lines that are not `//` comments, or comment
/// lines without a well-formed directive, are skipped.
pub(crate) fn has_no_types_and_symbols_directive(source: &str) -> bool {
    const DIRECTIVE: &str = "@notypesandsymbols";

    for raw_line in source.lines().take(32) {
        let comment = raw_line.trim_start();
        if !comment.starts_with("//") {
            continue;
        }

        // ASCII lowercasing keeps byte offsets stable, so the index found in
        // the lowered copy is valid for slicing the original line.
        let Some(start) = comment.to_ascii_lowercase().find(DIRECTIVE) else {
            continue;
        };

        let after_name = comment[start + DIRECTIVE.len()..].trim_start();
        let Some(after_colon) = after_name.strip_prefix(':') else {
            continue;
        };

        let first_token = after_colon
            .split(|c: char| matches!(c, ',' | ';') || c.is_whitespace())
            .find(|token| !token.is_empty())
            .unwrap_or("");
        return first_token.eq_ignore_ascii_case("true");
    }

    false
}
201
202pub(super) fn parse_reference_no_default_lib_value(line: &str) -> Option<bool> {
203    let needle = "no-default-lib";
204    let lower = line.to_ascii_lowercase();
205    let idx = lower.find(needle)?;
206    let mut rest = &line[idx + needle.len()..];
207    rest = rest.trim_start();
208    if !rest.starts_with('=') {
209        return None;
210    }
211    rest = rest[1..].trim_start();
212    let quote = rest.as_bytes().first().copied()?;
213    if quote != b'"' && quote != b'\'' {
214        return None;
215    }
216    let rest = &rest[1..];
217    let end = rest.find(quote as char)?;
218    let value = rest[..end].trim();
219    match value.to_ascii_lowercase().as_str() {
220        "true" => Some(true),
221        "false" => Some(false),
222        _ => None,
223    }
224}
225
/// Everything `read_source_files` gathered while walking the source graph.
226pub(super) struct SourceReadResult {
    /// All visited files, sorted by path.
227    pub(super) sources: Vec<SourceEntry>,
    /// For each visited file, the set of files it was found to depend on.
228    pub(super) dependencies: FxHashMap<PathBuf, FxHashSet<PathBuf>>,
    /// `(referencing file, type name)` pairs for `/// <reference types="..." />`
    /// directives that could not be resolved.
229    pub(super) type_reference_errors: Vec<(PathBuf, String)>,
230}
231
232pub(crate) fn find_tsconfig(cwd: &Path) -> Option<PathBuf> {
233    let candidate = cwd.join("tsconfig.json");
234    candidate
235        .is_file()
236        .then(|| canonicalize_or_owned(&candidate))
237}
238
239pub(crate) fn resolve_tsconfig_path(cwd: &Path, project: Option<&Path>) -> Result<Option<PathBuf>> {
240    let Some(project) = project else {
241        return Ok(find_tsconfig(cwd));
242    };
243
244    let mut candidate = if project.is_absolute() {
245        project.to_path_buf()
246    } else {
247        cwd.join(project)
248    };
249
250    if candidate.is_dir() {
251        candidate = candidate.join("tsconfig.json");
252    }
253
254    if !candidate.exists() {
255        bail!("tsconfig not found at {}", candidate.display());
256    }
257
258    if !candidate.is_file() {
259        bail!("project path is not a file: {}", candidate.display());
260    }
261
262    Ok(Some(canonicalize_or_owned(&candidate)))
263}
264
265pub(crate) fn load_config(path: Option<&Path>) -> Result<Option<TsConfig>> {
266    let Some(path) = path else {
267        return Ok(None);
268    };
269
270    let config = load_tsconfig(path)?;
271    Ok(Some(config))
272}
273
274/// Return type for config loading that includes removed-but-honored suppress flags.
///
/// Built by `load_config_with_diagnostics`.
275pub(crate) struct LoadedConfig {
    /// The parsed tsconfig; `None` when no config path was supplied.
276    pub config: Option<TsConfig>,
    /// Diagnostics reported while loading the config (empty when no path was supplied).
277    pub diagnostics: Vec<Diagnostic>,
    /// Copied from the loader's result; `false` when no config path was supplied.
    /// NOTE(review): presumably mirrors the removed `suppressExcessPropertyErrors` option - confirm.
278    pub suppress_excess_property_errors: bool,
    /// Copied from the loader's result; `false` when no config path was supplied.
    /// NOTE(review): presumably mirrors the removed `suppressImplicitAnyIndexErrors` option - confirm.
279    pub suppress_implicit_any_index_errors: bool,
280}
281
282pub(crate) fn load_config_with_diagnostics(path: Option<&Path>) -> Result<LoadedConfig> {
283    let Some(path) = path else {
284        return Ok(LoadedConfig {
285            config: None,
286            diagnostics: Vec::new(),
287            suppress_excess_property_errors: false,
288            suppress_implicit_any_index_errors: false,
289        });
290    };
291
292    let parsed = load_tsconfig_with_diagnostics(path)?;
293    Ok(LoadedConfig {
294        config: Some(parsed.config),
295        diagnostics: parsed.diagnostics,
296        suppress_excess_property_errors: parsed.suppress_excess_property_errors,
297        suppress_implicit_any_index_errors: parsed.suppress_implicit_any_index_errors,
298    })
299}
300
/// Pick the directory that relative config paths are resolved against.
///
/// This is the parent directory of `tsconfig_path` when one is given and has
/// a parent; otherwise it is `cwd`.
pub(crate) fn config_base_dir(cwd: &Path, tsconfig_path: Option<&Path>) -> PathBuf {
    let config_dir = match tsconfig_path {
        Some(config_file) => config_file.parent(),
        None => None,
    };
    match config_dir {
        Some(dir) => dir.to_path_buf(),
        None => cwd.to_path_buf(),
    }
}
306
307pub(super) fn build_discovery_options(
308    args: &CliArgs,
309    base_dir: &Path,
310    tsconfig_path: Option<&Path>,
311    config: Option<&TsConfig>,
312    out_dir: Option<&Path>,
313    resolved: &ResolvedCompilerOptions,
314) -> Result<FileDiscoveryOptions> {
315    let follow_links = env_flag("TSZ_FOLLOW_SYMLINKS");
316    if !args.files.is_empty() {
317        return Ok(FileDiscoveryOptions {
318            base_dir: base_dir.to_path_buf(),
319            files: args.files.clone(),
320            include: None,
321            exclude: None,
322            out_dir: out_dir.map(Path::to_path_buf),
323            follow_links,
324            allow_js: resolved.allow_js,
325        });
326    }
327
328    let Some(config) = config else {
329        bail!("no input files specified and no tsconfig.json found");
330    };
331    let Some(tsconfig_path) = tsconfig_path else {
332        bail!("no tsconfig.json path available");
333    };
334
335    let mut options = FileDiscoveryOptions::from_tsconfig(tsconfig_path, config, out_dir);
336    options.follow_links = follow_links;
337    options.allow_js = resolved.allow_js;
338    Ok(options)
339}
340
341/// Returns (resolved files, unresolved type names from tsconfig `types` array).
342pub(super) fn collect_type_root_files(
343    base_dir: &Path,
344    options: &ResolvedCompilerOptions,
345) -> (Vec<PathBuf>, Vec<String>) {
346    let roots = match options.type_roots.as_ref() {
347        Some(roots) => roots.clone(),
348        None => default_type_roots(base_dir),
349    };
350    if roots.is_empty() {
351        return (Vec::new(), Vec::new());
352    }
353
354    let mut files = std::collections::BTreeSet::new();
355    if let Some(types) = options.types.as_ref() {
356        let mut unresolved = Vec::new();
357        for name in types {
358            if let Some(entry) = resolve_type_package_from_roots(name, &roots, options) {
359                files.insert(entry);
360            } else {
361                unresolved.push(name.clone());
362            }
363        }
364        return (files.into_iter().collect(), unresolved);
365    }
366
367    for root in roots {
368        for package_root in collect_type_packages_from_root(&root) {
369            if let Some(entry) = resolve_type_package_entry(&package_root, options) {
370                files.insert(entry);
371            }
372        }
373    }
374
375    (files.into_iter().collect(), Vec::new())
376}
377
/// Read the root files in `paths` plus every file reachable from them.
///
/// Files are processed from a FIFO worklist. For each file read, new work is
/// discovered from three sources: module specifiers found in its text,
/// `/// <reference types="..." />` directives, and
/// `/// <reference path="..." />` directives. Binary files are recorded but
/// contribute no outgoing edges, and `options.no_resolve` disables following
/// all three kinds of edge.
///
/// When both `cache` and `changed_paths` are supplied, a file that is not in
/// `changed_paths` and has both a bind-cache entry and a cached dependency set
/// is not re-read: it is recorded with `text: None` and its cached
/// dependencies are queued instead.
///
/// The returned sources are sorted by their (lossily stringified) path.
/// Unresolvable `reference types` directives are collected in
/// `type_reference_errors`; unresolvable `reference path` directives are
/// skipped here.
///
/// # Errors
///
/// Fails when a file that has to be read cannot be read.
378pub(super) fn read_source_files(
379    paths: &[PathBuf],
380    base_dir: &Path,
381    options: &ResolvedCompilerOptions,
382    cache: Option<&CompilationCache>,
383    changed_paths: Option<&FxHashSet<PathBuf>>,
384) -> Result<SourceReadResult> {
385    let mut sources: FxHashMap<PathBuf, (Option<String>, bool)> = FxHashMap::default(); // (text, is_binary)
386    let mut dependencies: FxHashMap<PathBuf, FxHashSet<PathBuf>> = FxHashMap::default();
    // `seen` holds every path ever queued, so each file is processed at most once.
387    let mut seen = FxHashSet::default();
388    let mut pending = VecDeque::new();
389    let mut resolution_cache = ModuleResolutionCache::default();
390    let mut type_reference_errors = Vec::new();
    // Cached results are only trusted when the caller also says which files changed.
391    let use_cache = cache.is_some() && changed_paths.is_some();
392
    // Seed the worklist with the root files.
393    for path in paths {
394        let canonical = canonicalize_or_owned(path);
395        if seen.insert(canonical.clone()) {
396            pending.push_back(canonical);
397        }
398    }
399
400    while let Some(path) = pending.pop_front() {
401        // Use cached bind result only when we know the file hasn't changed
402        // (changed_paths is provided and this file is not in it)
403        if use_cache
404            && let Some(cache) = cache
405            && let Some(changed_paths) = changed_paths
406            && !changed_paths.contains(&path)
407            && let (Some(_), Some(cached_deps)) =
408                (cache.bind_cache.get(&path), cache.dependencies.get(&path))
409        {
410            dependencies.insert(path.clone(), cached_deps.clone());
411            sources.insert(path.clone(), (None, false)); // Cached files are not binary
            // Cached dependencies still have to be visited so they end up in `sources`.
412            for dep in cached_deps {
413                if seen.insert(dep.clone()) {
414                    pending.push_back(dep.clone());
415                }
416            }
417            continue;
418        }
419
420        // Read file with binary detection
421        let (text, is_binary) = match read_source_file(&path) {
422            FileReadResult::Text(t) => (t, false),
423            FileReadResult::Binary(text) => (text, true),
424            FileReadResult::Error(e) => {
425                return Err(anyhow::anyhow!("failed to read {}: {}", path.display(), e));
426            }
427        };
        // Binary files are not scanned for imports or reference directives.
428        let (specifiers, type_refs) = if is_binary {
429            (vec![], vec![])
430        } else {
431            (
432                collect_module_specifiers_from_text(&path, &text),
433                tsz::checker::triple_slash_validator::extract_reference_types(&text),
434            )
435        };
436        let reference_paths = if is_binary || options.no_resolve {
437            vec![]
438        } else {
439            tsz::checker::triple_slash_validator::extract_reference_paths(&text)
440        };
441
442        sources.insert(path.clone(), (Some(text), is_binary));
        // Every file that was read gets a dependency entry, even if it stays empty.
443        let entry = dependencies.entry(path.clone()).or_default();
444
        // Resolve module specifiers collected from the file's text
445        if !options.no_resolve {
446            for specifier in specifiers {
447                if let Some(resolved) = resolve_module_specifier(
448                    &path,
449                    &specifier,
450                    options,
451                    base_dir,
452                    &mut resolution_cache,
453                    &seen,
454                ) {
455                    let canonical = canonicalize_or_owned(&resolved);
456                    entry.insert(canonical.clone());
457                    if seen.insert(canonical.clone()) {
458                        pending.push_back(canonical);
459                    }
460                }
461            }
462        }
463
464        // Resolve /// <reference types="..." /> directives
465        if !type_refs.is_empty() && !options.no_resolve {
466            let type_roots = options
467                .type_roots
468                .clone()
469                .unwrap_or_else(|| default_type_roots(base_dir));
470            for (type_name, resolution_mode, _line) in type_refs {
471                let resolved =
472                    if let Some(ref mode) = resolution_mode {
473                        // With explicit resolution-mode, use exports map with the specified condition
474                        let candidates =
475                            crate::driver_resolution::type_package_candidates_pub(&type_name);
476                        let mut result = None;
                        // First match wins: roots in order, then candidates in order.
477                        for root in &type_roots {
478                            for candidate in &candidates {
479                                let package_root = root.join(candidate);
480                                if package_root.is_dir()
481                                    && let Some(entry) =
482                                    crate::driver_resolution::resolve_type_package_entry_with_mode(
483                                        &package_root, mode, options,
484                                    )
485                                {
486                                    result = Some(entry);
487                                    break;
488                                }
489                            }
490                            if result.is_some() {
491                                break;
492                            }
493                        }
494                        result
495                    } else {
496                        resolve_type_package_from_roots(&type_name, &type_roots, options)
497                    };
498                if let Some(resolved) = resolved {
499                    let canonical = canonicalize_or_owned(&resolved);
500                    entry.insert(canonical.clone());
501                    if seen.insert(canonical.clone()) {
502                        pending.push_back(canonical);
503                    }
504                } else {
                    // Recorded for the caller rather than failing the whole read.
505                    type_reference_errors.push((path.clone(), type_name));
506                }
507            }
508        }
509
510        // Resolve /// <reference path="..." /> directives
511        if !reference_paths.is_empty() {
            // Shadows the `base_dir` parameter: reference paths are resolved
            // relative to the directory of the referencing file.
512            let base_dir = path.parent().unwrap_or_else(|| Path::new(""));
513            for (reference_path, _line_num, _quote_offset) in reference_paths {
514                if reference_path.is_empty() {
515                    continue;
516                }
517                let mut candidates = Vec::new();
518                let direct_reference = base_dir.join(&reference_path);
519                candidates.push(direct_reference);
                // Extensions are only tried when the reference contains no '.' at all.
520                if !reference_path.contains('.') {
521                    for ext in [".ts", ".tsx", ".d.ts"] {
522                        candidates.push(base_dir.join(format!("{reference_path}{ext}")));
523                    }
524                }
525
                // The first candidate that exists as a file wins; otherwise the
                // reference is skipped without recording an error here.
526                let Some(resolved_reference) = candidates
527                    .iter()
528                    .find(|candidate| candidate.is_file())
529                    .map(|candidate| canonicalize_or_owned(candidate))
530                else {
531                    continue;
532                };
533                entry.insert(resolved_reference.clone());
534                if seen.insert(resolved_reference.clone()) {
535                    pending.push_back(resolved_reference);
536                }
537            }
538        }
539    }
540
    // Hash-map iteration order is unspecified, so sort for a stable result.
541    let mut list: Vec<SourceEntry> = sources
542        .into_iter()
543        .map(|(path, (text, is_binary))| SourceEntry {
544            path,
545            text,
546            is_binary,
547        })
548        .collect();
549    list.sort_by(|left, right| {
550        left.path
551            .to_string_lossy()
552            .cmp(&right.path.to_string_lossy())
553    });
554    Ok(SourceReadResult {
555        sources: list,
556        dependencies,
557        type_reference_errors,
558    })
559}