src2md/
lib.rs

1//! # src2md Library
2//!
3//! This crate can be used to:
4//!
5//! - Collect all source/text files from a project and compile them into a Markdown file
6//! - Restore original source files back from a generated Markdown file
7//! - Clone and process git repositories (with the `git` feature)
8//!
9//! ## Features
10//!
11//! - `git` - Enables git repository cloning support via `--git <url>`
12//!
13//! ## Default Exclusions
14//!
15//! The following are always excluded by default:
16//! - Hidden files and directories (starting with `.`)
17//! - Lock files (package-lock.json, yarn.lock, Cargo.lock, etc.)
18//! - Previous src2md output files
19//!
20//! ## Usage
21//!
22//! ### To generate a Markdown file:
23//!
24//! ```rust,no_run
25//! use src2md::{Config, run_src2md};
26//! use std::collections::HashSet;
27//! use std::path::PathBuf;
28//!
29//! #[tokio::main]
30//! async fn main() -> anyhow::Result<()> {
31//!     let config = Config {
32//!         output_path: PathBuf::from("output.md"),
33//!         ignore_file: None,
34//!         specific_paths: HashSet::new(),
35//!         project_root: std::env::current_dir()?,
36//!         restore_input: None,
37//!         restore_path: None,
38//!         verbosity: 0,
39//!         fail_fast: true,
40//!         extensions: HashSet::new(), // empty = include all
41//!         #[cfg(feature = "git")]
42//!         git_url: None,
43//!         #[cfg(feature = "git")]
44//!         git_branch: None,
45//!     };
46//!
47//!     run_src2md(config).await
48//! }
49//! ```
50//!
51//! ### To restore files from a Markdown file:
52//!
53//! ```rust,no_run
54//! use src2md::extract_from_markdown;
55//! use std::path::PathBuf;
56//!
57//! #[tokio::main]
58//! async fn main() -> anyhow::Result<()> {
59//!     extract_from_markdown(
60//!         &PathBuf::from("generated.md"),
61//!         Some(&PathBuf::from("restored/")),
62//!     ).await
63//! }
64//! ```
65
66pub mod cli;
67pub mod extractor;
68pub mod filewalker;
69pub mod utils;
70pub mod writer;
71
72#[cfg(feature = "git")]
73pub mod git;
74
75pub use cli::Config;
76pub use extractor::extract_from_markdown;
77pub use filewalker::collect_files;
78pub use writer::{MarkdownWriter, OUTPUT_MAGIC_BYTES, OUTPUT_MAGIC_HEADER};
79
80#[cfg(feature = "git")]
81pub use git::{clone_repository, repo_name_from_url, ClonedRepo};
82
83use anyhow::Result;
84use log::error;
85use tokio::fs::File;
86use tokio::io::BufWriter;
87
88/// Generate a Markdown file from source/text files
89///
90/// If `fail_fast` is true in the config, stops on first error.
91/// Otherwise, logs errors and continues processing remaining files.
92///
93/// # Output File Handling
94///
95/// The output file and any previous src2md outputs are automatically excluded
96/// from collection to prevent:
97/// - Race conditions (writing while reading the same file)
98/// - Self-inclusion (including previous outputs in new outputs)
99///
100/// # Default Exclusions
101///
102/// Hidden files, lock files, and previous src2md outputs are always excluded.
103/// Use the `extensions` field to filter by file type.
104pub async fn run_src2md(config: Config) -> Result<()> {
105    let file = File::create(&config.output_path).await?;
106    let buf_writer = BufWriter::new(file);
107    let mut md_writer = MarkdownWriter::new(buf_writer);
108
109    let entries = collect_files(
110        &config.project_root,
111        config.ignore_file.as_ref(),
112        &config.specific_paths,
113        Some(&config.output_path),
114        &config.extensions,
115    )?;
116
117    for entry in entries {
118        if let Err(e) = md_writer.write_entry(&entry, &config.project_root).await {
119            if config.fail_fast {
120                return Err(e);
121            }
122            error!("Failed to write {}: {e}", entry.path().display());
123        }
124    }
125
126    md_writer.flush().await?;
127    Ok(())
128}
129
130/// Generate a Markdown file from a specific directory path.
131///
132/// This is a convenience function that creates a Config and runs src2md.
133/// Useful when you have a path (e.g., from a cloned git repo) and want to
134/// process it without constructing a full Config.
135pub async fn run_src2md_on_path(
136    project_root: std::path::PathBuf,
137    output_path: std::path::PathBuf,
138    ignore_file: Option<std::path::PathBuf>,
139    extensions: &std::collections::HashSet<String>,
140    fail_fast: bool,
141) -> Result<()> {
142    let file = File::create(&output_path).await?;
143    let buf_writer = BufWriter::new(file);
144    let mut md_writer = MarkdownWriter::new(buf_writer);
145
146    let entries = collect_files(
147        &project_root,
148        ignore_file.as_ref(),
149        &std::collections::HashSet::new(),
150        Some(&output_path),
151        extensions,
152    )?;
153
154    for entry in entries {
155        if let Err(e) = md_writer.write_entry(&entry, &project_root).await {
156            if fail_fast {
157                return Err(e);
158            }
159            error!("Failed to write {}: {e}", entry.path().display());
160        }
161    }
162
163    md_writer.flush().await?;
164    Ok(())
165}