mod big_api;
mod db_create;
mod geometry;
mod parse;
mod pdf;
pub use parse::ParseOutputDetail;
use std::path::{Path, PathBuf};
/// Decree identifier that `Pipeline::new` uses as the default `decree` value.
pub const DATA_DECREE: &str = "Kepmendagri No 300.2.2-2138 Tahun 2025";
/// Default download URL of the source PDF; `Pipeline::new` uses it for `pdf_url`.
const PDF_URL: &str =
"https://drive.google.com/uc?export=download&id=1o_m621D00TtwCwQMLn8XUnV3nolamPDm";
/// Default query endpoint of the BIG map service; `Pipeline::new` uses it for
/// `big_api_url`.
const BIG_API_URL: &str =
"https://geoservices.big.go.id/gis/rest/services/BAPANAS/Batas_Administrasi/MapServer/2/query";
/// Ring-handling mode forwarded to `db_create::build_poly_db` when the polygon
/// database is built.
///
/// NOTE(review): the exact treatment of each mode lives in `db_create`; the
/// variant descriptions below are inferred from the names only — confirm there.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum RingClassification {
    /// Presumably stores every ring as an independent ring (TODO confirm).
    SeparateRings,
    /// Presumably distinguishes holes from outer rings (TODO confirm).
    ClassifyHoles,
}
/// Error type returned by the pipeline.
///
/// Holds a human-readable message and, optionally, the underlying error that
/// caused it. The cause is reachable through [`std::error::Error::source`].
pub struct PipelineError {
    /// Text printed by the `Display` implementation.
    message: String,
    /// Underlying cause, if any.
    source: Option<Box<dyn std::error::Error + Send + Sync>>,
}

impl PipelineError {
    /// Creates an error carrying `msg` and no underlying cause.
    pub fn new(msg: impl Into<String>) -> Self {
        PipelineError {
            message: msg.into(),
            source: None,
        }
    }

    /// Wraps `self` in a new error whose message is `msg`.
    ///
    /// The original error is kept as the source of the returned one.
    pub fn context(self, msg: impl Into<String>) -> Self {
        PipelineError {
            message: msg.into(),
            source: Some(Box::new(self)),
        }
    }
}

impl std::fmt::Display for PipelineError {
    /// Prints only this error's own message; causes are exposed via `source()`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.message)
    }
}

impl std::fmt::Debug for PipelineError {
    /// Prints the message followed by every error in the source chain.
    ///
    /// With zero or one source the output is `PipelineError(msg)` or
    /// `PipelineError(msg, source: cause)`. Deeper causes are appended as
    /// `, caused by: ...` so the root cause is not lost when errors are
    /// wrapped more than once.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "PipelineError({}", self.message)?;
        let mut label = ", source: ";
        // Called through the trait path: `self.source` alone would name the field.
        let mut cause = std::error::Error::source(self);
        while let Some(err) = cause {
            write!(f, "{}{}", label, err)?;
            label = ", caused by: ";
            cause = err.source();
        }
        write!(f, ")")
    }
}

impl std::error::Error for PipelineError {
    /// Returns the wrapped cause, if one was recorded.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        self.source
            .as_ref()
            .map(|e| e.as_ref() as &(dyn std::error::Error + 'static))
    }
}
/// Extension trait that turns the error of a `Result` into a [`PipelineError`]
/// carrying a caller-supplied message.
trait PipelineResultExt<T> {
    /// Maps `Err(e)` to a `PipelineError` whose message is `msg` and whose
    /// source is `e`; `Ok` values pass through untouched.
    fn ctx(self, msg: impl Into<String>) -> Result<T, PipelineError>;
}

impl<T, E> PipelineResultExt<T> for Result<T, E>
where
    E: std::error::Error + Send + Sync + 'static,
{
    fn ctx(self, msg: impl Into<String>) -> Result<T, PipelineError> {
        match self {
            Ok(value) => Ok(value),
            // `msg` is only converted on the error path.
            Err(cause) => Err(PipelineError {
                message: msg.into(),
                source: Some(Box::new(cause)),
            }),
        }
    }
}
impl From<std::io::Error> for PipelineError {
fn from(e: std::io::Error) -> Self {
PipelineError {
message: e.to_string(),
source: Some(Box::new(e)),
}
}
}
impl From<rusqlite::Error> for PipelineError {
fn from(e: rusqlite::Error) -> Self {
PipelineError {
message: e.to_string(),
source: Some(Box::new(e)),
}
}
}
impl From<serde_json::Error> for PipelineError {
fn from(e: serde_json::Error) -> Self {
PipelineError {
message: e.to_string(),
source: Some(Box::new(e)),
}
}
}
/// Summary of a successful [`Pipeline::run`].
///
/// Derives `Debug` so callers can log it and use `unwrap_err` / `expect_err`
/// on the `Result` returned by `run`, and `Clone` so it can be shared freely.
#[derive(Debug, Clone)]
pub struct PipelineOutput {
    /// Path of the main database that was written.
    pub db_path: PathBuf,
    /// Path of the polygon database, present only when polygons were requested.
    pub poly_db_path: Option<PathBuf>,
    /// Path of the saved parsed-villages file, present only when saving was requested.
    pub parsed_villages_path: Option<PathBuf>,
    /// Number of merged village records written to the main database.
    pub village_count: usize,
    /// SHA-256 digest string computed for the main database file.
    pub sha256: String,
}
/// Builder-style configuration for one pipeline run; consumed by `Pipeline::run`.
pub struct Pipeline {
/// URL handed to `pdf::ensure_pdf`.
pdf_url: String,
/// URL handed to `big_api::fetch_big_data`.
big_api_url: String,
/// Directory handed to the PDF and BIG helpers; `parsed_villages.json` is also
/// written here when saving parsed villages is enabled.
cache_dir: PathBuf,
/// Path of the main database built by `db_create::build_db`.
output: PathBuf,
/// Decree string handed to `db_create::build_db`.
decree: String,
/// Forwarded to `big_api::fetch_big_data`.
/// NOTE(review): presumably bypasses cached BIG data — confirm in `big_api`.
force_refresh_big: bool,
/// Ring-handling mode forwarded to `db_create::build_poly_db`.
ring_classification: RingClassification,
/// Forwarded to `big_api::fetch_big_data`; also decides whether the polygon
/// database is built.
include_polygons: bool,
/// When `Some`, parsed villages are saved with this detail level.
save_parsed_villages: Option<parse::ParseOutputDetail>,
}
impl Pipeline {
pub fn new() -> Self {
Self {
pdf_url: PDF_URL.to_string(),
big_api_url: BIG_API_URL.to_string(),
cache_dir: PathBuf::from("data/cache"),
output: PathBuf::from("data/locations.db"),
decree: DATA_DECREE.to_string(),
force_refresh_big: false,
ring_classification: RingClassification::SeparateRings,
include_polygons: false,
save_parsed_villages: None,
}
}
pub fn pdf_url(mut self, url: &str) -> Self {
self.pdf_url = url.to_string();
self
}
pub fn big_api_url(mut self, url: &str) -> Self {
self.big_api_url = url.to_string();
self
}
pub fn cache_dir(mut self, dir: &Path) -> Self {
self.cache_dir = dir.to_path_buf();
self
}
pub fn output(mut self, path: &Path) -> Self {
self.output = path.to_path_buf();
self
}
pub fn decree(mut self, decree: &str) -> Self {
self.decree = decree.to_string();
self
}
pub fn force_refresh_big(mut self, yes: bool) -> Self {
self.force_refresh_big = yes;
self
}
pub fn ring_classification(mut self, mode: RingClassification) -> Self {
self.ring_classification = mode;
self
}
pub fn include_polygons(mut self, yes: bool) -> Self {
self.include_polygons = yes;
self
}
pub fn save_parsed_villages(mut self, detail: parse::ParseOutputDetail) -> Self {
self.save_parsed_villages = Some(detail);
self
}
pub fn run(self) -> Result<PipelineOutput, PipelineError> {
eprintln!("Starting pipeline...");
let pdf_path = pdf::ensure_pdf(&self.pdf_url, &self.cache_dir)?;
let text = pdf::extract_text(&pdf_path)?;
let villages = parse::parse_villages(&text);
let parsed_villages_path = if let Some(detail) = self.save_parsed_villages {
let path = self.cache_dir.join("parsed_villages.json");
parse::save_parsed_villages(&villages, detail, &path)?;
Some(path)
} else {
None
};
let big_data = big_api::fetch_big_data(
&self.big_api_url,
&self.cache_dir,
self.force_refresh_big,
self.include_polygons,
)?;
let merged = db_create::merge_villages(&villages, &big_data);
let build_date = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_secs();
db_create::build_db(&merged, &self.output, &self.decree, "official", build_date)?;
let poly_db_path = if self.include_polygons {
let poly_path = self.output.with_extension("poly.db");
db_create::build_poly_db(&big_data, &poly_path, self.ring_classification)?;
Some(poly_path)
} else {
None
};
let sha256 = db_create::compute_sha256(&self.output)?;
let village_count = merged.len();
eprintln!("Pipeline completed successfully.");
Ok(PipelineOutput {
db_path: self.output,
poly_db_path,
parsed_villages_path,
village_count,
sha256,
})
}
}
impl Default for Pipeline {
fn default() -> Self {
Self::new()
}
}