pypinindia 0.1.0

Rust library for Indian pincode lookup and geographical information
Documentation
//! Core functionality for Indian pincode data lookup

use crate::error::{PincodeError, Result};
use crate::models::{PincodeInfo, PostOfficeSummary, Statistics};
use once_cell::sync::Lazy;
use std::collections::{HashMap, HashSet};
use std::fs::File;
use std::path::Path;
use std::sync::Mutex;

/// Main struct for managing and querying Indian pincode data
///
/// Holds every record loaded from the CSV data file in memory; lookups and
/// searches scan this collection.
pub struct PincodeData {
    /// All records read from the data file, in the order they were read.
    data: Vec<PincodeInfo>,
}

impl PincodeData {
    /// Create a new `PincodeData` instance by loading records from a CSV file.
    ///
    /// # Arguments
    ///
    /// * `data_file` - Path to the CSV file holding the pincode records. No dataset is
    ///   bundled with the crate yet, so passing `None` currently returns an error.
    ///
    /// # Errors
    ///
    /// Returns [`PincodeError::DataLoad`] when `data_file` is `None`, when the file does
    /// not exist, or when no valid record could be read from it. An error raised while
    /// opening the file is propagated as well.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// use pypinindia::PincodeData;
    ///
    /// let data = PincodeData::new(Some("pincodes.csv")).unwrap();
    /// ```
    pub fn new(data_file: Option<&str>) -> Result<Self> {
        Self::load_data(data_file).map(|data| Self { data })
    }

    /// Read every pincode record from the CSV file at `data_file`.
    ///
    /// Rows that fail to deserialize are reported on stderr and skipped. Fails with
    /// `PincodeError::DataLoad` when no path is given, when the path does not exist, or
    /// when not a single row could be deserialized; an error from opening the file is
    /// propagated.
    fn load_data(data_file: Option<&str>) -> Result<Vec<PincodeInfo>> {
        // In production, you'd bundle the CSV with include_bytes! or similar
        let Some(path) = data_file else {
            return Err(PincodeError::DataLoad(
                "No data file provided. Please provide path to CSV file.".to_string(),
            ));
        };

        // Checked up front so a missing file gets a message that names the path.
        if !Path::new(path).exists() {
            return Err(PincodeError::DataLoad(format!(
                "Data file not found: {}",
                path
            )));
        }

        let file = File::open(path)?;

        // Route the bytes through encoding_rs_io before CSV parsing, mirroring the
        // encoding handling of the Python version.
        let decoded = encoding_rs_io::DecodeReaderBytesBuilder::new()
            .encoding(Some(encoding_rs::UTF_8))
            .utf8_passthru(true)
            .build(file);

        // `flexible` tolerates rows with a varying number of fields.
        let mut csv_reader = csv::ReaderBuilder::new()
            .flexible(true)
            .from_reader(decoded);

        let rows: Vec<PincodeInfo> = csv_reader
            .deserialize::<PincodeInfo>()
            .filter_map(|row| match row {
                Ok(record) => Some(record),
                Err(e) => {
                    // Skip invalid records but continue processing
                    eprintln!("Warning: Skipping invalid record: {}", e);
                    None
                }
            })
            .collect();

        if rows.is_empty() {
            return Err(PincodeError::DataLoad(
                "Data file is empty or all records invalid".to_string(),
            ));
        }

        Ok(rows)
    }

    /// Normalize a pincode by trimming whitespace and check that it is exactly six
    /// ASCII digits.
    ///
    /// Returns the trimmed pincode, or `PincodeError::InvalidPincode` carrying the
    /// trimmed input when the check fails.
    fn validate_pincode(&self, pincode: &str) -> Result<String> {
        let trimmed = pincode.trim();
        let is_six_digits = trimmed.len() == 6 && trimmed.bytes().all(|b| b.is_ascii_digit());

        if is_six_digits {
            Ok(trimmed.to_string())
        } else {
            Err(PincodeError::InvalidPincode(trimmed.to_string()))
        }
    }

    /// Get every record (one per post office) stored for a pincode.
    ///
    /// # Errors
    ///
    /// Returns `PincodeError::InvalidPincode` when the trimmed input is not exactly six
    /// ASCII digits, and `PincodeError::DataNotFound` when no record has that pincode.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// use pypinindia::PincodeData;
    ///
    /// let data = PincodeData::new(Some("pincodes.csv")).unwrap();
    /// let info = data.get_pincode_info("110001").unwrap();
    /// for office in info {
    ///     println!("Office: {}", office.officename);
    /// }
    /// ```
    pub fn get_pincode_info(&self, pincode: &str) -> Result<Vec<PincodeInfo>> {
        let wanted = self.validate_pincode(pincode)?;

        let mut matches = Vec::new();
        for record in &self.data {
            if record.pincode == wanted {
                matches.push(record.clone());
            }
        }

        if matches.is_empty() {
            Err(PincodeError::DataNotFound(wanted))
        } else {
            Ok(matches)
        }
    }

    /// Get the state name for a pincode, taken from its first matching record.
    ///
    /// Fails with the same errors as `get_pincode_info`.
    pub fn get_state(&self, pincode: &str) -> Result<String> {
        // A successful lookup is never empty, so index 0 always exists.
        self.get_pincode_info(pincode)
            .map(|offices| offices[0].statename.clone())
    }

    /// Get the district name for a pincode, taken from its first matching record.
    ///
    /// Fails with the same errors as `get_pincode_info`.
    pub fn get_district(&self, pincode: &str) -> Result<String> {
        // A successful lookup is never empty, so index 0 always exists.
        self.get_pincode_info(pincode)
            .map(|offices| offices[0].districtname.clone())
    }

    /// Get the taluk name for a pincode, taken from its first matching record.
    ///
    /// Fails with the same errors as `get_pincode_info`.
    pub fn get_taluk(&self, pincode: &str) -> Result<String> {
        // A successful lookup is never empty, so index 0 always exists.
        self.get_pincode_info(pincode)
            .map(|offices| offices[0].taluk.clone())
    }

    /// Get the names of all post offices recorded for a pincode, in record order.
    ///
    /// Fails with the same errors as `get_pincode_info`.
    pub fn get_offices(&self, pincode: &str) -> Result<Vec<String>> {
        let records = self.get_pincode_info(pincode)?;

        let mut names = Vec::with_capacity(records.len());
        for record in &records {
            names.push(record.officename.clone());
        }
        Ok(names)
    }

    /// Search pincodes by state name
    pub fn search_by_state(&self, state_name: &str) -> Vec<String> {
        let state_upper = state_name.trim().to_uppercase();
        let mut pincodes: HashSet<String> = self
            .data
            .iter()
            .filter(|info| info.statename.trim().to_uppercase() == state_upper)
            .map(|info| info.pincode.clone())
            .collect();

        let mut result: Vec<String> = pincodes.drain().collect();
        result.sort();
        result
    }

    /// Search pincodes by district name
    pub fn search_by_district(&self, district_name: &str, state_name: Option<&str>) -> Vec<String> {
        let district_upper = district_name.trim().to_uppercase();

        let filtered: Vec<&PincodeInfo> = self
            .data
            .iter()
            .filter(|info| {
                let district_match = info.districtname.trim().to_uppercase() == district_upper;

                if let Some(state) = state_name {
                    let state_match =
                        info.statename.trim().to_uppercase() == state.trim().to_uppercase();
                    district_match && state_match
                } else {
                    district_match
                }
            })
            .collect();

        let mut pincodes: HashSet<String> =
            filtered.iter().map(|info| info.pincode.clone()).collect();

        let mut result: Vec<String> = pincodes.drain().collect();
        result.sort();
        result
    }

    /// Search pincodes by taluk name
    pub fn search_by_taluk(
        &self,
        taluk_name: &str,
        state_name: Option<&str>,
        district_name: Option<&str>,
    ) -> Vec<String> {
        let taluk_upper = taluk_name.trim().to_uppercase();

        let filtered: Vec<&PincodeInfo> = self
            .data
            .iter()
            .filter(|info| {
                let taluk_match = info.taluk.trim().to_uppercase() == taluk_upper;

                let state_match = state_name.is_none_or(|s| {
                    info.statename.trim().to_uppercase() == s.trim().to_uppercase()
                });

                let district_match = district_name.is_none_or(|d| {
                    info.districtname.trim().to_uppercase() == d.trim().to_uppercase()
                });

                taluk_match && state_match && district_match
            })
            .collect();

        let mut pincodes: HashSet<String> =
            filtered.iter().map(|info| info.pincode.clone()).collect();

        let mut result: Vec<String> = pincodes.drain().collect();
        result.sort();
        result
    }

    /// Find records whose office name contains `office_name`, ignoring case.
    ///
    /// The query is trimmed and upper-cased, then matched as a substring of each
    /// upper-cased office name, so a blank query matches every record. Matching records
    /// are returned as copies, in dataset order.
    pub fn search_by_office(&self, office_name: &str) -> Vec<PincodeInfo> {
        let needle = office_name.trim().to_uppercase();

        let mut hits = Vec::new();
        for record in &self.data {
            if record.officename.to_uppercase().contains(&needle) {
                hits.push(record.clone());
            }
        }
        hits
    }

    /// List every distinct state name in the dataset, sorted.
    ///
    /// Names are returned exactly as stored; no trimming or case folding is applied.
    pub fn get_states(&self) -> Vec<String> {
        let distinct: HashSet<&str> = self
            .data
            .iter()
            .map(|record| record.statename.as_str())
            .collect();

        let mut names: Vec<String> = distinct.into_iter().map(String::from).collect();
        names.sort();
        names
    }

    /// Get list of all districts, optionally filtered by state
    pub fn get_districts(&self, state_name: Option<&str>) -> Vec<String> {
        let filtered: Vec<&PincodeInfo> = if let Some(state) = state_name {
            let state_upper = state.trim().to_uppercase();
            self.data
                .iter()
                .filter(|info| info.statename.trim().to_uppercase() == state_upper)
                .collect()
        } else {
            self.data.iter().collect()
        };

        let mut districts: HashSet<String> = filtered
            .iter()
            .map(|info| info.districtname.clone())
            .collect();

        let mut result: Vec<String> = districts.drain().collect();
        result.sort();
        result
    }

    /// Get list of all unique taluks
    pub fn get_unique_taluks(
        &self,
        state_name: Option<&str>,
        district_name: Option<&str>,
    ) -> Vec<String> {
        let filtered: Vec<&PincodeInfo> = self
            .data
            .iter()
            .filter(|info| {
                let state_match = state_name.is_none_or(|s| {
                    info.statename.trim().to_uppercase() == s.trim().to_uppercase()
                });

                let district_match = district_name.is_none_or(|d| {
                    info.districtname.trim().to_uppercase() == d.trim().to_uppercase()
                });

                state_match && district_match
            })
            .collect();

        let mut taluks: HashSet<String> = filtered.iter().map(|info| info.taluk.clone()).collect();

        let mut result: Vec<String> = taluks.drain().collect();
        result.sort();
        result
    }

    /// Summarize the dataset: total record count plus the number of distinct pincodes,
    /// states, districts and office names.
    ///
    /// Values are counted exactly as stored, without trimming or case folding.
    pub fn get_statistics(&self) -> Statistics {
        /// Count the distinct strings produced by `values`.
        fn distinct<'a>(values: impl Iterator<Item = &'a String>) -> usize {
            values.collect::<HashSet<_>>().len()
        }

        let records = &self.data;
        Statistics {
            total_records: records.len(),
            unique_pincodes: distinct(records.iter().map(|r| &r.pincode)),
            unique_states: distinct(records.iter().map(|r| &r.statename)),
            unique_districts: distinct(records.iter().map(|r| &r.districtname)),
            unique_offices: distinct(records.iter().map(|r| &r.officename)),
        }
    }

    /// Summarize the post offices of a pincode: how many there are, and how many fall
    /// under each office type and each delivery status.
    ///
    /// Fails with the same errors as `get_pincode_info`.
    pub fn get_postoffice_summary(&self, pincode: &str) -> Result<PostOfficeSummary> {
        let offices = self.get_pincode_info(pincode)?;

        let types = offices
            .iter()
            .fold(HashMap::<String, usize>::new(), |mut counts, office| {
                *counts.entry(office.officetype.clone()).or_default() += 1;
                counts
            });

        let delivery_statuses =
            offices
                .iter()
                .fold(HashMap::<String, usize>::new(), |mut counts, office| {
                    *counts.entry(office.delivery_status.clone()).or_default() += 1;
                    counts
                });

        Ok(PostOfficeSummary {
            total: offices.len(),
            types,
            delivery_statuses,
        })
    }
}

// Global singleton instance, loaded lazily by the first convenience call
static DEFAULT_INSTANCE: Lazy<Mutex<Option<PincodeData>>> = Lazy::new(|| Mutex::new(None));

/// Run `query` against the shared default instance, loading it on first use.
///
/// The data file path is read from the `PYPININDIA_DATA_FILE` environment variable.
/// A failed load leaves the slot empty, so the next call tries again.
///
/// The query borrows the shared instance while the lock is held, so the dataset is
/// never copied. `query` must not call back into the convenience functions, as that
/// would try to take the same lock again.
fn with_default_instance<T, F>(query: F) -> Result<T>
where
    F: FnOnce(&PincodeData) -> Result<T>,
{
    // A poisoned lock is safe to reuse: the slot only ever holds `None` or a fully
    // loaded instance, and queries get read-only access to it.
    let mut slot = DEFAULT_INSTANCE
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());

    // Moving the instance out and back in is cheap and avoids an `is_none` + `unwrap`
    // pair. If loading fails the slot was already empty and stays empty.
    let loaded = match slot.take() {
        Some(existing) => existing,
        None => {
            let data_file = std::env::var("PYPININDIA_DATA_FILE").ok();
            PincodeData::new(data_file.as_deref())?
        }
    };

    let instance: &PincodeData = slot.insert(loaded);
    query(instance)
}

/// Return an owned copy of the default instance.
///
/// This clones the entire dataset on every call; prefer `with_default_instance`,
/// which borrows it instead.
// Kept with its original signature for callers that may exist outside this section
// of the file; the lint is silenced in case there are none.
#[allow(dead_code)]
fn get_default_instance() -> Result<PincodeData> {
    with_default_instance(|shared| {
        Ok(PincodeData {
            data: shared.data.clone(),
        })
    })
}

// Convenience functions

/// Get complete pincode information (convenience function)
///
/// Uses the default instance, which is loaded from the file named by the
/// `PYPININDIA_DATA_FILE` environment variable on first use.
pub fn get_pincode_info(pincode: &str) -> Result<Vec<PincodeInfo>> {
    with_default_instance(|data| data.get_pincode_info(pincode))
}

/// Get state name for a pincode (convenience function)
///
/// Uses the default instance, which is loaded from the file named by the
/// `PYPININDIA_DATA_FILE` environment variable on first use.
pub fn get_state(pincode: &str) -> Result<String> {
    with_default_instance(|data| data.get_state(pincode))
}

/// Get district name for a pincode (convenience function)
///
/// Uses the default instance, which is loaded from the file named by the
/// `PYPININDIA_DATA_FILE` environment variable on first use.
pub fn get_district(pincode: &str) -> Result<String> {
    with_default_instance(|data| data.get_district(pincode))
}

/// Get taluk name for a pincode (convenience function)
///
/// Uses the default instance, which is loaded from the file named by the
/// `PYPININDIA_DATA_FILE` environment variable on first use.
pub fn get_taluk(pincode: &str) -> Result<String> {
    with_default_instance(|data| data.get_taluk(pincode))
}

/// Get office names for a pincode (convenience function)
///
/// Uses the default instance, which is loaded from the file named by the
/// `PYPININDIA_DATA_FILE` environment variable on first use.
pub fn get_offices(pincode: &str) -> Result<Vec<String>> {
    with_default_instance(|data| data.get_offices(pincode))
}