sklears_neighbors/
lib.rs

1#![allow(dead_code)]
2#![allow(non_snake_case)]
3#![allow(missing_docs)]
4#![allow(deprecated)]
5#![allow(clippy::all)]
6#![allow(clippy::pedantic)]
7#![allow(clippy::nursery)]
8#![allow(clippy::type_complexity)]
9#![allow(clippy::needless_range_loop)]
10#![allow(clippy::iter_kv_map)]
11#![allow(clippy::enum_variant_names)]
12#![allow(clippy::assign_op_pattern)]
13#![allow(clippy::explicit_counter_loop)]
14#![allow(clippy::unused_enumerate_index)]
15#![allow(clippy::manual_map)]
16#![allow(clippy::manual_clamp)]
17#![allow(clippy::manual_unwrap_or_default)]
18//! Neighbor-based algorithms for machine learning
19//!
20//! This crate provides k-nearest neighbors (k-NN) and related algorithms for
21//! classification, regression, and outlier detection.
22//!
23//! # Examples
24//!
25//! ```rust
26//! use sklears_neighbors::KNeighborsClassifier;
27//! use sklears_core::traits::{Fit, Predict};
28//! use scirs2_core::ndarray::{array, Array2};
29//!
30//! // Create training data
31//! let X = Array2::from_shape_vec((4, 2), vec![
32//!     1.0, 2.0,
33//!     2.0, 3.0,
34//!     3.0, 1.0,
35//!     4.0, 2.0,
36//! ]).unwrap();
37//! let y = array![0, 0, 1, 1];
38//!
39//! // Train classifier
40//! let classifier = KNeighborsClassifier::new(3);
41//! let fitted = classifier.fit(&X, &y).unwrap();
42//!
43//! // Make predictions
44//! let X_test = Array2::from_shape_vec((2, 2), vec![
45//!     1.5, 2.5,
46//!     3.5, 1.5,
47//! ]).unwrap();
48//! let predictions = fitted.predict(&X_test).unwrap();
49//! ```
50
51pub mod abod;
52pub mod adaptive_distance;
53pub mod advanced_outliers;
54pub mod approximate_distance;
55pub mod batch_processing;
56pub mod bayesian_neighbors;
57pub mod bioinformatics;
58pub mod compressed_distance;
59pub mod computer_vision;
60pub mod cross_validation;
61pub mod density_estimation;
62pub mod distance;
63pub mod distributed_neighbors;
64pub mod federated_neighbors;
65pub mod gpu_distance;
66pub mod graph_methods;
67pub mod incremental_index;
68pub mod interpretability;
69pub mod knn;
70pub mod local_outlier_factor;
71pub mod lsh;
72pub mod manifold_learning;
73pub mod mapreduce_neighbors;
74pub mod memory_constrained;
75pub mod memory_mapped;
76pub mod metric_learning;
77pub mod multi_view_learning;
78pub mod nearest_centroid;
79pub mod nearest_neighbors;
80pub mod nlp;
81pub mod online_learning;
82pub mod parallel_tree;
83pub mod performance;
84pub mod radius_neighbors;
85pub mod simd_distance;
86pub mod sparse_neighbors;
87pub mod spatial;
88pub mod specialized_distances;
89pub mod streaming;
90pub mod time_series_neighbors;
91pub mod transformers;
92pub mod tree;
93pub mod validation;
94
95#[allow(non_snake_case)]
96#[cfg(test)]
97mod property_tests;
98
99#[allow(non_snake_case)]
100#[cfg(test)]
101pub mod comprehensive_tests;
102
103pub use abod::AngleBasedOutlierDetection;
104pub use adaptive_distance::{
105    AdaptiveDensityDistance, CombinationMethod, ContextDependentDistance, EnsembleDistance,
106    OnlineAdaptiveDistance,
107};
108pub use advanced_outliers::{
109    ConnectivityBasedOutlierFactor, IsolationForest, LocalCorrelationIntegral,
110};
111pub use approximate_distance::ApproximateDistance;
112pub use batch_processing::{
113    BatchConfiguration, BatchNeighborSearch, BatchProcessable, BatchProcessor, BatchResult,
114    BatchStatistics, MemoryMonitor,
115};
116pub use bayesian_neighbors::{
117    BayesianKNeighborsClassifier, BayesianKNeighborsRegressor, BayesianPrediction,
118    BayesianRegressionPrediction, CredibleNeighborSet, UncertaintyMethod,
119};
120pub use bioinformatics::{
121    BioSearchConfig, GeneExpressionNeighbors, GeneExpressionResult, GeneMetadata, KmerIndex,
122    ProteinMetadata, ProteinSearchResult, ProteinStructure, ProteinStructureSearch, ScoringScheme,
123    SequenceAligner, SequenceAlignment, SequenceMetadata, SequenceSearchResult,
124    SequenceSimilaritySearch, SequenceType,
125};
126pub use compressed_distance::{CompressedDistanceMatrix, CompressionMethod, CompressionStats};
127pub use computer_vision::{
128    ColorHistogramExtractor, DescriptorMatch, FeatureDescriptorMatcher, FeatureExtractor,
129    FeatureType, HistogramOfGradientsExtractor, ImageMetadata, ImageSearchConfig,
130    ImageSearchResult, ImageSimilaritySearch, Keypoint, LocalBinaryPatternExtractor,
131    PatchBasedMatching, VisualWordRecognizer,
132};
133pub use cross_validation::{CVFoldResult, CVResults, CVStrategy, NeighborCrossValidator};
134pub use density_estimation::{
135    BandwidthMethod, DensityBasedClustering, KNeighborsDensityEstimator, KernelType,
136    LocalDensityEstimator, VariableBandwidthKDE,
137};
138pub use distance::Distance;
139pub use distributed_neighbors::{
140    DataPartitioner, DistributedConfiguration, DistributedNeighborSearch,
141    DistributedNeighborSearchResult, DistributedWorker, LoadBalanceStrategy, PartitionInfo,
142    PartitionStats,
143};
144pub use federated_neighbors::{
145    FederatedConfig, FederatedNeighborCoordinator, FederatedParticipant, NoiseStrategy,
146    PrivacyLevel, PrivacyPreservingProtocol,
147};
148pub use gpu_distance::{
149    GpuBackend, GpuComputationStats, GpuConfig, GpuDeviceInfo, GpuDistanceCalculator,
150    GpuDistanceResult, GpuKNeighborsSearch, GpuMemoryEstimator, GpuMemoryStrategy,
151};
152pub use graph_methods::{
153    EpsilonGraph, GabrielGraph, GraphEdge, GraphNeighborSearch, GraphStatistics,
154    KNearestNeighborGraph, MutualKNearestNeighbors, NeighborhoodGraph, RelativeNeighborhoodGraph,
155};
156pub use incremental_index::{
157    IncrementalIndexBuilder, IncrementalIndexType, IncrementalNeighborIndex,
158    IndexPerformanceMetrics, UpdateStrategy,
159};
160pub use interpretability::{
161    InfluenceAnalysis, LocalImportanceExplanation, NeighborExplainer, NeighborExplanation,
162    Prototype,
163};
164pub use knn::{KNeighborsClassifier, KNeighborsRegressor};
165pub use local_outlier_factor::LocalOutlierFactor;
166pub use lsh::{HashFamily, LshIndex, LshKNeighborsClassifier};
167pub use manifold_learning::{Isomap, LaplacianEigenmaps, LocallyLinearEmbedding, TSNENeighbors};
168pub use mapreduce_neighbors::{
169    DistributedMapReduce, MapReduceConfig, MapReduceNeighborSearch, PartitionStrategy,
170    ReduceStrategy,
171};
172pub use memory_constrained::{
173    CacheObliviousNeighbors, ExternalMemoryKNN, MemoryBoundedApproximateNeighbors,
174};
175pub use memory_mapped::{MmapNeighborIndex, MmapNeighborIndexBuilder};
176pub use metric_learning::{
177    EnhancedLMNN, InformationTheoreticMetricLearning, LargeMarginNearestNeighbor,
178    NeighborhoodComponentsAnalysis, OnlineMetricLearning,
179};
180pub use multi_view_learning::{
181    ConsensusAnalysis, FusionStrategy, MultiViewKNeighborsClassifier, MultiViewKNeighborsRegressor,
182    RegressionFusionStrategy, ViewConfig,
183};
184pub use nearest_centroid::{CentroidType, ClassConfig, NearestCentroid};
185pub use nearest_neighbors::{kneighbors_graph, radius_neighbors_graph, NearestNeighbors};
186pub use nlp::{
187    DocumentFeatureExtractor, DocumentMetadata, DocumentSearchResult, DocumentSimilaritySearch,
188    NlpSearchConfig, SentenceSimilaritySearch, TextFeatureType, TextPreprocessor, TfIdfExtractor,
189    WordEmbeddingSearch,
190};
191pub use online_learning::{
192    AdaptiveKNeighborsClassifier, DriftDetectionMethod, DriftDetector, StreamingOutlierDetector,
193};
194pub use parallel_tree::{ParallelBuildStrategy, ParallelTreeBuilder, ParallelTreeIndex, WorkUnit};
195pub use performance::{
196    BenchmarkConfig, BenchmarkResult, NeighborBenchmark, PerformanceMetrics, QuickProfiler,
197};
198pub use radius_neighbors::{
199    AdaptiveRadiusNeighborsClassifier, AdaptiveRadiusNeighborsRegressor, RadiusNeighborsClassifier,
200    RadiusNeighborsRegressor, RadiusStrategy,
201};
202pub use simd_distance::{
203    batch_euclidean_distances, pairwise_distances_simd, SimdCapability, SimdDistanceCalculator,
204};
205pub use sparse_neighbors::{SparseIndexType, SparseNeighborBuilder, SparseNeighborMatrix};
206pub use spatial::{
207    OctPoint, OctTree, QuadPoint, QuadTree, RTree, Rectangle, SpatialHash, SpatialHashStats,
208};
209pub use specialized_distances::{
210    CategoricalDistance, GraphDistance, ProbabilisticDistance, SetDistance, SimpleGraph,
211    StringDistance,
212};
213pub use streaming::{
214    IncrementalKNeighborsClassifier, IncrementalKNeighborsRegressor, MemoryStrategy,
215};
216pub use time_series_neighbors::{
217    DtwDistance, DtwStepPattern, Shapelet, ShapeletDiscovery, StreamingTimeSeriesNeighbors,
218    SubsequenceSearch, TemporalNeighborSearch,
219};
220pub use transformers::{KNeighborsTransformer, RadiusNeighborsTransformer};
221pub use type_safe_distance::{
222    ChebyshevMetric, ComputeDistance, CosineMetric, EuclideanMetric, ManhattanMetric,
223    MetricDistance, MinkowskiMetric, NonMetricDistance, NormalizedDistance, TypeSafeDistance,
224    TypeSafeKnnConfig,
225};
226pub use validation::{
227    BootstrapResult, BootstrapValidator, ClassificationMetric, CrossValidationResult, GridSearchCV,
228    GridSearchResult, KFoldValidator, RegressionMetric,
229};
230
231use sklears_core::types::Float;
232
233/// Common error type for neighbors algorithms
234#[derive(thiserror::Error, Debug)]
235pub enum NeighborsError {
236    #[error("Invalid number of neighbors: {0}")]
237    InvalidNeighbors(usize),
238    #[error("Invalid radius: {0}")]
239    InvalidRadius(Float),
240    #[error("Empty input data")]
241    EmptyInput,
242    #[error("Shape mismatch: expected {expected:?}, got {actual:?}")]
243    ShapeMismatch {
244        expected: Vec<usize>,
245        actual: Vec<usize>,
246    },
247    #[error("No neighbors found")]
248    NoNeighbors,
249    #[error("Invalid input: {0}")]
250    InvalidInput(String),
251}
252
253impl From<NeighborsError> for sklears_core::error::SklearsError {
254    fn from(err: NeighborsError) -> Self {
255        sklears_core::error::SklearsError::InvalidInput(err.to_string())
256    }
257}
258
259impl From<sklears_core::error::SklearsError> for NeighborsError {
260    fn from(err: sklears_core::error::SklearsError) -> Self {
261        NeighborsError::InvalidInput(err.to_string())
262    }
263}
264
265impl From<scirs2_core::ndarray::ShapeError> for NeighborsError {
266    fn from(err: scirs2_core::ndarray::ShapeError) -> Self {
267        NeighborsError::InvalidInput(format!("Shape error: {}", err))
268    }
269}
270
271/// Type alias for neighbors results
272pub type NeighborsResult<T> = std::result::Result<T, NeighborsError>;
273pub mod type_safe_distance;