using System.Runtime.InteropServices;
using Native = UInsight.Interop.NativeLibrary;
using NativeStructs = UInsight.Interop.NativeStructs;
namespace UInsight;
public sealed class InsightClient : IDisposable
{
private bool _disposed;
/// <summary>Returns whatever version string <c>Native.GetVersion()</c> reports.</summary>
public string GetVersion()
{
    return Native.GetVersion();
}
#region Profiling
/// <summary>
/// Profiles CSV text via the native <c>insight_profile_csv</c> call and converts the
/// resulting native context into a managed <see cref="ProfileResult"/>.
/// </summary>
/// <param name="csvData">CSV content, handed unchanged to the native call.</param>
/// <returns>The profile assembled by <c>BuildProfileResult</c>.</returns>
/// <exception cref="InsightException">The native call returned a null context handle.</exception>
public ProfileResult ProfileCsv(string csvData)
{
    var handle = Native.insight_profile_csv(csvData);
    if (handle != IntPtr.Zero)
    {
        // BuildProfileResult takes over the handle and frees it.
        return BuildProfileResult(handle);
    }

    throw new InsightException(-1, "Failed to profile CSV data");
}
/// <summary>
/// Profiles JSON text via the native <c>insight_profile_json</c> call and converts the
/// resulting native context into a managed <see cref="ProfileResult"/>.
/// </summary>
/// <param name="jsonData">JSON content, handed unchanged to the native call.</param>
/// <returns>The profile assembled by <c>BuildProfileResult</c>.</returns>
/// <exception cref="InsightException">The native call returned a null context handle.</exception>
public ProfileResult ProfileJson(string jsonData)
{
    var handle = Native.insight_profile_json(jsonData);
    if (handle != IntPtr.Zero)
    {
        // BuildProfileResult takes over the handle and frees it.
        return BuildProfileResult(handle);
    }

    throw new InsightException(-1, "Failed to profile JSON data");
}
/// <summary>
/// Reads row/column counts and every per-column summary out of a native profile context,
/// then releases the context with <c>insight_profile_free</c> whether or not reading succeeded.
/// </summary>
/// <param name="ctx">Native profile context handle; it is always freed before this method exits.</param>
/// <returns>A managed snapshot of the profile.</returns>
private static ProfileResult BuildProfileResult(IntPtr ctx)
{
    try
    {
        var rowCount = Native.insight_profile_row_count(ctx);
        var columnCount = Native.insight_profile_col_count(ctx);
        var summaries = new ColumnSummary[(int)columnCount];

        for (uint index = 0; index < (uint)columnCount; index++)
        {
            var raw = new NativeStructs.CColumnSummary();
            var status = Native.insight_profile_column(ctx, index, ref raw);
            Native.ThrowIfFailed(status);

            // Type codes that are not members of InsightDataType are mapped to Text.
            InsightDataType kind;
            if (Enum.IsDefined(typeof(InsightDataType), raw.DataType))
            {
                kind = (InsightDataType)raw.DataType;
            }
            else
            {
                kind = InsightDataType.Text;
            }

            summaries[index] = new ColumnSummary
            {
                Index = raw.Index,
                ValidCount = raw.ValidCount,
                NullCount = raw.NullCount,
                DataType = kind,
                Mean = raw.Mean,
                StdDev = raw.StdDev,
                Min = raw.Min,
                Max = raw.Max
            };
        }

        return new ProfileResult
        {
            RowCount = rowCount,
            ColumnCount = columnCount,
            Columns = summaries
        };
    }
    finally
    {
        Native.insight_profile_free(ctx);
    }
}
#endregion
#region Clustering
/// <summary>
/// Runs the native <c>insight_kmeans</c> routine on <paramref name="data"/> and copies the
/// outcome into a managed <see cref="KMeansResult"/>.
/// </summary>
/// <param name="data">Input matrix; flattened by <c>Flatten</c> and pinned for the native call.</param>
/// <param name="k">Forwarded unchanged to the native call (presumably the cluster count).</param>
/// <returns>Managed copy of the native result; the native label buffer is freed before returning.</returns>
public KMeansResult KMeans(double[,] data, uint k)
{
    var (rowCount, colCount, buffer) = Flatten(data);
    var raw = new NativeStructs.CKMeansResult();

    unsafe
    {
        fixed (double* pData = buffer)
        {
            var status = Native.insight_kmeans(pData, rowCount, colCount, k, ref raw);
            Native.ThrowIfFailed(status);
        }
    }

    try
    {
        var labels = CopyU32Array(raw.Labels, raw.NLabels);
        var result = new KMeansResult
        {
            K = raw.K,
            Wcss = raw.Wcss,
            Iterations = raw.Iterations,
            Labels = labels
        };
        return result;
    }
    finally
    {
        // The label buffer came from the native call and is released here.
        Native.insight_free_labels(raw.Labels, raw.NLabels);
    }
}
/// <summary>
/// Runs the native <c>insight_mini_batch_kmeans</c> routine on <paramref name="data"/> and copies
/// the outcome into a managed <see cref="KMeansResult"/>.
/// </summary>
/// <param name="data">Input matrix; flattened by <c>Flatten</c> and pinned for the native call.</param>
/// <param name="k">Forwarded unchanged to the native call.</param>
/// <param name="batchSize">Forwarded unchanged to the native call.</param>
/// <param name="maxIter">Forwarded unchanged to the native call.</param>
/// <param name="seed">Forwarded unchanged to the native call.</param>
/// <returns>Managed copy of the native result; the native label buffer is freed before returning.</returns>
public KMeansResult MiniBatchKMeans(double[,] data, uint k, uint batchSize = 100, uint maxIter = 100, ulong seed = 42)
{
    var (rowCount, colCount, buffer) = Flatten(data);
    var raw = new NativeStructs.CKMeansResult();

    unsafe
    {
        fixed (double* pData = buffer)
        {
            var status = Native.insight_mini_batch_kmeans(
                pData, rowCount, colCount, k, batchSize, maxIter, seed, ref raw);
            Native.ThrowIfFailed(status);
        }
    }

    try
    {
        var labels = CopyU32Array(raw.Labels, raw.NLabels);
        var result = new KMeansResult
        {
            K = raw.K,
            Wcss = raw.Wcss,
            Iterations = raw.Iterations,
            Labels = labels
        };
        return result;
    }
    finally
    {
        Native.insight_free_labels(raw.Labels, raw.NLabels);
    }
}
/// <summary>
/// Runs the native <c>insight_dbscan</c> routine on <paramref name="data"/> and copies the
/// outcome into a managed <see cref="DbscanResult"/>.
/// </summary>
/// <param name="data">Input matrix; flattened by <c>Flatten</c> and pinned for the native call.</param>
/// <param name="epsilon">Forwarded unchanged to the native call.</param>
/// <param name="minSamples">Forwarded unchanged to the native call.</param>
/// <returns>Managed copy of the native result; the native label buffer is freed before returning.</returns>
public DbscanResult Dbscan(double[,] data, double epsilon, uint minSamples)
{
    var (rowCount, colCount, buffer) = Flatten(data);
    var raw = new NativeStructs.CDbscanResult();

    unsafe
    {
        fixed (double* pData = buffer)
        {
            var status = Native.insight_dbscan(pData, rowCount, colCount, epsilon, minSamples, ref raw);
            Native.ThrowIfFailed(status);
        }
    }

    try
    {
        // Labels are signed 32-bit values here, unlike the k-means family.
        var labels = CopyI32Array(raw.Labels, raw.NLabels);
        var result = new DbscanResult
        {
            NClusters = raw.NClusters,
            NoiseCount = raw.NoiseCount,
            Labels = labels
        };
        return result;
    }
    finally
    {
        Native.insight_free_i32_array(raw.Labels, raw.NLabels);
    }
}
/// <summary>
/// Runs the native <c>insight_hierarchical</c> routine on <paramref name="data"/> and copies the
/// outcome into a managed <see cref="HierarchicalResult"/>.
/// </summary>
/// <param name="data">Input matrix; flattened by <c>Flatten</c> and pinned for the native call.</param>
/// <param name="linkage">Forwarded unchanged to the native call (a numeric linkage selector; values are defined natively).</param>
/// <param name="nClusters">Forwarded unchanged to the native call.</param>
/// <returns>Managed copy of the native result; all three native buffers are freed before returning.</returns>
public HierarchicalResult Hierarchical(double[,] data, uint linkage, uint nClusters)
{
    var (rowCount, colCount, buffer) = Flatten(data);
    var raw = new NativeStructs.CHierarchicalResult();

    unsafe
    {
        fixed (double* pData = buffer)
        {
            var status = Native.insight_hierarchical(pData, rowCount, colCount, linkage, nClusters, ref raw);
            Native.ThrowIfFailed(status);
        }
    }

    try
    {
        var labels = CopyU32Array(raw.Labels, raw.NLabels);
        // Both merge arrays share the same element count (NMerges).
        var distances = CopyF64Array(raw.MergeDistances, raw.NMerges);
        var sizes = CopyU32Array(raw.MergeSizes, raw.NMerges);
        var result = new HierarchicalResult
        {
            NClusters = raw.NClusters,
            Labels = labels,
            MergeDistances = distances,
            MergeSizes = sizes
        };
        return result;
    }
    finally
    {
        Native.insight_free_labels(raw.Labels, raw.NLabels);
        Native.insight_free_f64_array(raw.MergeDistances, raw.NMerges);
        Native.insight_free_labels(raw.MergeSizes, raw.NMerges);
    }
}
/// <summary>
/// Runs the native <c>insight_hdbscan</c> routine on <paramref name="data"/> and copies the
/// outcome into a managed <see cref="HdbscanResult"/>.
/// </summary>
/// <param name="data">Input matrix; flattened by <c>Flatten</c> and pinned for the native call.</param>
/// <param name="minClusterSize">Forwarded unchanged to the native call.</param>
/// <param name="minSamples">Forwarded unchanged to the native call.</param>
/// <returns>Managed copy of the native result; both native buffers are freed before returning.</returns>
public HdbscanResult Hdbscan(double[,] data, uint minClusterSize, uint minSamples)
{
    var (rowCount, colCount, buffer) = Flatten(data);
    var raw = new NativeStructs.CHdbscanResult();

    unsafe
    {
        fixed (double* pData = buffer)
        {
            var status = Native.insight_hdbscan(pData, rowCount, colCount, minClusterSize, minSamples, ref raw);
            Native.ThrowIfFailed(status);
        }
    }

    try
    {
        var labels = CopyI32Array(raw.Labels, raw.NLabels);
        // Probabilities are sized by NLabels as well.
        var probabilities = CopyF64Array(raw.Probabilities, raw.NLabels);
        var result = new HdbscanResult
        {
            NClusters = raw.NClusters,
            NoiseCount = raw.NoiseCount,
            Labels = labels,
            Probabilities = probabilities
        };
        return result;
    }
    finally
    {
        Native.insight_free_i32_array(raw.Labels, raw.NLabels);
        Native.insight_free_f64_array(raw.Probabilities, raw.NLabels);
    }
}
/// <summary>
/// Runs the native <c>insight_gap_statistic</c> routine on <paramref name="data"/> and copies the
/// outcome into a managed <see cref="GapStatResult"/>.
/// </summary>
/// <param name="data">Input matrix; flattened by <c>Flatten</c> and pinned for the native call.</param>
/// <param name="kMin">Forwarded unchanged to the native call.</param>
/// <param name="kMax">Forwarded unchanged to the native call.</param>
/// <param name="nRefs">Forwarded unchanged to the native call.</param>
/// <param name="seed">Forwarded unchanged to the native call.</param>
/// <returns>Managed copy of the native result; both native buffers are freed before returning.</returns>
public GapStatResult GapStatistic(double[,] data, uint kMin, uint kMax, uint nRefs = 10, ulong seed = 42)
{
    var (rowCount, colCount, buffer) = Flatten(data);
    var raw = new NativeStructs.CGapStatResult();

    unsafe
    {
        fixed (double* pData = buffer)
        {
            var status = Native.insight_gap_statistic(
                pData, rowCount, colCount, kMin, kMax, nRefs, seed, ref raw);
            Native.ThrowIfFailed(status);
        }
    }

    try
    {
        // Gap values and standard errors share one element count (NValues).
        var gaps = CopyF64Array(raw.GapValues, raw.NValues);
        var errors = CopyF64Array(raw.StdErrors, raw.NValues);
        var result = new GapStatResult
        {
            BestK = raw.BestK,
            GapValues = gaps,
            StdErrors = errors
        };
        return result;
    }
    finally
    {
        Native.insight_free_f64_array(raw.GapValues, raw.NValues);
        Native.insight_free_f64_array(raw.StdErrors, raw.NValues);
    }
}
#endregion
#region PCA
/// <summary>
/// Runs the native <c>insight_pca</c> routine on <paramref name="data"/> and copies the
/// outcome into a managed <see cref="PcaResult"/>.
/// </summary>
/// <param name="data">Input matrix; flattened by <c>Flatten</c> and pinned for the native call.</param>
/// <param name="nComponents">Forwarded unchanged to the native call.</param>
/// <param name="autoScale">Sent to the native call as 1 when true and 0 when false.</param>
/// <returns>Managed copy of the native result; the native variance buffer is freed before returning.</returns>
public PcaResult Pca(double[,] data, uint nComponents, bool autoScale = true)
{
    var (rowCount, colCount, buffer) = Flatten(data);
    var raw = new NativeStructs.CPcaResult();

    unsafe
    {
        fixed (double* pData = buffer)
        {
            var status = Native.insight_pca(pData, rowCount, colCount, nComponents, autoScale ? 1 : 0, ref raw);
            Native.ThrowIfFailed(status);
        }
    }

    try
    {
        var variance = CopyF64Array(raw.ExplainedVariance, raw.NVariance);
        var result = new PcaResult
        {
            NComponents = raw.NComponents,
            ExplainedVariance = variance
        };
        return result;
    }
    finally
    {
        Native.insight_free_f64_array(raw.ExplainedVariance, raw.NVariance);
    }
}
#endregion
#region Anomaly Detection
/// <summary>
/// Runs the native <c>insight_isolation_forest</c> routine on <paramref name="data"/> and copies
/// the outcome into a managed <see cref="AnomalyResult"/>.
/// </summary>
/// <param name="data">Input matrix; flattened by <c>Flatten</c> and pinned for the native call.</param>
/// <param name="nEstimators">Forwarded unchanged to the native call.</param>
/// <param name="contamination">Forwarded unchanged to the native call.</param>
/// <param name="seed">Forwarded unchanged to the native call.</param>
/// <returns>Managed copy of the native result; the native buffers are released before returning.</returns>
public AnomalyResult IsolationForest(double[,] data, uint nEstimators = 100, double contamination = 0.1, ulong seed = 42)
{
    var (rowCount, colCount, buffer) = Flatten(data);
    var raw = new NativeStructs.CAnomalyResult();

    unsafe
    {
        fixed (double* pData = buffer)
        {
            var status = Native.insight_isolation_forest(
                pData, rowCount, colCount, nEstimators, contamination, seed, ref raw);
            Native.ThrowIfFailed(status);
        }
    }

    try
    {
        var scores = CopyF64Array(raw.Scores, raw.N);
        var flags = CopyByteArray(raw.Anomalies, raw.N);
        var result = new AnomalyResult
        {
            Scores = scores,
            Anomalies = flags,
            AnomalyCount = raw.AnomalyCount,
            Threshold = raw.Threshold
        };
        return result;
    }
    finally
    {
        Native.insight_free_f64_array(raw.Scores, raw.N);
        // NOTE(review): Anomalies is read above as a byte buffer but released through the
        // f64-array free function. Confirm the native library expects this, or whether a
        // dedicated byte-array free function should be used instead.
        Native.insight_free_f64_array(raw.Anomalies, raw.N);
    }
}
/// <summary>
/// Runs the native <c>insight_lof</c> routine on <paramref name="data"/> and copies the
/// outcome into a managed <see cref="AnomalyResult"/>.
/// </summary>
/// <param name="data">Input matrix; flattened by <c>Flatten</c> and pinned for the native call.</param>
/// <param name="k">Forwarded unchanged to the native call.</param>
/// <param name="threshold">Forwarded unchanged to the native call.</param>
/// <returns>Managed copy of the native result; the native buffers are released before returning.</returns>
public AnomalyResult Lof(double[,] data, uint k = 20, double threshold = 1.5)
{
    var (rowCount, colCount, buffer) = Flatten(data);
    var raw = new NativeStructs.CAnomalyResult();

    unsafe
    {
        fixed (double* pData = buffer)
        {
            var status = Native.insight_lof(pData, rowCount, colCount, k, threshold, ref raw);
            Native.ThrowIfFailed(status);
        }
    }

    try
    {
        var scores = CopyF64Array(raw.Scores, raw.N);
        var flags = CopyByteArray(raw.Anomalies, raw.N);
        var result = new AnomalyResult
        {
            Scores = scores,
            Anomalies = flags,
            AnomalyCount = raw.AnomalyCount,
            Threshold = raw.Threshold
        };
        return result;
    }
    finally
    {
        Native.insight_free_f64_array(raw.Scores, raw.N);
        // NOTE(review): Anomalies is read above as a byte buffer but released through the
        // f64-array free function. Confirm the native library expects this, or whether a
        // dedicated byte-array free function should be used instead.
        Native.insight_free_f64_array(raw.Anomalies, raw.N);
    }
}
/// <summary>
/// Runs the native <c>insight_mahalanobis</c> routine on <paramref name="data"/> and copies the
/// outcome into a managed <see cref="MahalanobisResult"/>.
/// </summary>
/// <param name="data">Input matrix; flattened by <c>Flatten</c> and pinned for the native call.</param>
/// <param name="chi2Quantile">Forwarded unchanged to the native call.</param>
/// <returns>Managed copy of the native result; the native buffers are released before returning.</returns>
public MahalanobisResult Mahalanobis(double[,] data, double chi2Quantile = 0.975)
{
    var (rowCount, colCount, buffer) = Flatten(data);
    var raw = new NativeStructs.CMahalanobisResult();

    unsafe
    {
        fixed (double* pData = buffer)
        {
            var status = Native.insight_mahalanobis(pData, rowCount, colCount, chi2Quantile, ref raw);
            Native.ThrowIfFailed(status);
        }
    }

    try
    {
        var distances = CopyF64Array(raw.Distances, raw.N);
        var flags = CopyByteArray(raw.Anomalies, raw.N);
        var result = new MahalanobisResult
        {
            Distances = distances,
            Anomalies = flags,
            Threshold = raw.Threshold,
            OutlierCount = raw.OutlierCount
        };
        return result;
    }
    finally
    {
        Native.insight_free_f64_array(raw.Distances, raw.N);
        // NOTE(review): Anomalies is read above as a byte buffer but released through the
        // f64-array free function. Confirm the native library expects this, or whether a
        // dedicated byte-array free function should be used instead.
        Native.insight_free_f64_array(raw.Anomalies, raw.N);
    }
}
#endregion
#region Statistical Analysis
/// <summary>
/// Runs the native <c>insight_correlation</c> routine on <paramref name="data"/> and reshapes the
/// flat native matrix into a square two-dimensional array.
/// </summary>
/// <param name="data">Input matrix; flattened by <c>Flatten</c> and pinned for the native call.</param>
/// <returns>
/// An NVars-by-NVars matrix read row by row from the native buffer, plus the native
/// NVars and NHighPairs values. The native matrix buffer is freed before returning.
/// </returns>
public CorrelationResult Correlation(double[,] data)
{
    var (rowCount, colCount, buffer) = Flatten(data);
    var raw = new NativeStructs.CCorrelationResult();

    unsafe
    {
        fixed (double* pData = buffer)
        {
            var status = Native.insight_correlation(pData, rowCount, colCount, ref raw);
            Native.ThrowIfFailed(status);
        }
    }

    try
    {
        var values = CopyF64Array(raw.Matrix, raw.NVars * raw.NVars);
        var grid = new double[raw.NVars, raw.NVars];

        // Walk the flat buffer sequentially; position (row, col) lives at row * NVars + col.
        uint offset = 0;
        for (uint row = 0; row < raw.NVars; row++)
        {
            for (uint col = 0; col < raw.NVars; col++)
            {
                grid[row, col] = values[offset];
                offset++;
            }
        }

        return new CorrelationResult
        {
            NVars = raw.NVars,
            Matrix = grid,
            NHighPairs = raw.NHighPairs
        };
    }
    finally
    {
        Native.insight_free_f64_array(raw.Matrix, raw.NVars * raw.NVars);
    }
}
/// <summary>
/// Runs the native <c>insight_regression</c> routine on two equally sized arrays and copies the
/// scalar outputs into a managed <see cref="RegressionResult"/>.
/// </summary>
/// <param name="x">First input array; pinned and passed to the native call with its length.</param>
/// <param name="y">Second input array; must have the same length as <paramref name="x"/>.</param>
/// <returns>Managed copy of the native result's scalar fields.</returns>
/// <exception cref="ArgumentException">The two arrays differ in length.</exception>
public RegressionResult Regression(double[] x, double[] y)
{
    var lengthsMatch = x.Length == y.Length;
    if (!lengthsMatch)
    {
        throw new ArgumentException("x and y must have the same length");
    }

    var raw = new NativeStructs.CRegressionResult();

    unsafe
    {
        fixed (double* pX = x)
        fixed (double* pY = y)
        {
            var status = Native.insight_regression(pX, pY, (uint)x.Length, ref raw);
            Native.ThrowIfFailed(status);
        }
    }

    // The result holds only scalars, so there is nothing native to free here.
    var result = new RegressionResult
    {
        Intercept = raw.Intercept,
        Slope = raw.Slope,
        RSquared = raw.RSquared,
        AdjRSquared = raw.AdjRSquared,
        FPValue = raw.FPValue
    };
    return result;
}
/// <summary>
/// Runs the native <c>insight_cramers_v</c> routine on <paramref name="table"/> and copies the
/// scalar outputs into a managed <see cref="CramersVResult"/>.
/// </summary>
/// <param name="table">Input table; flattened by <c>Flatten</c> and pinned for the native call.</param>
/// <returns>Managed copy of the native result's scalar fields.</returns>
public CramersVResult CramersV(double[,] table)
{
    var (rowCount, colCount, buffer) = Flatten(table);
    var raw = new NativeStructs.CCramersVResult();

    unsafe
    {
        fixed (double* pTable = buffer)
        {
            var status = Native.insight_cramers_v(pTable, rowCount, colCount, ref raw);
            Native.ThrowIfFailed(status);
        }
    }

    var result = new CramersVResult
    {
        V = raw.V,
        ChiSquared = raw.ChiSquared,
        PValue = raw.PValue
    };
    return result;
}
#endregion
#region Distribution
/// <summary>
/// Runs the native <c>insight_distribution</c> routine on <paramref name="data"/> and copies the
/// scalar outputs into a managed <see cref="DistributionResult"/>.
/// </summary>
/// <param name="data">Sample values; pinned and passed to the native call with their count.</param>
/// <param name="significanceLevel">Forwarded unchanged to the native call.</param>
/// <returns>Managed copy of the native result; the native IsNormal flag is converted to a bool (non-zero means true).</returns>
public DistributionResult Distribution(double[] data, double significanceLevel = 0.05)
{
    var raw = new NativeStructs.CDistributionResult();

    unsafe
    {
        fixed (double* pData = data)
        {
            var status = Native.insight_distribution(pData, (uint)data.Length, significanceLevel, ref raw);
            Native.ThrowIfFailed(status);
        }
    }

    var looksNormal = raw.IsNormal != 0;
    var result = new DistributionResult
    {
        N = raw.N,
        KsStatistic = raw.KsStatistic,
        KsPValue = raw.KsPValue,
        JbStatistic = raw.JbStatistic,
        JbPValue = raw.JbPValue,
        SwStatistic = raw.SwStatistic,
        SwPValue = raw.SwPValue,
        AdStatistic = raw.AdStatistic,
        AdPValue = raw.AdPValue,
        IsNormal = looksNormal
    };
    return result;
}
#endregion
#region Feature Importance
/// <summary>
/// Runs the native <c>insight_feature_importance</c> routine on <paramref name="data"/> and copies
/// the outcome into a managed <see cref="FeatureImportanceResult"/>.
/// </summary>
/// <param name="data">Input matrix; flattened by <c>Flatten</c> and pinned for the native call.</param>
/// <returns>Managed copy of the native result; the native score buffer is freed before returning.</returns>
public FeatureImportanceResult FeatureImportance(double[,] data)
{
    var (rowCount, colCount, buffer) = Flatten(data);
    var raw = new NativeStructs.CFeatureImportanceResult();

    unsafe
    {
        fixed (double* pData = buffer)
        {
            var status = Native.insight_feature_importance(pData, rowCount, colCount, ref raw);
            Native.ThrowIfFailed(status);
        }
    }

    try
    {
        var scores = CopyF64Array(raw.Scores, raw.NScores);
        var result = new FeatureImportanceResult
        {
            Scores = scores,
            ConditionNumber = raw.ConditionNumber,
            NLowVariance = raw.NLowVariance,
            NHighCorrPairs = raw.NHighCorrPairs
        };
        return result;
    }
    finally
    {
        Native.insight_free_f64_array(raw.Scores, raw.NScores);
    }
}
/// <summary>
/// Runs the native <c>insight_anova_select</c> routine on <paramref name="data"/> and
/// <paramref name="target"/> and copies the per-feature output into managed objects.
/// </summary>
/// <param name="data">Input matrix; flattened by <c>Flatten</c> and pinned for the native call.</param>
/// <param name="target">
/// Target values, pinned and passed to the native call without a separate length. It must
/// therefore hold at least one entry per row of <paramref name="data"/>.
/// </param>
/// <param name="significanceLevel">Forwarded unchanged to the native call.</param>
/// <returns>Managed copy of the native feature records and the native NSelected value.</returns>
/// <exception cref="ArgumentNullException"><paramref name="target"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="target"/> has fewer entries than <paramref name="data"/> has rows.</exception>
public AnovaSelectionResult AnovaSelect(double[,] data, uint[] target, double significanceLevel = 0.05)
{
    var (nRows, nCols, flat) = Flatten(data);

    // The native call receives only a raw pointer for target; a short array would let it
    // read past the end of managed memory, so reject it up front.
    ArgumentNullException.ThrowIfNull(target);
    if ((uint)target.Length < nRows)
        throw new ArgumentException("target must contain at least one entry per data row", nameof(target));

    var native = new NativeStructs.CAnovaSelectionResult();
    unsafe
    {
        fixed (double* ptr = flat)
        fixed (uint* tgt = target)
        {
            Native.ThrowIfFailed(
                Native.insight_anova_select(ptr, nRows, nCols, tgt, significanceLevel, ref native));
        }
    }

    try
    {
        var features = new AnovaFeature[native.NFeatures];
        // Element stride of the native record array; constant across iterations.
        var stride = Marshal.SizeOf<NativeStructs.CAnovaFeature>();
        for (uint i = 0; i < native.NFeatures; i++)
        {
            var f = Marshal.PtrToStructure<NativeStructs.CAnovaFeature>(native.Features + (int)i * stride);
            features[i] = new AnovaFeature { Index = f.Index, FStatistic = f.FStatistic, PValue = f.PValue };
        }

        return new AnovaSelectionResult
        {
            Features = features,
            NSelected = native.NSelected
        };
    }
    finally
    {
        Native.insight_free_anova_features(native.Features, native.NFeatures);
    }
}
/// <summary>
/// Runs the native <c>insight_mutual_info</c> routine on <paramref name="data"/> and
/// <paramref name="target"/> and copies the per-feature output into managed objects.
/// </summary>
/// <param name="data">Input matrix; flattened by <c>Flatten</c> and pinned for the native call.</param>
/// <param name="target">
/// Target values, pinned and passed to the native call without a separate length. It must
/// therefore hold at least one entry per row of <paramref name="data"/>.
/// </param>
/// <param name="nBins">Forwarded unchanged to the native call.</param>
/// <returns>Managed copy of the native feature records.</returns>
/// <exception cref="ArgumentNullException"><paramref name="target"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="target"/> has fewer entries than <paramref name="data"/> has rows.</exception>
public MutualInfoResult MutualInfo(double[,] data, uint[] target, uint nBins = 10)
{
    var (nRows, nCols, flat) = Flatten(data);

    // The native call receives only a raw pointer for target; a short array would let it
    // read past the end of managed memory, so reject it up front.
    ArgumentNullException.ThrowIfNull(target);
    if ((uint)target.Length < nRows)
        throw new ArgumentException("target must contain at least one entry per data row", nameof(target));

    var native = new NativeStructs.CMutualInfoResult();
    unsafe
    {
        fixed (double* ptr = flat)
        fixed (uint* tgt = target)
        {
            Native.ThrowIfFailed(
                Native.insight_mutual_info(ptr, nRows, nCols, tgt, nBins, ref native));
        }
    }

    try
    {
        var features = new MutualInfoFeature[native.NFeatures];
        // Element stride of the native record array; constant across iterations.
        var stride = Marshal.SizeOf<NativeStructs.CMutualInfoFeature>();
        for (uint i = 0; i < native.NFeatures; i++)
        {
            var f = Marshal.PtrToStructure<NativeStructs.CMutualInfoFeature>(native.Features + (int)i * stride);
            features[i] = new MutualInfoFeature { Index = f.Index, Mi = f.Mi };
        }

        return new MutualInfoResult { Features = features };
    }
    finally
    {
        Native.insight_free_mi_features(native.Features, native.NFeatures);
    }
}
/// <summary>
/// Runs the native <c>insight_permutation_importance</c> routine on <paramref name="data"/> and
/// <paramref name="target"/> and copies the per-feature output into managed objects.
/// </summary>
/// <param name="data">Input matrix; flattened by <c>Flatten</c> and pinned for the native call.</param>
/// <param name="target">
/// Target values, pinned and passed to the native call without a separate length. It must
/// therefore hold at least one entry per row of <paramref name="data"/>.
/// </param>
/// <param name="nRepeats">Forwarded unchanged to the native call.</param>
/// <param name="seed">Forwarded unchanged to the native call.</param>
/// <returns>Managed copy of the native feature records and the native BaselineScore value.</returns>
/// <exception cref="ArgumentNullException"><paramref name="target"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="target"/> has fewer entries than <paramref name="data"/> has rows.</exception>
public PermImportanceResult PermutationImportance(double[,] data, double[] target, uint nRepeats = 5, ulong seed = 42)
{
    var (nRows, nCols, flat) = Flatten(data);

    // The native call receives only a raw pointer for target; a short array would let it
    // read past the end of managed memory, so reject it up front.
    ArgumentNullException.ThrowIfNull(target);
    if ((uint)target.Length < nRows)
        throw new ArgumentException("target must contain at least one entry per data row", nameof(target));

    var native = new NativeStructs.CPermImportanceResult();
    unsafe
    {
        fixed (double* ptr = flat)
        fixed (double* tgt = target)
        {
            Native.ThrowIfFailed(
                Native.insight_permutation_importance(ptr, nRows, nCols, tgt, nRepeats, seed, ref native));
        }
    }

    try
    {
        var features = new PermImportanceFeature[native.NFeatures];
        // Element stride of the native record array; constant across iterations.
        var stride = Marshal.SizeOf<NativeStructs.CPermImportanceFeature>();
        for (uint i = 0; i < native.NFeatures; i++)
        {
            var f = Marshal.PtrToStructure<NativeStructs.CPermImportanceFeature>(native.Features + (int)i * stride);
            features[i] = new PermImportanceFeature { Index = f.Index, Importance = f.Importance, StdDev = f.StdDev };
        }

        return new PermImportanceResult
        {
            BaselineScore = native.BaselineScore,
            Features = features
        };
    }
    finally
    {
        Native.insight_free_perm_features(native.Features, native.NFeatures);
    }
}
#endregion
#region Changepoint Detection
/// <summary>
/// Runs the native <c>insight_pelt</c> routine on a single series and copies the detected
/// changepoints into a managed <see cref="PeltResult"/>.
/// </summary>
/// <param name="data">Series values; pinned and passed to the native call with their count.</param>
/// <param name="cost">Forwarded unchanged to the native call (a numeric selector; values are defined natively).</param>
/// <param name="penalty">Forwarded unchanged to the native call.</param>
/// <param name="minSegmentLen">Forwarded unchanged to the native call.</param>
/// <returns>The changepoint array, with NSegments set to the native changepoint count plus one.</returns>
public PeltResult Pelt(double[] data, uint cost = 0, double penalty = 0.0, uint minSegmentLen = 2)
{
    var raw = new NativeStructs.CPeltResult();

    unsafe
    {
        fixed (double* pData = data)
        {
            var status = Native.insight_pelt(pData, (uint)data.Length, cost, penalty, minSegmentLen, ref raw);
            Native.ThrowIfFailed(status);
        }
    }

    try
    {
        return new PeltResult
        {
            Changepoints = CopyU32Array(raw.Changepoints, raw.NChangepoints),
            NSegments = raw.NChangepoints + 1
        };
    }
    finally
    {
        // The whole native result struct is handed back for release.
        Native.insight_free_pelt_result(ref raw);
    }
}
/// <summary>
/// Runs the native <c>insight_pelt_multi</c> routine on several equally long series and copies
/// the detected changepoints into a managed <see cref="PeltResult"/>.
/// </summary>
/// <param name="signals">
/// One array per channel. The channels are packed back to back into a single buffer
/// (channel 0 first) before the native call.
/// </param>
/// <param name="cost">Forwarded unchanged to the native call.</param>
/// <param name="penalty">Forwarded unchanged to the native call.</param>
/// <param name="minSegmentLen">Forwarded unchanged to the native call.</param>
/// <returns>The changepoint array, with NSegments set to the native changepoint count plus one.</returns>
/// <exception cref="ArgumentException"><paramref name="signals"/> is empty, or its channels differ in length.</exception>
public PeltResult PeltMulti(double[][] signals, uint cost = 0, double penalty = 0.0, uint minSegmentLen = 2)
{
    if (signals.Length == 0)
    {
        throw new ArgumentException("signals must not be empty", nameof(signals));
    }

    var channelCount = (uint)signals.Length;
    var pointCount = (uint)signals[0].Length;

    // Every channel must match the length of the first one.
    foreach (var signal in signals)
    {
        if (signal.Length != (int)pointCount)
        {
            throw new ArgumentException("All signals must have the same length", nameof(signals));
        }
    }

    var packed = new double[channelCount * pointCount];
    var offset = 0;
    foreach (var signal in signals)
    {
        Array.Copy(signal, 0, packed, offset, (int)pointCount);
        offset += (int)pointCount;
    }

    var raw = new NativeStructs.CPeltResult();

    unsafe
    {
        fixed (double* pData = packed)
        {
            var status = Native.insight_pelt_multi(
                pData, channelCount, pointCount, cost, penalty, minSegmentLen, ref raw);
            Native.ThrowIfFailed(status);
        }
    }

    try
    {
        return new PeltResult
        {
            Changepoints = CopyU32Array(raw.Changepoints, raw.NChangepoints),
            NSegments = raw.NChangepoints + 1
        };
    }
    finally
    {
        Native.insight_free_pelt_result(ref raw);
    }
}
#endregion
#region Helpers
/// <summary>
/// Copies a two-dimensional array into a new one-dimensional array in the source's storage
/// order and reports its dimensions.
/// </summary>
/// <param name="data">The matrix to flatten; must not be null.</param>
/// <returns>
/// The length of dimension 0, the length of dimension 1, and a fresh array holding
/// nRows * nCols values.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="OverflowException">The byte size of the data does not fit in an <see cref="int"/>.</exception>
private static (uint nRows, uint nCols, double[] flat) Flatten(double[,] data)
{
    ArgumentNullException.ThrowIfNull(data);

    var nRows = (uint)data.GetLength(0);
    var nCols = (uint)data.GetLength(1);
    var flat = new double[nRows * nCols];

    // Buffer.BlockCopy takes an int byte count. Without the checked context a very large
    // array would wrap the multiplication and silently copy too little (or nothing).
    var byteCount = checked(flat.Length * sizeof(double));
    Buffer.BlockCopy(data, 0, flat, 0, byteCount);

    return (nRows, nCols, flat);
}
/// <summary>
/// Copies <paramref name="count"/> 32-bit values from unmanaged memory into a new <c>uint[]</c>.
/// </summary>
/// <param name="ptr">Start of the unmanaged buffer; may be <see cref="IntPtr.Zero"/>.</param>
/// <param name="count">Number of elements to copy; also the length of the returned array.</param>
/// <returns>
/// An array of <paramref name="count"/> elements. It is left zero-filled when
/// <paramref name="count"/> is 0 or <paramref name="ptr"/> is null.
/// </returns>
private static uint[] CopyU32Array(IntPtr ptr, uint count)
{
    var target = new uint[count];
    if (count == 0 || ptr == IntPtr.Zero)
    {
        return target;
    }

    // Marshal.Copy has no uint[] overload; the runtime allows viewing a uint[] as an int[]
    // when the cast goes through object, so the copy writes straight into target.
    var signedView = (int[])(object)target;
    Marshal.Copy(ptr, signedView, 0, (int)count);
    return target;
}
/// <summary>
/// Copies <paramref name="count"/> 32-bit signed integers from unmanaged memory into a new <c>int[]</c>.
/// </summary>
/// <param name="ptr">Start of the unmanaged buffer; may be <see cref="IntPtr.Zero"/>.</param>
/// <param name="count">Number of elements to copy; also the length of the returned array.</param>
/// <returns>
/// An array of <paramref name="count"/> elements. It is left zero-filled when
/// <paramref name="count"/> is 0 or <paramref name="ptr"/> is null.
/// </returns>
private static int[] CopyI32Array(IntPtr ptr, uint count)
{
    var target = new int[count];
    if (count == 0 || ptr == IntPtr.Zero)
    {
        return target;
    }

    Marshal.Copy(ptr, target, 0, (int)count);
    return target;
}
/// <summary>
/// Copies <paramref name="count"/> doubles from unmanaged memory into a new <c>double[]</c>.
/// </summary>
/// <param name="ptr">Start of the unmanaged buffer; may be <see cref="IntPtr.Zero"/>.</param>
/// <param name="count">Number of elements to copy; also the length of the returned array.</param>
/// <returns>
/// An array of <paramref name="count"/> elements. It is left zero-filled when
/// <paramref name="count"/> is 0 or <paramref name="ptr"/> is null.
/// </returns>
private static double[] CopyF64Array(IntPtr ptr, uint count)
{
    var target = new double[count];
    if (count == 0 || ptr == IntPtr.Zero)
    {
        return target;
    }

    Marshal.Copy(ptr, target, 0, (int)count);
    return target;
}
/// <summary>
/// Copies <paramref name="count"/> bytes from unmanaged memory into a new <c>byte[]</c>.
/// </summary>
/// <param name="ptr">Start of the unmanaged buffer; may be <see cref="IntPtr.Zero"/>.</param>
/// <param name="count">Number of bytes to copy; also the length of the returned array.</param>
/// <returns>
/// An array of <paramref name="count"/> bytes. It is left zero-filled when
/// <paramref name="count"/> is 0 or <paramref name="ptr"/> is null.
/// </returns>
private static byte[] CopyByteArray(IntPtr ptr, uint count)
{
    var target = new byte[count];
    if (count == 0 || ptr == IntPtr.Zero)
    {
        return target;
    }

    Marshal.Copy(ptr, target, 0, (int)count);
    return target;
}
#endregion
#region IDisposable
/// <summary>
/// Calls the native <c>insight_clear_error</c> once and marks this client as disposed.
/// Later calls do nothing.
/// </summary>
public void Dispose()
{
    if (_disposed)
    {
        return;
    }

    Native.insight_clear_error();
    _disposed = true;
}
#endregion
}
#region Result Models
/// <summary>
/// Data profile returned by <c>InsightClient.ProfileCsv</c> and <c>InsightClient.ProfileJson</c>;
/// populated in <c>InsightClient.BuildProfileResult</c>.
/// </summary>
public class ProfileResult
{
/// <summary>Value returned by the native <c>insight_profile_row_count</c> call.</summary>
public long RowCount { get; init; }
/// <summary>Value returned by the native <c>insight_profile_col_count</c> call.</summary>
public long ColumnCount { get; init; }
/// <summary>One summary per column, in column-index order. Defaults to an empty array.</summary>
public ColumnSummary[] Columns { get; init; } = [];
}
/// <summary>
/// Per-column statistics copied from the native <c>CColumnSummary</c> struct in
/// <c>InsightClient.BuildProfileResult</c>.
/// </summary>
public class ColumnSummary
{
/// <summary>Copied from the native summary's Index field.</summary>
public uint Index { get; init; }
/// <summary>Copied from the native summary's ValidCount field.</summary>
public ulong ValidCount { get; init; }
/// <summary>Copied from the native summary's NullCount field.</summary>
public ulong NullCount { get; init; }
/// <summary>
/// The native DataType code converted to <see cref="InsightDataType"/>; codes that are not
/// defined enum members are reported as <c>InsightDataType.Text</c>.
/// </summary>
public InsightDataType DataType { get; init; }
/// <summary>Copied from the native summary's Mean field.</summary>
public double Mean { get; init; }
/// <summary>Copied from the native summary's StdDev field.</summary>
public double StdDev { get; init; }
/// <summary>Copied from the native summary's Min field.</summary>
public double Min { get; init; }
/// <summary>Copied from the native summary's Max field.</summary>
public double Max { get; init; }
}
/// <summary>
/// Result returned by <c>InsightClient.KMeans</c> and <c>InsightClient.MiniBatchKMeans</c>,
/// copied from the native <c>CKMeansResult</c> struct.
/// </summary>
public class KMeansResult
{
/// <summary>Copied from the native result's K field.</summary>
public uint K { get; init; }
/// <summary>Copied from the native result's Wcss field (presumably within-cluster sum of squares — confirm against the native docs).</summary>
public double Wcss { get; init; }
/// <summary>Copied from the native result's Iterations field.</summary>
public uint Iterations { get; init; }
/// <summary>Managed copy of the native Labels buffer (NLabels entries). Defaults to an empty array.</summary>
public uint[] Labels { get; init; } = [];
}
/// <summary>
/// Result returned by <c>InsightClient.Dbscan</c>, copied from the native <c>CDbscanResult</c> struct.
/// </summary>
public class DbscanResult
{
/// <summary>Copied from the native result's NClusters field.</summary>
public uint NClusters { get; init; }
/// <summary>Copied from the native result's NoiseCount field.</summary>
public uint NoiseCount { get; init; }
/// <summary>Managed copy of the native signed Labels buffer (NLabels entries). Defaults to an empty array.</summary>
public int[] Labels { get; init; } = [];
}
/// <summary>
/// Result returned by <c>InsightClient.Hierarchical</c>, copied from the native
/// <c>CHierarchicalResult</c> struct.
/// </summary>
public class HierarchicalResult
{
/// <summary>Copied from the native result's NClusters field.</summary>
public uint NClusters { get; init; }
/// <summary>Managed copy of the native Labels buffer (NLabels entries). Defaults to an empty array.</summary>
public uint[] Labels { get; init; } = [];
/// <summary>Managed copy of the native MergeDistances buffer (NMerges entries). Defaults to an empty array.</summary>
public double[] MergeDistances { get; init; } = [];
/// <summary>Managed copy of the native MergeSizes buffer (NMerges entries). Defaults to an empty array.</summary>
public uint[] MergeSizes { get; init; } = [];
}
/// <summary>
/// Result returned by <c>InsightClient.Hdbscan</c>, copied from the native <c>CHdbscanResult</c> struct.
/// </summary>
public class HdbscanResult
{
/// <summary>Copied from the native result's NClusters field.</summary>
public uint NClusters { get; init; }
/// <summary>Copied from the native result's NoiseCount field.</summary>
public uint NoiseCount { get; init; }
/// <summary>Managed copy of the native signed Labels buffer (NLabels entries). Defaults to an empty array.</summary>
public int[] Labels { get; init; } = [];
/// <summary>Managed copy of the native Probabilities buffer; it has the same length as <see cref="Labels"/>. Defaults to an empty array.</summary>
public double[] Probabilities { get; init; } = [];
}
/// <summary>
/// Result returned by <c>InsightClient.GapStatistic</c>, copied from the native
/// <c>CGapStatResult</c> struct.
/// </summary>
public class GapStatResult
{
/// <summary>Copied from the native result's BestK field.</summary>
public uint BestK { get; init; }
/// <summary>Managed copy of the native GapValues buffer (NValues entries). Defaults to an empty array.</summary>
public double[] GapValues { get; init; } = [];
/// <summary>Managed copy of the native StdErrors buffer (NValues entries). Defaults to an empty array.</summary>
public double[] StdErrors { get; init; } = [];
}
/// <summary>
/// Result returned by <c>InsightClient.Pca</c>, copied from the native <c>CPcaResult</c> struct.
/// </summary>
public class PcaResult
{
/// <summary>Copied from the native result's NComponents field.</summary>
public uint NComponents { get; init; }
/// <summary>Managed copy of the native ExplainedVariance buffer (NVariance entries). Defaults to an empty array.</summary>
public double[] ExplainedVariance { get; init; } = [];
}
/// <summary>
/// Result returned by <c>InsightClient.IsolationForest</c> and <c>InsightClient.Lof</c>,
/// copied from the native <c>CAnomalyResult</c> struct.
/// </summary>
public class AnomalyResult
{
/// <summary>Managed copy of the native Scores buffer (N entries). Defaults to an empty array.</summary>
public double[] Scores { get; init; } = [];
/// <summary>
/// Managed copy of the native Anomalies byte buffer (N entries). Defaults to an empty array.
/// Presumably one flag per input row, non-zero meaning anomalous — confirm against the native docs.
/// </summary>
public byte[] Anomalies { get; init; } = [];
/// <summary>Copied from the native result's AnomalyCount field.</summary>
public uint AnomalyCount { get; init; }
/// <summary>Copied from the native result's Threshold field.</summary>
public double Threshold { get; init; }
}
/// <summary>
/// Result returned by <c>InsightClient.Mahalanobis</c>, copied from the native
/// <c>CMahalanobisResult</c> struct.
/// </summary>
public class MahalanobisResult
{
/// <summary>Managed copy of the native Distances buffer (N entries). Defaults to an empty array.</summary>
public double[] Distances { get; init; } = [];
/// <summary>
/// Managed copy of the native Anomalies byte buffer (N entries). Defaults to an empty array.
/// Presumably one flag per input row, non-zero meaning outlier — confirm against the native docs.
/// </summary>
public byte[] Anomalies { get; init; } = [];
/// <summary>Copied from the native result's Threshold field.</summary>
public double Threshold { get; init; }
/// <summary>Copied from the native result's OutlierCount field.</summary>
public uint OutlierCount { get; init; }
}
/// <summary>
/// Result returned by <c>InsightClient.Correlation</c>, built from the native
/// <c>CCorrelationResult</c> struct.
/// </summary>
public class CorrelationResult
{
/// <summary>Copied from the native result's NVars field; also the side length of <see cref="Matrix"/>.</summary>
public uint NVars { get; init; }
/// <summary>
/// NVars-by-NVars matrix filled row by row from the native flat buffer.
/// Defaults to an empty 0x0 array.
/// </summary>
public double[,] Matrix { get; init; } = new double[0, 0];
/// <summary>Copied from the native result's NHighPairs field.</summary>
public uint NHighPairs { get; init; }
}
/// <summary>
/// Result returned by <c>InsightClient.Regression</c>, copied from the native
/// <c>CRegressionResult</c> struct.
/// </summary>
public class RegressionResult
{
/// <summary>Copied from the native result's Intercept field.</summary>
public double Intercept { get; init; }
/// <summary>Copied from the native result's Slope field.</summary>
public double Slope { get; init; }
/// <summary>Copied from the native result's RSquared field.</summary>
public double RSquared { get; init; }
/// <summary>Copied from the native result's AdjRSquared field.</summary>
public double AdjRSquared { get; init; }
/// <summary>Copied from the native result's FPValue field (presumably the p-value of the F-test — confirm against the native docs).</summary>
public double FPValue { get; init; }
}
/// <summary>
/// Result returned by <c>InsightClient.CramersV</c>, copied from the native
/// <c>CCramersVResult</c> struct.
/// </summary>
public class CramersVResult
{
/// <summary>Copied from the native result's V field.</summary>
public double V { get; init; }
/// <summary>Copied from the native result's ChiSquared field.</summary>
public double ChiSquared { get; init; }
/// <summary>Copied from the native result's PValue field.</summary>
public double PValue { get; init; }
}
/// <summary>
/// Result returned by <c>InsightClient.Distribution</c>, copied from the native
/// <c>CDistributionResult</c> struct. The Ks/Jb/Sw/Ad prefixes presumably stand for the
/// Kolmogorov-Smirnov, Jarque-Bera, Shapiro-Wilk and Anderson-Darling tests — confirm
/// against the native docs.
/// </summary>
public class DistributionResult
{
/// <summary>Copied from the native result's N field.</summary>
public uint N { get; init; }
/// <summary>Copied from the native result's KsStatistic field.</summary>
public double KsStatistic { get; init; }
/// <summary>Copied from the native result's KsPValue field.</summary>
public double KsPValue { get; init; }
/// <summary>Copied from the native result's JbStatistic field.</summary>
public double JbStatistic { get; init; }
/// <summary>Copied from the native result's JbPValue field.</summary>
public double JbPValue { get; init; }
/// <summary>Copied from the native result's SwStatistic field.</summary>
public double SwStatistic { get; init; }
/// <summary>Copied from the native result's SwPValue field.</summary>
public double SwPValue { get; init; }
/// <summary>Copied from the native result's AdStatistic field.</summary>
public double AdStatistic { get; init; }
/// <summary>Copied from the native result's AdPValue field.</summary>
public double AdPValue { get; init; }
/// <summary>True when the native result's IsNormal field is non-zero.</summary>
public bool IsNormal { get; init; }
}
/// <summary>
/// Result returned by <c>InsightClient.FeatureImportance</c>, copied from the native
/// <c>CFeatureImportanceResult</c> struct.
/// </summary>
public class FeatureImportanceResult
{
/// <summary>Managed copy of the native Scores buffer (NScores entries). Defaults to an empty array.</summary>
public double[] Scores { get; init; } = [];
/// <summary>Copied from the native result's ConditionNumber field.</summary>
public double ConditionNumber { get; init; }
/// <summary>Copied from the native result's NLowVariance field.</summary>
public uint NLowVariance { get; init; }
/// <summary>Copied from the native result's NHighCorrPairs field.</summary>
public uint NHighCorrPairs { get; init; }
}
/// <summary>
/// One feature record from <c>InsightClient.AnovaSelect</c>, copied from a native
/// <c>CAnovaFeature</c> struct.
/// </summary>
public class AnovaFeature
{
/// <summary>Copied from the native record's Index field.</summary>
public uint Index { get; init; }
/// <summary>Copied from the native record's FStatistic field.</summary>
public double FStatistic { get; init; }
/// <summary>Copied from the native record's PValue field.</summary>
public double PValue { get; init; }
}
/// <summary>
/// Result returned by <c>InsightClient.AnovaSelect</c>, built from the native
/// <c>CAnovaSelectionResult</c> struct.
/// </summary>
public class AnovaSelectionResult
{
/// <summary>One entry per native feature record (NFeatures entries). Defaults to an empty array.</summary>
public AnovaFeature[] Features { get; init; } = [];
/// <summary>Copied from the native result's NSelected field.</summary>
public uint NSelected { get; init; }
}
/// <summary>
/// One feature record from <c>InsightClient.MutualInfo</c>, copied from a native
/// <c>CMutualInfoFeature</c> struct.
/// </summary>
public class MutualInfoFeature
{
/// <summary>Copied from the native record's Index field.</summary>
public uint Index { get; init; }
/// <summary>Copied from the native record's Mi field.</summary>
public double Mi { get; init; }
}
/// <summary>
/// Result returned by <c>InsightClient.MutualInfo</c>, built from the native
/// <c>CMutualInfoResult</c> struct.
/// </summary>
public class MutualInfoResult
{
/// <summary>One entry per native feature record (NFeatures entries). Defaults to an empty array.</summary>
public MutualInfoFeature[] Features { get; init; } = [];
}
/// <summary>
/// One feature record from <c>InsightClient.PermutationImportance</c>, copied from a native
/// <c>CPermImportanceFeature</c> struct.
/// </summary>
public class PermImportanceFeature
{
/// <summary>Copied from the native record's Index field.</summary>
public uint Index { get; init; }
/// <summary>Copied from the native record's Importance field.</summary>
public double Importance { get; init; }
/// <summary>Copied from the native record's StdDev field.</summary>
public double StdDev { get; init; }
}
/// <summary>
/// Result returned by <c>InsightClient.PermutationImportance</c>, built from the native
/// <c>CPermImportanceResult</c> struct.
/// </summary>
public class PermImportanceResult
{
/// <summary>Copied from the native result's BaselineScore field.</summary>
public double BaselineScore { get; init; }
/// <summary>One entry per native feature record (NFeatures entries). Defaults to an empty array.</summary>
public PermImportanceFeature[] Features { get; init; } = [];
}
/// <summary>
/// Result returned by <c>InsightClient.Pelt</c> and <c>InsightClient.PeltMulti</c>, built from
/// the native <c>CPeltResult</c> struct.
/// </summary>
public class PeltResult
{
/// <summary>Managed copy of the native Changepoints buffer (NChangepoints entries). Defaults to an empty array.</summary>
public uint[] Changepoints { get; init; } = [];
/// <summary>The native changepoint count plus one.</summary>
public uint NSegments { get; init; }
}
#endregion