pub struct TrainingDataExtractor { /* private fields */ }
Expand description
Training data extractor
Implementations§
Source§impl TrainingDataExtractor
impl TrainingDataExtractor
Sourcepub fn extract_training_data(
&self,
commits: &[CommitInfo],
_repository_name: &str,
) -> Result<Vec<TrainingExample>>
pub fn extract_training_data( &self, commits: &[CommitInfo], _repository_name: &str, ) -> Result<Vec<TrainingExample>>
Extract training examples from commit history
Filters commits and auto-labels using rule-based classifier.
§Arguments
commits - Raw commit history
repository_name - Name of the repository
§Returns
Ok(Vec<TrainingExample>) - Labeled training examples
Err - If extraction fails
§Examples
use organizational_intelligence_plugin::training::TrainingDataExtractor;
use organizational_intelligence_plugin::git::CommitInfo;
let extractor = TrainingDataExtractor::new(0.75);
let commits = vec![
CommitInfo {
hash: "abc123".to_string(),
message: "fix: null pointer dereference".to_string(),
author: "dev@example.com".to_string(),
timestamp: 1234567890,
files_changed: 2,
lines_added: 10,
lines_removed: 5,
},
];
let examples = extractor.extract_training_data(&commits, "test-repo").unwrap();
assert_eq!(examples.len(), 1);
Source pub fn create_splits(
&self,
examples: &[TrainingExample],
repositories: &[String],
) -> Result<TrainingDataset>
pub fn create_splits( &self, examples: &[TrainingExample], repositories: &[String], ) -> Result<TrainingDataset>
Create train/validation/test splits
Uses 70/15/15 split (train/validation/test) as recommended by the spec.
§Arguments
examples - Labeled training examples
repositories - List of repository names
§Returns
Ok(TrainingDataset) - Dataset with splits
Err - If split fails
§Examples
use organizational_intelligence_plugin::training::TrainingDataExtractor;
use organizational_intelligence_plugin::training::TrainingExample;
use organizational_intelligence_plugin::classifier::DefectCategory;
let extractor = TrainingDataExtractor::new(0.75);
let examples = vec![
TrainingExample {
message: "fix: bug".to_string(),
label: DefectCategory::MemorySafety,
confidence: 0.85,
commit_hash: "abc".to_string(),
author: "dev".to_string(),
timestamp: 123,
lines_added: 5,
lines_removed: 2,
files_changed: 1,
error_code: None,
clippy_lint: None,
has_suggestion: false,
suggestion_applicability: None,
source: organizational_intelligence_plugin::citl::TrainingSource::CommitMessage,
},
];
let dataset = extractor.create_splits(&examples, &["repo1".to_string()]).unwrap();
assert!(dataset.train.len() + dataset.validation.len() + dataset.test.len() == 1);
Source pub fn get_statistics(&self, examples: &[TrainingExample]) -> String
pub fn get_statistics(&self, examples: &[TrainingExample]) -> String
Trait Implementations§
Auto Trait Implementations§
impl Freeze for TrainingDataExtractor
impl RefUnwindSafe for TrainingDataExtractor
impl Send for TrainingDataExtractor
impl Sync for TrainingDataExtractor
impl Unpin for TrainingDataExtractor
impl UnwindSafe for TrainingDataExtractor
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more