pub struct TrainingDataExtractor { /* private fields */ }
Expand description
Training data extractor
Implementations§
Source§impl TrainingDataExtractor
impl TrainingDataExtractor
Sourcepub fn extract_training_data(
&self,
commits: &[CommitInfo],
_repository_name: &str,
) -> Result<Vec<TrainingExample>>
pub fn extract_training_data( &self, commits: &[CommitInfo], _repository_name: &str, ) -> Result<Vec<TrainingExample>>
Extract training examples from commit history
Filters commits and auto-labels using rule-based classifier.
§Arguments
commits - Raw commit history
repository_name - Name of the repository
§Returns
Ok(Vec<TrainingExample>) - Labeled training examples
Err - If extraction fails
§Examples
use organizational_intelligence_plugin::training::TrainingDataExtractor;
use organizational_intelligence_plugin::git::CommitInfo;
let extractor = TrainingDataExtractor::new(0.75);
let commits = vec![
CommitInfo {
hash: "abc123".to_string(),
message: "fix: null pointer dereference".to_string(),
author: "dev@example.com".to_string(),
timestamp: 1234567890,
files_changed: 2,
lines_added: 10,
lines_removed: 5,
},
];
let examples = extractor.extract_training_data(&commits, "test-repo").unwrap();
assert_eq!(examples.len(), 1);
Sourcepub fn create_splits(
&self,
examples: &[TrainingExample],
repositories: &[String],
) -> Result<TrainingDataset>
pub fn create_splits( &self, examples: &[TrainingExample], repositories: &[String], ) -> Result<TrainingDataset>
Create train/validation/test splits
Uses 70/15/15 split (train/validation/test) as recommended by the spec.
§Arguments
examples - Labeled training examples
repositories - List of repository names
§Returns
Ok(TrainingDataset) - Dataset with splits
Err - If split fails
§Examples
use organizational_intelligence_plugin::training::TrainingDataExtractor;
use organizational_intelligence_plugin::training::TrainingExample;
use organizational_intelligence_plugin::classifier::DefectCategory;
let extractor = TrainingDataExtractor::new(0.75);
let examples = vec![
TrainingExample {
message: "fix: bug".to_string(),
label: DefectCategory::MemorySafety,
confidence: 0.85,
commit_hash: "abc".to_string(),
author: "dev".to_string(),
timestamp: 123,
lines_added: 5,
lines_removed: 2,
files_changed: 1,
error_code: None,
clippy_lint: None,
has_suggestion: false,
suggestion_applicability: None,
source: organizational_intelligence_plugin::citl::TrainingSource::CommitMessage,
},
];
let dataset = extractor.create_splits(&examples, &["repo1".to_string()]).unwrap();
assert!(dataset.train.len() + dataset.validation.len() + dataset.test.len() == 1);
Sourcepub fn get_statistics(&self, examples: &[TrainingExample]) -> String
pub fn get_statistics(&self, examples: &[TrainingExample]) -> String
Trait Implementations§
Auto Trait Implementations§
impl Freeze for TrainingDataExtractor
impl RefUnwindSafe for TrainingDataExtractor
impl Send for TrainingDataExtractor
impl Sync for TrainingDataExtractor
impl Unpin for TrainingDataExtractor
impl UnwindSafe for TrainingDataExtractor
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§impl<T> Pointable for T
impl<T> Pointable for T
Source§impl<T> PolicyExt for T where
T: ?Sized,
impl<T> PolicyExt for T where
T: ?Sized,
Source§impl<SS, SP> SupersetOf<SS> for SP where
SS: SubsetOf<SP>,
impl<SS, SP> SupersetOf<SS> for SP where
SS: SubsetOf<SP>,
Source§fn to_subset(&self) -> Option<SS>
fn to_subset(&self) -> Option<SS>
The inverse inclusion map: attempts to construct self from the equivalent element of its superset. Read more
Source§fn is_in_subset(&self) -> bool
fn is_in_subset(&self) -> bool
Checks if self is actually part of its subset T (and can be converted to it).
Source§fn to_subset_unchecked(&self) -> SS
fn to_subset_unchecked(&self) -> SS
Use with care! Same as self.to_subset but without any property checks. Always succeeds.
Source§fn from_subset(element: &SS) -> SP
fn from_subset(element: &SS) -> SP
The inclusion map: converts self to the equivalent element of its superset.