pub struct TextInput {
pub texts: Vec<String>,
pub entities: Vec<String>,
}
Expand description
Represents the raw text input, as a list of text chunks and a list of entity classes
Fields
texts: Vec<String>
entities: Vec<String>
Implementations
impl TextInput
pub fn new(texts: Vec<String>, entities: Vec<String>) -> Result<Self>
Default constructor that moves the input data given as a vector of the text sequences to be analyzed, and a vector of entity classes.
pub fn from_str(texts: &[&str], entities: &[&str]) -> Result<Self>
This constructor will mostly be used to test with plain arrays of static strs.
Examples found in repository?
examples/multilingual.rs (line 39)
34fn main() -> Result<()> {
35 println!("Loading model...");
36 let model = GLiNER::<SpanMode>::new(Parameters::default(), RuntimeParameters::default(), TOKENIZER, MODEL)?;
37
38 println!("Inferencing...");
39 let input = TextInput::from_str(&[TEXT], &LABELS)?;
40 let output = model.inference(input)?;
41
42 println!("Results:\n{output}");
43
44 Ok(check_results(&output))
45}
More examples
examples/span_mode.rs (lines 17-30)
7fn main() -> Result<()> {
8
9 println!("Loading model...");
10 let model = GLiNER::<SpanMode>::new(
11 Parameters::default(),
12 RuntimeParameters::default(),
13 "models/gliner_small-v2.1/tokenizer.json",
14 "models/gliner_small-v2.1/onnx/model.onnx",
15 )?;
16
17 let input = TextInput::from_str(
18 &[
19 "I am James Bond",
20 "This is James and I live in Chelsea, London.",
21 "My name is Bond, James Bond.",
22 "I like to drive my Aston Martin.",
23 "The villain in the movie is Auric Goldfinger."
24 ],
25 &[
26 "person",
27 "location",
28 "vehicle",
29 ]
30 )?;
31
32 println!("Inferencing...");
33 let output = model.inference(input)?;
34
35 println!("Results:");
36 for spans in output.spans {
37 for span in spans {
38 println!("{:3} | {:16} | {:10} | {:.1}%", span.sequence(), span.text(), span.class(), span.probability() * 100.0);
39 }
40 }
41
42 Ok(())
43
44}
examples/token_mode.rs (lines 17-30)
7fn main() -> Result<()> {
8
9 println!("Loading model...");
10 let model = GLiNER::<TokenMode>::new(
11 Parameters::default(),
12 RuntimeParameters::default(),
13 "models/gliner-multitask-large-v0.5/tokenizer.json",
14 "models/gliner-multitask-large-v0.5/onnx/model.onnx",
15 )?;
16
17 let input = TextInput::from_str(
18 &[
19 "I am James Bond",
20 "This is James and I live in Chelsea, London.",
21 "My name is Bond, James Bond.",
22 "I like to drive my Aston Martin.",
23 "The villain in the movie is Auric Goldfinger."
24 ],
25 &[
26 "person",
27 "location",
28 "vehicle",
29 ]
30 )?;
31
32 println!("Inferencing...");
33 let output = model.inference(input)?;
34
35 println!("Results:");
36 for spans in output.spans {
37 for span in spans {
38 println!("{:3} | {:16} | {:10} | {:.1}%", span.sequence(), span.text(), span.class(), span.probability() * 100.0);
39 }
40 }
41
42 Ok(())
43
44}
examples/relation_extraction.rs (lines 39-42)
24fn main() -> Result<()> {
25 // Set model and tokenizer paths
26 const MODEL_PATH: &str = "models/gliner-multitask-large-v0.5/onnx/model.onnx";
27 const TOKENIZER_PATH: &str = "models/gliner-multitask-large-v0.5/tokenizer.json";
28
29 // Use default parameters
30 let params: Parameters = Parameters::default();
31 let runtime_params = RuntimeParameters::default();
32
33 // Define a relation schema.
34 // We declare a "founded" relation which subject has to be a "person" and object has to be a "company"
35 let mut relation_schema = RelationSchema::new();
36 relation_schema.push_with_allowed_labels("founded", &["person"], &["company"]);
37
38 // Sample input text and entity labels
39 let input = TextInput::from_str(
40 &["Bill Gates is an American businessman who co-founded Microsoft."],
41 &["person", "company"],
42 )?;
43
44 // Load the model that will be leveraged for the pipeline below
45 println!("Loading model...");
46 let model = Model::new(MODEL_PATH, runtime_params)?;
47
48 // Relation Extraction needs Named Entity Recognition to be applied first.
49 // Here we combine the two pipelines: one for NER, and one for RE.
50 // For testing purposes we also insert printing functions.
51 let pipeline = composed![
52 TokenPipeline::new(TOKENIZER_PATH)?.to_composable(&model, ¶ms),
53 Print::new(Some("Entities:\n"), None),
54 RelationPipeline::default(TOKENIZER_PATH, &relation_schema)?.to_composable(&model, ¶ms),
55 Print::new(Some("Relations:\n"), None)
56 ];
57
58 // Actually perform inferences using the pipeline defined above
59 pipeline.apply(input)?;
60
61 Ok(())
62}
pub fn new_from_csv<P: AsRef<Path>>(
    path: P,
    column: usize,
    limit: usize,
    entities: Vec<String>,
) -> Result<Self>
For testing purposes. Panics if the specified column does not exist.
Examples found in repository?
examples/benchmark_gpu.rs (line 33)
21fn main() -> Result<()> {
22
23 const MAX_SAMPLES: usize = 1000;
24 const CSV_PATH: &str = "data/nuner-sample-1k.csv";
25
26 let entities = [
27 "person",
28 "location",
29 "vehicle",
30 ];
31
32 println!("Loading data...");
33 let input = TextInput::new_from_csv(CSV_PATH, 0, MAX_SAMPLES, entities.map(|x| x.to_string()).to_vec())?;
34 let nb_samples = input.texts.len();
35
36 println!("Loading model...");
37 let model = GLiNER::<TokenMode>::new(
38 Parameters::default(),
39 RuntimeParameters::default().with_execution_providers([
40 CUDAExecutionProvider::default().build(),
41 CoreMLExecutionProvider::default().build(),
42 ]),
43 "models/gliner-multitask-large-v0.5/tokenizer.json",
44 "models/gliner-multitask-large-v0.5/onnx/model.onnx",
45 )?;
46
47 println!("Inferencing...");
48 let inference_start = std::time::Instant::now();
49 let _output = model.inference(input)?;
50
51 let inference_time = inference_start.elapsed();
52 println!("Inference took {} seconds on {} samples ({:.2} samples/sec)", inference_time.as_secs(), nb_samples, nb_samples as f32 / inference_time.as_secs() as f32);
53
54 Ok(())
55}
More examples
examples/benchmark_cpu.rs (line 23)
15fn main() -> Result<()> {
16 let entities = [
17 "person",
18 "location",
19 "vehicle",
20 ];
21
22 println!("Loading data...");
23 let input = input::text::TextInput::new_from_csv(CSV_PATH, 0, MAX_SAMPLES, entities.map(|x| x.to_string()).to_vec())?;
24 let nb_samples = input.texts.len();
25
26 println!("Loading model...");
27 let model = GLiNER::<TokenMode>::new(
28 Parameters::default(),
29 RuntimeParameters::default().with_threads(THREADS),
30 std::path::Path::new("models/gliner-multitask-large-v0.5/tokenizer.json"),
31 std::path::Path::new("models/gliner-multitask-large-v0.5/onnx/model.onnx")
32 )?;
33
34 let global_inference_start = std::time::Instant::now();
35
36 for i in 0..REPEAT {
37 println!("Inferencing ({})...", i + 1);
38 let inference_start = std::time::Instant::now();
39 let _output = model.inference(input.clone())?;
40
41 let inference_time = inference_start.elapsed();
42 println!("Took {} seconds on {} samples ({:.2} samples/sec)", inference_time.as_secs(), nb_samples, nb_samples as f32 / inference_time.as_secs() as f32);
43
44 #[cfg(feature = "memprof")]
45 print_memory_usage();
46 }
47
48 let global_inference_time = global_inference_start.elapsed();
49 let global_nb_samples = nb_samples * REPEAT;
50 println!("All {} inferences took {} seconds on {} samples total ({:.2} samples/sec)", REPEAT, global_inference_time.as_secs(), global_nb_samples, global_nb_samples as f32 / global_inference_time.as_secs() as f32);
51
52 Ok(())
53}
Trait Implementations
impl<S: Splitter> Composable<TextInput, TokenizedInput> for RawToTokenized<'_, S>
fn apply(&self, input: TextInput) -> Result<TokenizedInput>
fn compose<T, P>(self, other: T) -> impl Composable<I, P>
where
    Self: Sized,
    T: Composable<O, P>,
Auto Trait Implementations§
impl Freeze for TextInput
impl RefUnwindSafe for TextInput
impl Send for TextInput
impl Sync for TextInput
impl Unpin for TextInput
impl UnwindSafe for TextInput
Blanket Implementations
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
impl<T> CloneToUninit for T
where
    T: Clone,
impl<T> Instrument for T
fn instrument(self, span: Span) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more