quickner/lib.rs
1//! quickner is a library for NER annotation that provides a CLI and a Python API.
2//! It comes with a default configuration file that can be modified to fit your needs.
3//!
4//! # Batch Annotation
5//!
6//! You can use quickner to annotate a batch of texts.
7//!
8//! Provide a configuration file and a folder containing your texts:
9//! - a csv file containing the **texts** you want to annotate.
10//! - a csv file containing the **entities** you want to annotate.
11//! - a csv file containing the **excludes**, i.e. the entities you want to leave out of the annotation.
12//!
13//! ## Configuration
14//!
15//! The configuration file is a toml file that contains the following fields:
16//! ```toml
17//! [logging]
18//! level = "info" # level of logging (debug, info, warning, error, fatal)
19//!
20//! [texts]
21//!
22//! [texts.input]
23//! filter = false # if true, only texts in the filter list will be used
24//! path = "texts.csv" # path to the texts file
25//!
26//! [texts.filters]
27//! accept_special_characters = ".,-" # list of special characters to accept in the text (if special_characters is true)
28//! alphanumeric = false # if true, only strictly alphanumeric texts will be used
29//! case_sensitive = false # if true, case sensitive search will be used
30//! max_length = 1024 # maximum length of the text
31//! min_length = 0 # minimum length of the text
32//! numbers = false # if true, texts with numbers will not be used
33//! punctuation = false # if true, texts with punctuation will not be used
34//! special_characters = false # if true, texts with special characters will not be used
35//!
36//! [annotations]
37//! format = "spacy" # format of the output file (jsonl, spaCy, brat, conll)
38//!
39//! [annotations.output]
40//! path = "annotations.jsonl" # path to the output file
41//!
42//! [entities]
43//!
44//! [entities.input]
45//! filter = true # if true, only entities in the filter list will be used
46//! path = "entities.csv" # path to the entities file
47//! save = true # if true, the entities found will be saved in the output file
48//!
49//! [entities.filters]
50//! accept_special_characters = ".-" # list of special characters to accept in the entity (if special_characters is true)
51//! alphanumeric = false # if true, only strictly alphanumeric entities will be used
52//! case_sensitive = false # if true, case sensitive search will be used
53//! max_length = 20 # maximum length of the entity
54//! min_length = 0 # minimum length of the entity
55//! numbers = false # if true, entities with numbers will not be used
56//! punctuation = false # if true, entities with punctuation will not be used
57//! special_characters = true # if true, entities with special characters will not be used
58//!
59//! [entities.excludes]
60//! # path = "excludes.csv" # path to entities to exclude from the search
61//!
62//! ```
63//!
64//! ## Example
65//!
66//! ```no_run
67//! use quickner::Quickner;
68//!
69//! let quick = Quickner::new("./config.toml");
70//! let annotations = quick.process(true);
71//! ```
72//!
73//! # Single Annotation
74//!
75//! You can also use quickner to annotate a single text.
76//! This is useful when you want to annotate a single text and then use the annotation in your code.
77//!
78//! ```no_run
79//! use quickner::Document;
80//!
81//! let annotation = Document::from_string("Rust is maintained by Mozilla");
82//! let mut entities = std::collections::HashMap::new();
83//! entities.insert("Rust", "Programming Language");
84//! entities.insert("Mozilla", "Organization");
85//! annotation.annotate(entities);
86//! ```
87mod config;
88mod document;
89mod entity;
90mod models;
91mod quickner;
92mod utils;
93
94pub use crate::config::{
95 Annotations, Config, Entities, Excludes, Filters, Format, Input, Logging, Output, Texts,
96};
97pub use crate::document::Document;
98pub use crate::entity::Entity;
99pub use crate::models::SpacyEntity;
100pub use crate::quickner::Quickner;
101pub use crate::utils::hash_string;