//! quickner is a library for NER annotation that provides a CLI and a Python API.
//! It comes with a default configuration file that can be modified to fit your needs.
//!
//! # Batch Annotation
//!
//! You can use quickner to annotate a batch of texts.
//!
//! Provide a configuration file and a folder containing your texts:
//! - a csv file containing the **texts** you want to annotate.
//! - a csv file containing the **entities** you want to annotate.
//! - a csv file containing the **excludes**: the entities you want to leave out of the annotation.
//!
//! ## Configuration
//!
//! The configuration file is a toml file that contains the following fields:
//! ```toml
//! [logging]
//! level = "info" # level of logging (debug, info, warning, error, fatal)
//!
//! [texts]
//!
//! [texts.input]
//! filter = false # if true, only texts in the filter list will be used
//! path = "texts.csv" # path to the texts file
//!
//! [texts.filters]
//! accept_special_characters = ".,-" # list of special characters to accept in the text (if special_characters is true)
//! alphanumeric = false # if true, only strictly alphanumeric texts will be used
//! case_sensitive = false # if true, case sensitive search will be used
//! max_length = 1024 # maximum length of the text
//! min_length = 0 # minimum length of the text
//! numbers = false # if true, texts with numbers will not be used
//! punctuation = false # if true, texts with punctuation will not be used
//! special_characters = false # if true, texts with special characters will not be used
//!
//! [annotations]
//! format = "spacy" # format of the output file (jsonl, spaCy, brat, conll)
//!
//! [annotations.output]
//! path = "annotations.jsonl" # path to the output file
//!
//! [entities]
//!
//! [entities.input]
//! filter = true # if true, only entities in the filter list will be used
//! path = "entities.csv" # path to the entities file
//! save = true # if true, the entities found will be saved in the output file
//!
//! [entities.filters]
//! accept_special_characters = ".-" # list of special characters to accept in the entity (if special_characters is true)
//! alphanumeric = false # if true, only strictly alphanumeric entities will be used
//! case_sensitive = false # if true, case sensitive search will be used
//! max_length = 20 # maximum length of the entity
//! min_length = 0 # minimum length of the entity
//! numbers = false # if true, entities with numbers will not be used
//! punctuation = false # if true, entities with punctuation will not be used
//! special_characters = true # if true, entities with special characters will not be used
//!
//! [entities.excludes]
//! # path = "excludes.csv" # path to entities to exclude from the search
//!
//! ```
//!
//! ## Example
//!
//! ```no_run
//! use quickner::Quickner;
//!
//! let quick = Quickner::new("./config.toml");
//! let annotations = quick.process(true);
//! ```
//!
//! # Single Annotation
//!
//! You can also use quickner to annotate a single text.
//! This is useful when you want to annotate a single text and then use the annotation in your code.
//!
//! ```no_run
//! use quickner::Document;
//! use std::collections::HashMap;
//!
//! let annotation = Document::from_string("Rust is maintained by Mozilla");
//! let mut entities = HashMap::new();
//! entities.insert("Rust", "Programming Language");
//! entities.insert("Mozilla", "Organization");
//! annotation.annotate(entities);
//! ```
// Internal modules. They are declared without `pub`, so code outside this crate
// cannot name them directly; their items are reachable only through `pub use`
// re-exports such as the ones below.
mod config;
mod document;
mod entity;
mod models;
mod quickner;
mod utils;
// Public API surface: selected items re-exported at the crate root so callers
// can write `quickner::Document`, `quickner::Quickner`, etc.
// Configuration types, all taken from the `config` module.
pub use crate::config::{
Annotations, Config, Entities, Excludes, Filters, Format, Input, Logging, Output, Texts,
};
pub use crate::document::Document;
pub use crate::entity::Entity;
// Only `SpacyEntity` is re-exported from `models`; nothing else in that module
// is re-exported here.
pub use crate::models::SpacyEntity;
pub use crate::quickner::Quickner;
pub use crate::utils::hash_string;