Enum ExtractionMethod

Source

#[non_exhaustive]
pub enum ExtractionMethod {
    Pattern,
    Neural,
    Lexicon,
    SoftLexicon,
    GatedEnsemble,
    Consensus,
    Heuristic,
    Unknown,
    Rule,
    ML,
    Ensemble,
}

Expand description

Extraction method used to identify an entity.

§Research Context

Different extraction methods have different strengths:

Method	Precision	Recall	Generalization	Use Case
Pattern	Very High	Low	N/A (format-based)	Dates, emails, money
Neural	High	High	Good	General NER
Lexicon	Very High	Low	None	Closed-domain entities
SoftLexicon	Medium	High	Good for rare types	Low-resource NER
GatedEnsemble	Highest	Highest	Contextual	Short texts, domain shift

See docs/ for repo-local notes and entry points.

Variants (Non-exhaustive)§

This enum is marked as non-exhaustive

Non-exhaustive enums may have additional variants added in the future. Therefore, when matching against variants of non-exhaustive enums, an extra wildcard arm must be added to account for any future variants.

§

Pattern

Regex pattern matching (high precision for structured data like dates, money). Does not generalize - only detects format-based entities.

§

Neural

Neural model inference (BERT, GLiNER, etc.). The recommended default for general NER. Generalizes to unseen entities.

§

Lexicon

👎Deprecated since 0.2.0: Use Neural or GatedEnsemble instead

Exact lexicon/gazetteer lookup (deprecated approach). High precision on known entities, zero recall on novel entities. Only use for closed domains (stock tickers, medical codes).

§

SoftLexicon

Embedding-based soft lexicon matching. Useful for low-resource languages and rare entity types. See: Rijhwani et al. (2020) “Soft Gazetteers for Low-Resource Named Entity Recognition”

§

GatedEnsemble

Gated ensemble: neural + lexicon with learned weighting. Model learns when to trust lexicon vs. context. See: Meng et al. (2021) “GEMNET: Effective Gated Gazetteer Representations for Recognizing Complex Entities in Low-context Input”

§

Consensus

Multiple methods agreed on this entity (high confidence).

§

Heuristic

Heuristic-based extraction (capitalization, word shape, context). Used by heuristic backends that don’t use neural models.

§

Unknown

Unknown or unspecified extraction method.

§

Rule

👎Deprecated since 0.2.0: Use Heuristic or Pattern instead

Legacy rule-based extraction (for backward compatibility).

§

ML

👎Deprecated since 0.2.0: Use Neural instead

Legacy alias for Neural (for backward compatibility).

§

Ensemble

👎Deprecated since 0.2.0: Use Consensus instead

Legacy alias for Consensus (for backward compatibility).

Enum ExtractionMethod Copy item path

§Research Context

Variants (Non-exhaustive)§

Pattern

Neural

Lexicon

SoftLexicon

GatedEnsemble

Consensus

Heuristic

Unknown

Rule

ML

Ensemble

Implementations§

impl ExtractionMethod

pub const fn is_calibrated(&self) -> bool

§Calibrated Methods

§Uncalibrated Methods

§Example

pub const fn confidence_interpretation(&self) -> &'static str

Trait Implementations§

impl Clone for ExtractionMethod

fn clone(&self) -> ExtractionMethod

fn clone_from(&mut self, source: &Self)

impl Debug for ExtractionMethod

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Default for ExtractionMethod

fn default() -> ExtractionMethod

impl<'de> Deserialize<'de> for ExtractionMethod

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

impl Display for ExtractionMethod

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Hash for ExtractionMethod

fn hash<__H: Hasher>(&self, state: &mut __H)

fn hash_slice<H>(data: &[Self], state: &mut H)
where H: Hasher, Self: Sized,

impl PartialEq for ExtractionMethod

fn eq(&self, other: &ExtractionMethod) -> bool

fn ne(&self, other: &Rhs) -> bool

impl Serialize for ExtractionMethod

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error>
where __S: Serializer,

impl Copy for ExtractionMethod

impl Eq for ExtractionMethod

impl StructuralPartialEq for ExtractionMethod

Auto Trait Implementations§

impl Freeze for ExtractionMethod

impl RefUnwindSafe for ExtractionMethod

impl Send for ExtractionMethod

impl Sync for ExtractionMethod

impl Unpin for ExtractionMethod

impl UnsafeUnpin for ExtractionMethod

impl UnwindSafe for ExtractionMethod

Blanket Implementations§

impl<T> Any for T
where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T
where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T
where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T
where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T
where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T
where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T> ToString for T
where T: Display + ?Sized,

fn to_string(&self) -> String

impl<T, U> TryFrom<U> for T
where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,

Enum ExtractionMethod

fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de>,

fn hash<H: Hasher>(&self, state: &mut H)

fn hash_slice<H>(data: &[Self], state: &mut H)
where H: Hasher, Self: Sized,

fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T> ToString for T
where T: Display + ?Sized,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,