CodePoints

Struct CodePoints 

Source
pub struct CodePoints { /* private fields */ }
Expand description

Represents a collection of Unicode code points.

This struct provides functionality for checking if strings contain only the specified code points, and for performing set operations on code point collections.

§Examples

use japanese_codepoints::CodePoints;

let cp = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
assert!(cp.contains("あ"));
assert!(cp.contains("い"));
assert!(!cp.contains("う"));

Implementations§

Source§

impl CodePoints

Source

pub fn new(codepoints: Vec<u32>) -> Self

Creates a new CodePoints instance from a vector of code points.

§Arguments
  • codepoints - A vector of Unicode code points (u32)
§Examples
use japanese_codepoints::CodePoints;
let cp = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
assert!(cp.contains("あ"));
Source

pub fn from_string(s: &str) -> Self

Creates a new CodePoints instance from a string.

This method extracts all unique code points from the given string.

§Arguments
  • s - A string containing the code points
§Examples
use japanese_codepoints::CodePoints;

let cp = CodePoints::from_string("あい");
assert!(cp.contains("あ"));
assert!(cp.contains("い"));
Source

pub fn contains(&self, s: &str) -> bool

Checks if the given string contains only code points from this collection.

§Arguments
  • s - The string to check
§Returns

true if all characters in the string are in this code point collection, false otherwise.

§Examples
use japanese_codepoints::CodePoints;

let cp = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
assert!(cp.contains("あ"));
assert!(cp.contains("あい"));
assert!(!cp.contains("あいう"));
Source

pub fn first_excluded_with_position(&self, s: &str) -> Option<(u32, usize)>

Returns the first code point in the string that is not in this collection, along with its character index.

§Arguments
  • s - The string to check
§Returns

Some((code_point, char_index)) if a disallowed character is found, where char_index is the index of the character (not byte index) in the string. Returns None if all characters are allowed.

§Note

The returned index is the character index (as in .chars().enumerate()), not the byte index.

§Examples
use japanese_codepoints::CodePoints;
let cp = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
assert_eq!(cp.first_excluded_with_position("あい"), None);
assert_eq!(cp.first_excluded_with_position("あいう"), Some((0x3046, 2))); // う at char index 2
Source

pub fn first_excluded(&self, s: &str) -> Option<u32>

Returns the first code point in the string that is not in this collection.

§Arguments
  • s - The string to check
§Returns

Some(code_point) if a disallowed character is found, None otherwise.

§Examples
use japanese_codepoints::CodePoints;
let cp = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
assert_eq!(cp.first_excluded("あいう"), Some(0x3046)); // う
assert_eq!(cp.first_excluded("あい"), None);
Source

pub fn all_excluded(&self, s: &str) -> Vec<u32>

Returns all unique code points in the string that are not in this collection.

§Arguments
  • s - The string to check
§Returns

A vector of unique excluded code points (no duplicates, order not guaranteed).

§Examples
use japanese_codepoints::CodePoints;
let cp = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
let mut excluded = cp.all_excluded("あいうえ");
excluded.sort_unstable(); // order is not guaranteed, so sort before comparing
assert_eq!(excluded, vec![0x3046, 0x3048]); // う, え
Source

pub fn union(&self, other: &CodePoints) -> CodePoints

Returns the union of this code point collection with another.

§Arguments
  • other - Another CodePoints instance
§Returns

A new CodePoints instance containing all code points from both collections.

§Examples
use japanese_codepoints::CodePoints;

let cp1 = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
let cp2 = CodePoints::new(vec![0x3044, 0x3046]); // い, う
let union = cp1.union(&cp2);
assert!(union.contains("あいう"));
Source

pub fn intersection(&self, other: &CodePoints) -> CodePoints

Returns the intersection of this code point collection with another.

§Arguments
  • other - Another CodePoints instance
§Returns

A new CodePoints instance containing only code points present in both collections.

§Examples
use japanese_codepoints::CodePoints;

let cp1 = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
let cp2 = CodePoints::new(vec![0x3044, 0x3046]); // い, う
let intersection = cp1.intersection(&cp2);
assert!(intersection.contains("い"));
assert!(!intersection.contains("あ"));
assert!(!intersection.contains("う"));
Source

pub fn difference(&self, other: &CodePoints) -> CodePoints

Returns the difference of this code point collection with another.

§Arguments
  • other - Another CodePoints instance
§Returns

A new CodePoints instance containing code points in this collection but not in the other.

§Examples
use japanese_codepoints::CodePoints;

let cp1 = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
let cp2 = CodePoints::new(vec![0x3044, 0x3046]); // い, う
let difference = cp1.difference(&cp2);
assert!(difference.contains("あ"));
assert!(!difference.contains("い"));
Source

pub fn len(&self) -> usize

Returns the number of code points in this collection.

§Examples
use japanese_codepoints::CodePoints;

let cp = CodePoints::new(vec![0x3042, 0x3044, 0x3046]); // あ, い, う
assert_eq!(cp.len(), 3);
Source

pub fn is_empty(&self) -> bool

Returns true if this collection contains no code points.

§Examples
use japanese_codepoints::CodePoints;

let cp = CodePoints::new(vec![]);
assert!(cp.is_empty());
Source

pub fn iter(&self) -> Iter<'_, u32>

Returns an iterator over the code points in this collection.

§Examples
use japanese_codepoints::CodePoints;

let cp = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
let mut iter = cp.iter();
let first = iter.next();
let second = iter.next();
assert_eq!(iter.next(), None);
assert!(first.is_some());
assert!(second.is_some());
Source

pub fn ascii_control() -> Self

Creates a new CodePoints instance with ASCII control characters.

§Examples
use japanese_codepoints::CodePoints;

let cp = CodePoints::ascii_control();
assert!(cp.contains("\n"));
assert!(cp.contains("\r"));
assert!(!cp.contains("a"));
Source

pub fn ascii_control_cached() -> &'static CodePoints

Returns a cached CodePoints instance containing the ASCII control characters.

This method uses static caching to avoid repeated allocation. Subsequent calls return the same cached instance.

§Examples
use japanese_codepoints::CodePoints;

let cp1 = CodePoints::ascii_control_cached();
let cp2 = CodePoints::ascii_control_cached();
// Both references point to the same cached instance
Source

pub fn ascii_printable() -> Self

Creates a new CodePoints instance with ASCII printable characters.

§Examples
use japanese_codepoints::CodePoints;

let cp = CodePoints::ascii_printable();
assert!(cp.contains("Hello"));
assert!(cp.contains("123"));
assert!(!cp.contains("あ"));
Source

pub fn ascii_printable_cached() -> &'static CodePoints

Returns a cached CodePoints instance containing the ASCII printable characters.

This method uses static caching to avoid repeated allocation. Subsequent calls return the same cached instance.

§Examples
use japanese_codepoints::CodePoints;

let cp1 = CodePoints::ascii_printable_cached();
let cp2 = CodePoints::ascii_printable_cached();
// Both references point to the same cached instance
Source

pub fn crlf() -> Self

Creates a new CodePoints instance containing the carriage return (\r) and line feed (\n) characters.

§Examples
use japanese_codepoints::CodePoints;

let cp = CodePoints::crlf();
assert!(cp.contains("\n"));
assert!(cp.contains("\r"));
assert!(!cp.contains("a"));
Source

pub fn crlf_cached() -> &'static CodePoints

Returns a cached CodePoints instance containing the CR and LF characters.

This method uses static caching to avoid repeated allocation. Subsequent calls return the same cached instance.

§Examples
use japanese_codepoints::CodePoints;

let cp1 = CodePoints::crlf_cached();
let cp2 = CodePoints::crlf_cached();
// Both references point to the same cached instance
Source

pub fn ascii_all() -> Self

Creates a new CodePoints instance with all ASCII characters.

§Examples
use japanese_codepoints::CodePoints;

let cp = CodePoints::ascii_all();
assert!(cp.contains("Hello"));
assert!(cp.contains("\n"));
assert!(!cp.contains("あ"));
Source

pub fn ascii_all_cached() -> &'static CodePoints

Returns a cached CodePoints instance containing all ASCII characters.

This method uses static caching to avoid repeated allocation. Subsequent calls return the same cached instance.

§Examples
use japanese_codepoints::CodePoints;

let cp1 = CodePoints::ascii_all_cached();
let cp2 = CodePoints::ascii_all_cached();
// Both references point to the same cached instance
Source

pub fn is_subset_of(&self, other: &CodePoints) -> bool

Returns true if this collection is a subset of another CodePoints collection.

§Arguments
  • other - Another CodePoints instance
§Returns

true if all code points in this collection are also in other.

§Examples
use japanese_codepoints::CodePoints;
let cp1 = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
let cp2 = CodePoints::new(vec![0x3042, 0x3044, 0x3046]); // あ, い, う
assert!(cp1.is_subset_of(&cp2));
Source

pub fn is_superset_of(&self, other: &CodePoints) -> bool

Returns true if this collection is a superset of another CodePoints collection.

§Arguments
  • other - Another CodePoints instance
§Returns

true if all code points in other are also in this collection.

§Examples
use japanese_codepoints::CodePoints;
let cp1 = CodePoints::new(vec![0x3042, 0x3044, 0x3046]); // あ, い, う
let cp2 = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
assert!(cp1.is_superset_of(&cp2));
Source

pub fn symmetric_difference(&self, other: &CodePoints) -> CodePoints

Returns the symmetric difference of this code point collection with another.

§Arguments
  • other - Another CodePoints instance
§Returns

A new CodePoints instance containing code points that are in either collection but not in both.

§Examples
use japanese_codepoints::CodePoints;
let cp1 = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
let cp2 = CodePoints::new(vec![0x3044, 0x3046]); // い, う
let diff = cp1.symmetric_difference(&cp2);
assert!(diff.contains("あ"));
assert!(diff.contains("う"));
assert!(!diff.contains("い"));
Source

pub fn contains_all_in_any(s: &str, codepoints_list: &[CodePoints]) -> bool

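Checks if every character of the given string belongs to at least one of the provided code point collections. Different characters may be matched by different collections; no single collection needs to cover the whole string.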

This is equivalent to the Java method containsAllInAnyCodePoints. Returns true if all characters in the string are included in at least one of the code point collections.

§Arguments
  • s - The string to check
  • codepoints_list - A slice of CodePoints instances to check against
§Returns

true if every code point in the given string is included in at least one of the collections in codepoints_list, false otherwise.

§Examples
use japanese_codepoints::CodePoints;

let hiragana = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
let katakana = CodePoints::new(vec![0x30A2, 0x30A4]); // ア, イ
let mixed_text = "あア"; // Contains both hiragana and katakana

// Each character is valid in at least one collection
assert!(CodePoints::contains_all_in_any(mixed_text, &[hiragana, katakana]));

Trait Implementations§

Source§

impl Clone for CodePoints

Source§

fn clone(&self) -> CodePoints

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for CodePoints

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl Display for CodePoints

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl From<&str> for CodePoints

Source§

fn from(s: &str) -> Self

Converts to this type from the input type.
Source§

impl From<Vec<u32>> for CodePoints

Source§

fn from(codepoints: Vec<u32>) -> Self

Converts to this type from the input type.
Source§

impl Hash for CodePoints

Source§

fn hash<H: Hasher>(&self, state: &mut H)

Feeds this value into the given Hasher. Read more
1.3.0 · Source§

fn hash_slice<H>(data: &[Self], state: &mut H)
where H: Hasher, Self: Sized,

Feeds a slice of this type into the given Hasher. Read more
Source§

impl PartialEq for CodePoints

Source§

fn eq(&self, other: &CodePoints) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · Source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
Source§

impl Eq for CodePoints

Source§

impl StructuralPartialEq for CodePoints

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T> ToString for T
where T: Display + ?Sized,

Source§

fn to_string(&self) -> String

Converts the given value to a String. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.