core_foundation_sys/
string_tokenizer.rs

// Copyright 2023 The Servo Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
9
10use std::os::raw::c_void;
11
12use crate::array::CFMutableArrayRef;
13use crate::base::{CFAllocatorRef, CFIndex, CFOptionFlags, CFRange, CFTypeID, CFTypeRef};
14use crate::locale::CFLocaleRef;
15use crate::string::CFStringRef;
16
/// Opaque backing type for a Core Foundation string tokenizer.
///
/// Rust code never constructs or inspects a value of this type; it exists
/// only so that [`CFStringTokenizerRef`] is a distinct pointer type that
/// cannot be confused with other Core Foundation references.
#[repr(C)]
pub struct __CFStringTokenizer(c_void);

/// Raw pointer to a tokenizer object, as accepted and returned by the
/// `CFStringTokenizer*` functions.
pub type CFStringTokenizerRef = *mut __CFStringTokenizer;
20
21pub type CFStringTokenizerTokenType = CFOptionFlags;
22
23pub const kCFStringTokenizerTokenNone: CFStringTokenizerTokenType = 0;
24pub const kCFStringTokenizerTokenNormal: CFStringTokenizerTokenType = 1 << 0;
25pub const kCFStringTokenizerTokenHasSubTokensMask: CFStringTokenizerTokenType = 1 << 1;
26pub const kCFStringTokenizerTokenHasDerivedSubTokensMask: CFStringTokenizerTokenType = 1 << 2;
27pub const kCFStringTokenizerTokenHasHasNumbersMask: CFStringTokenizerTokenType = 1 << 3;
28pub const kCFStringTokenizerTokenHasNonLettersMask: CFStringTokenizerTokenType = 1 << 4;
29pub const kCFStringTokenizerTokenIsCJWordMask: CFStringTokenizerTokenType = 1 << 5;
30
31/* Tokenization Modifiers */
32pub const kCFStringTokenizerUnitWord: CFOptionFlags = 0;
33pub const kCFStringTokenizerUnitSentence: CFOptionFlags = 1;
34pub const kCFStringTokenizerUnitParagraph: CFOptionFlags = 2;
35pub const kCFStringTokenizerUnitLineBreak: CFOptionFlags = 3;
36pub const kCFStringTokenizerUnitWordBoundary: CFOptionFlags = 4;
37pub const kCFStringTokenizerAttributeLatinTranscription: CFOptionFlags = 1 << 16;
38pub const kCFStringTokenizerAttributeLanguage: CFOptionFlags = 1 << 17;
39
40extern "C" {
41    /*
42     * CFStringTokenizer.h
43     */
44
45    /* Creating a Tokenizer */
46    pub fn CFStringTokenizerCreate(
47        alloc: CFAllocatorRef,
48        string: CFStringRef,
49        range: CFRange,
50        options: CFOptionFlags,
51        locale: CFLocaleRef,
52    ) -> CFStringTokenizerRef;
53
54    /* Setting the String */
55    pub fn CFStringTokenizerSetString(
56        tokenizer: CFStringTokenizerRef,
57        string: CFStringRef,
58        range: CFRange,
59    );
60
61    /* Changing the Location */
62    pub fn CFStringTokenizerAdvanceToNextToken(
63        tokenizer: CFStringTokenizerRef,
64    ) -> CFStringTokenizerTokenType;
65    pub fn CFStringTokenizerGoToTokenAtIndex(
66        tokenizer: CFStringTokenizerRef,
67        index: CFIndex,
68    ) -> CFStringTokenizerTokenType;
69
70    /* Getting Information About the Current Token */
71    pub fn CFStringTokenizerCopyCurrentTokenAttribute(
72        tokenizer: CFStringTokenizerRef,
73        attribute: CFOptionFlags,
74    ) -> CFTypeRef;
75    pub fn CFStringTokenizerGetCurrentTokenRange(tokenizer: CFStringTokenizerRef) -> CFRange;
76    pub fn CFStringTokenizerGetCurrentSubTokens(
77        tokenizer: CFStringTokenizerRef,
78        ranges: *mut CFRange,
79        maxRangeLength: CFIndex,
80        derivedSubTokens: CFMutableArrayRef,
81    ) -> CFIndex;
82
83    /* Identifying a Language */
84    pub fn CFStringTokenizerCopyBestStringLanguage(
85        string: CFStringRef,
86        range: CFRange,
87    ) -> CFStringRef;
88
89    /* Getting the CFStringTokenizer Type ID */
90    pub fn CFStringTokenizerGetTypeID() -> CFTypeID;
91}