1#[diplomat::bridge]
6pub mod ffi {
7 use crate::errors::ffi::ICU4XError;
8 use crate::provider::ffi::ICU4XDataProvider;
9 use alloc::boxed::Box;
10 use core::convert::TryFrom;
11 use icu_segmenter::{
12 WordBreakIteratorLatin1, WordBreakIteratorPotentiallyIllFormedUtf8, WordBreakIteratorUtf16,
13 WordSegmenter, WordType,
14 };
15
16 #[diplomat::enum_convert(WordType, needs_wildcard)]
17 #[diplomat::rust_link(icu::segmenter::WordType, Enum)]
18 pub enum ICU4XSegmenterWordType {
19 None = 0,
20 Number = 1,
21 Letter = 2,
22 }
23
24 #[diplomat::opaque]
25 #[diplomat::rust_link(icu::segmenter::WordSegmenter, Struct)]
27 pub struct ICU4XWordSegmenter(WordSegmenter);
28
29 #[diplomat::opaque]
30 #[diplomat::rust_link(icu::segmenter::WordBreakIterator, Struct)]
31 #[diplomat::rust_link(
32 icu::segmenter::WordBreakIteratorPotentiallyIllFormedUtf8,
33 Typedef,
34 hidden
35 )]
36 #[diplomat::rust_link(icu::segmenter::WordBreakIteratorUtf8, Typedef, hidden)]
37 pub struct ICU4XWordBreakIteratorUtf8<'a>(WordBreakIteratorPotentiallyIllFormedUtf8<'a, 'a>);
38
39 #[diplomat::opaque]
40 #[diplomat::rust_link(icu::segmenter::WordBreakIterator, Struct)]
41 #[diplomat::rust_link(icu::segmenter::WordBreakIteratorUtf16, Typedef, hidden)]
42 pub struct ICU4XWordBreakIteratorUtf16<'a>(WordBreakIteratorUtf16<'a, 'a>);
43
44 #[diplomat::opaque]
45 #[diplomat::rust_link(icu::segmenter::WordBreakIterator, Struct)]
46 #[diplomat::rust_link(icu::segmenter::WordBreakIteratorLatin1, Typedef, hidden)]
47 pub struct ICU4XWordBreakIteratorLatin1<'a>(WordBreakIteratorLatin1<'a, 'a>);
48
49 impl ICU4XSegmenterWordType {
50 #[diplomat::rust_link(icu::segmenter::WordType::is_word_like, FnInEnum)]
51 #[diplomat::attr(supports = accessors, getter)]
52 pub fn is_word_like(self) -> bool {
53 WordType::from(self).is_word_like()
54 }
55 }
56
57 impl ICU4XWordSegmenter {
58 #[diplomat::rust_link(icu::segmenter::WordSegmenter::new_auto, FnInStruct)]
64 #[diplomat::attr(all(supports = constructors, supports = fallible_constructors, supports = named_constructors), named_constructor = "auto")]
65 pub fn create_auto(
66 provider: &ICU4XDataProvider,
67 ) -> Result<Box<ICU4XWordSegmenter>, ICU4XError> {
68 Ok(Box::new(ICU4XWordSegmenter(call_constructor!(
69 WordSegmenter::new_auto [r => Ok(r)],
70 WordSegmenter::try_new_auto_with_any_provider,
71 WordSegmenter::try_new_auto_with_buffer_provider,
72 provider
73 )?)))
74 }
75
76 #[diplomat::rust_link(icu::segmenter::WordSegmenter::new_lstm, FnInStruct)]
82 #[diplomat::attr(all(supports = constructors, supports = fallible_constructors, supports = named_constructors), named_constructor = "lstm")]
83 pub fn create_lstm(
84 provider: &ICU4XDataProvider,
85 ) -> Result<Box<ICU4XWordSegmenter>, ICU4XError> {
86 Ok(Box::new(ICU4XWordSegmenter(call_constructor!(
87 WordSegmenter::new_lstm [r => Ok(r)],
88 WordSegmenter::try_new_lstm_with_any_provider,
89 WordSegmenter::try_new_lstm_with_buffer_provider,
90 provider,
91 )?)))
92 }
93
94 #[diplomat::rust_link(icu::segmenter::WordSegmenter::new_dictionary, FnInStruct)]
97 #[diplomat::attr(all(supports = constructors, supports = fallible_constructors, supports = named_constructors), named_constructor = "dictionary")]
98 pub fn create_dictionary(
99 provider: &ICU4XDataProvider,
100 ) -> Result<Box<ICU4XWordSegmenter>, ICU4XError> {
101 Ok(Box::new(ICU4XWordSegmenter(call_constructor!(
102 WordSegmenter::new_dictionary [r => Ok(r)],
103 WordSegmenter::try_new_dictionary_with_any_provider,
104 WordSegmenter::try_new_dictionary_with_buffer_provider,
105 provider,
106 )?)))
107 }
108
109 #[diplomat::rust_link(icu::segmenter::WordSegmenter::segment_utf8, FnInStruct)]
114 #[diplomat::rust_link(icu::segmenter::WordSegmenter::segment_str, FnInStruct, hidden)]
115 #[diplomat::attr(dart, disable)]
116 pub fn segment_utf8<'a>(
117 &'a self,
118 input: &'a DiplomatStr,
119 ) -> Box<ICU4XWordBreakIteratorUtf8<'a>> {
120 Box::new(ICU4XWordBreakIteratorUtf8(self.0.segment_utf8(input)))
121 }
122
123 #[diplomat::rust_link(icu::segmenter::WordSegmenter::segment_utf16, FnInStruct)]
128 #[diplomat::attr(dart, rename = "segment")]
129 pub fn segment_utf16<'a>(
130 &'a self,
131 input: &'a DiplomatStr16,
132 ) -> Box<ICU4XWordBreakIteratorUtf16<'a>> {
133 Box::new(ICU4XWordBreakIteratorUtf16(self.0.segment_utf16(input)))
134 }
135
136 #[diplomat::rust_link(icu::segmenter::WordSegmenter::segment_latin1, FnInStruct)]
138 #[diplomat::attr(dart, disable)]
139 pub fn segment_latin1<'a>(
140 &'a self,
141 input: &'a [u8],
142 ) -> Box<ICU4XWordBreakIteratorLatin1<'a>> {
143 Box::new(ICU4XWordBreakIteratorLatin1(self.0.segment_latin1(input)))
144 }
145 }
146
147 impl<'a> ICU4XWordBreakIteratorUtf8<'a> {
148 #[diplomat::rust_link(icu::segmenter::WordBreakIterator::next, FnInStruct)]
151 #[diplomat::rust_link(
152 icu::segmenter::WordBreakIterator::Item,
153 AssociatedTypeInStruct,
154 hidden
155 )]
156 pub fn next(&mut self) -> i32 {
157 self.0
158 .next()
159 .and_then(|u| i32::try_from(u).ok())
160 .unwrap_or(-1)
161 }
162
163 #[diplomat::rust_link(icu::segmenter::WordBreakIterator::word_type, FnInStruct)]
165 #[diplomat::attr(supports = accessors, getter)]
166 pub fn word_type(&self) -> ICU4XSegmenterWordType {
167 self.0.word_type().into()
168 }
169
170 #[diplomat::rust_link(icu::segmenter::WordBreakIterator::is_word_like, FnInStruct)]
172 #[diplomat::attr(supports = accessors, getter)]
173 pub fn is_word_like(&self) -> bool {
174 self.0.is_word_like()
175 }
176 }
177
178 impl<'a> ICU4XWordBreakIteratorUtf16<'a> {
179 #[diplomat::rust_link(icu::segmenter::WordBreakIterator::next, FnInStruct)]
182 #[diplomat::rust_link(
183 icu::segmenter::WordBreakIterator::Item,
184 AssociatedTypeInStruct,
185 hidden
186 )]
187 pub fn next(&mut self) -> i32 {
188 self.0
189 .next()
190 .and_then(|u| i32::try_from(u).ok())
191 .unwrap_or(-1)
192 }
193
194 #[diplomat::rust_link(icu::segmenter::WordBreakIterator::word_type, FnInStruct)]
196 #[diplomat::rust_link(
197 icu::segmenter::WordBreakIterator::iter_with_word_type,
198 FnInStruct,
199 hidden
200 )]
201 #[diplomat::attr(supports = accessors, getter)]
202 pub fn word_type(&self) -> ICU4XSegmenterWordType {
203 self.0.word_type().into()
204 }
205
206 #[diplomat::rust_link(icu::segmenter::WordBreakIterator::is_word_like, FnInStruct)]
208 #[diplomat::attr(supports = accessors, getter)]
209 pub fn is_word_like(&self) -> bool {
210 self.0.is_word_like()
211 }
212 }
213
214 impl<'a> ICU4XWordBreakIteratorLatin1<'a> {
215 #[diplomat::rust_link(icu::segmenter::WordBreakIterator::next, FnInStruct)]
218 #[diplomat::rust_link(
219 icu::segmenter::WordBreakIterator::Item,
220 AssociatedTypeInStruct,
221 hidden
222 )]
223 pub fn next(&mut self) -> i32 {
224 self.0
225 .next()
226 .and_then(|u| i32::try_from(u).ok())
227 .unwrap_or(-1)
228 }
229
230 #[diplomat::rust_link(icu::segmenter::WordBreakIterator::word_type, FnInStruct)]
232 #[diplomat::attr(supports = accessors, getter)]
233 pub fn word_type(&self) -> ICU4XSegmenterWordType {
234 self.0.word_type().into()
235 }
236
237 #[diplomat::rust_link(icu::segmenter::WordBreakIterator::is_word_like, FnInStruct)]
239 #[diplomat::attr(supports = accessors, getter)]
240 pub fn is_word_like(&self) -> bool {
241 self.0.is_word_like()
242 }
243 }
244}