icu_capi/
segmenter_grapheme.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5#[diplomat::bridge]
6pub mod ffi {
7    use crate::errors::ffi::ICU4XError;
8    use crate::provider::ffi::ICU4XDataProvider;
9    use alloc::boxed::Box;
10    use core::convert::TryFrom;
11    use icu_segmenter::{
12        GraphemeClusterBreakIteratorLatin1, GraphemeClusterBreakIteratorPotentiallyIllFormedUtf8,
13        GraphemeClusterBreakIteratorUtf16, GraphemeClusterSegmenter,
14    };
15
16    #[diplomat::opaque]
17    /// An ICU4X grapheme-cluster-break segmenter, capable of finding grapheme cluster breakpoints
18    /// in strings.
19    #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter, Struct)]
20    pub struct ICU4XGraphemeClusterSegmenter(GraphemeClusterSegmenter);
21
22    #[diplomat::opaque]
23    #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator, Struct)]
24    #[diplomat::rust_link(
25        icu::segmenter::GraphemeClusterBreakIteratorPotentiallyIllFormedUtf8,
26        Typedef,
27        hidden
28    )]
29    #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIteratorUtf8, Typedef, hidden)]
30    pub struct ICU4XGraphemeClusterBreakIteratorUtf8<'a>(
31        GraphemeClusterBreakIteratorPotentiallyIllFormedUtf8<'a, 'a>,
32    );
33
34    #[diplomat::opaque]
35    #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator, Struct)]
36    #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIteratorUtf16, Typedef, hidden)]
37    pub struct ICU4XGraphemeClusterBreakIteratorUtf16<'a>(
38        GraphemeClusterBreakIteratorUtf16<'a, 'a>,
39    );
40
41    #[diplomat::opaque]
42    #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator, Struct)]
43    #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIteratorLatin1, Typedef, hidden)]
44    pub struct ICU4XGraphemeClusterBreakIteratorLatin1<'a>(
45        GraphemeClusterBreakIteratorLatin1<'a, 'a>,
46    );
47
48    impl ICU4XGraphemeClusterSegmenter {
49        /// Construct an [`ICU4XGraphemeClusterSegmenter`].
50        #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter::new, FnInStruct)]
51        #[diplomat::attr(all(supports = constructors, supports = fallible_constructors), constructor)]
52        pub fn create(
53            provider: &ICU4XDataProvider,
54        ) -> Result<Box<ICU4XGraphemeClusterSegmenter>, ICU4XError> {
55            Ok(Box::new(ICU4XGraphemeClusterSegmenter(call_constructor!(
56                GraphemeClusterSegmenter::new [r => Ok(r)],
57                GraphemeClusterSegmenter::try_new_with_any_provider,
58                GraphemeClusterSegmenter::try_new_with_buffer_provider,
59                provider,
60            )?)))
61        }
62
63        /// Segments a string.
64        ///
65        /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
66        /// to the WHATWG Encoding Standard.
67        #[diplomat::rust_link(
68            icu::segmenter::GraphemeClusterSegmenter::segment_str,
69            FnInStruct,
70            hidden
71        )]
72        #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter::segment_utf8, FnInStruct)]
73        #[diplomat::attr(dart, disable)]
74        pub fn segment_utf8<'a>(
75            &'a self,
76            input: &'a DiplomatStr,
77        ) -> Box<ICU4XGraphemeClusterBreakIteratorUtf8<'a>> {
78            Box::new(ICU4XGraphemeClusterBreakIteratorUtf8(
79                self.0.segment_utf8(input),
80            ))
81        }
82
83        /// Segments a string.
84        ///
85        /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
86        /// to the WHATWG Encoding Standard.
87        #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter::segment_utf16, FnInStruct)]
88        #[diplomat::attr(dart, rename = "segment")]
89        pub fn segment_utf16<'a>(
90            &'a self,
91            input: &'a DiplomatStr16,
92        ) -> Box<ICU4XGraphemeClusterBreakIteratorUtf16<'a>> {
93            Box::new(ICU4XGraphemeClusterBreakIteratorUtf16(
94                self.0.segment_utf16(input),
95            ))
96        }
97
98        /// Segments a Latin-1 string.
99        #[diplomat::rust_link(icu::segmenter::GraphemeClusterSegmenter::segment_latin1, FnInStruct)]
100        #[diplomat::attr(dart, disable)]
101        pub fn segment_latin1<'a>(
102            &'a self,
103            input: &'a [u8],
104        ) -> Box<ICU4XGraphemeClusterBreakIteratorLatin1<'a>> {
105            Box::new(ICU4XGraphemeClusterBreakIteratorLatin1(
106                self.0.segment_latin1(input),
107            ))
108        }
109    }
110
111    impl<'a> ICU4XGraphemeClusterBreakIteratorUtf8<'a> {
112        /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
113        /// out of range of a 32-bit signed integer.
114        #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator::next, FnInStruct)]
115        #[diplomat::rust_link(
116            icu::segmenter::GraphemeClusterBreakIterator::Item,
117            AssociatedTypeInStruct,
118            hidden
119        )]
120        pub fn next(&mut self) -> i32 {
121            self.0
122                .next()
123                .and_then(|u| i32::try_from(u).ok())
124                .unwrap_or(-1)
125        }
126    }
127
128    impl<'a> ICU4XGraphemeClusterBreakIteratorUtf16<'a> {
129        /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
130        /// out of range of a 32-bit signed integer.
131        #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator::next, FnInStruct)]
132        #[diplomat::rust_link(
133            icu::segmenter::GraphemeClusterBreakIterator::Item,
134            AssociatedTypeInStruct,
135            hidden
136        )]
137        pub fn next(&mut self) -> i32 {
138            self.0
139                .next()
140                .and_then(|u| i32::try_from(u).ok())
141                .unwrap_or(-1)
142        }
143    }
144
145    impl<'a> ICU4XGraphemeClusterBreakIteratorLatin1<'a> {
146        /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
147        /// out of range of a 32-bit signed integer.
148        #[diplomat::rust_link(icu::segmenter::GraphemeClusterBreakIterator::next, FnInStruct)]
149        #[diplomat::rust_link(
150            icu::segmenter::GraphemeClusterBreakIterator::Item,
151            AssociatedTypeInStruct,
152            hidden
153        )]
154        pub fn next(&mut self) -> i32 {
155            self.0
156                .next()
157                .and_then(|u| i32::try_from(u).ok())
158                .unwrap_or(-1)
159        }
160    }
161}