icu_capi/
segmenter_sentence.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5#[diplomat::bridge]
6pub mod ffi {
7    use crate::errors::ffi::ICU4XError;
8    use crate::provider::ffi::ICU4XDataProvider;
9    use alloc::boxed::Box;
10    use core::convert::TryFrom;
11    use icu_segmenter::{
12        SentenceBreakIteratorLatin1, SentenceBreakIteratorPotentiallyIllFormedUtf8,
13        SentenceBreakIteratorUtf16, SentenceSegmenter,
14    };
15
    #[diplomat::opaque]
    /// An ICU4X sentence-break segmenter, capable of finding sentence breakpoints in strings.
    ///
    /// This is an opaque wrapper around `icu::segmenter::SentenceSegmenter`. Its `segment_utf8`,
    /// `segment_utf16` and `segment_latin1` methods each return a breakpoint iterator for the
    /// corresponding input encoding.
    #[diplomat::rust_link(icu::segmenter::SentenceSegmenter, Struct)]
    pub struct ICU4XSentenceSegmenter(SentenceSegmenter);
20
    // The wrapped iterator takes two lifetime parameters; both are set to `'a` because
    // `ICU4XSentenceSegmenter::segment_utf8` borrows the segmenter and the input for the
    // same lifetime.
    #[diplomat::opaque]
    /// An iterator over the sentence breakpoints of a UTF-8 string, as returned by
    /// `ICU4XSentenceSegmenter::segment_utf8`.
    #[diplomat::rust_link(icu::segmenter::SentenceBreakIterator, Struct)]
    #[diplomat::rust_link(
        icu::segmenter::SentenceBreakIteratorPotentiallyIllFormedUtf8,
        Typedef,
        hidden
    )]
    #[diplomat::rust_link(icu::segmenter::SentenceBreakIteratorUtf8, Typedef, hidden)]
    pub struct ICU4XSentenceBreakIteratorUtf8<'a>(
        SentenceBreakIteratorPotentiallyIllFormedUtf8<'a, 'a>,
    );
32
    // The wrapped iterator takes two lifetime parameters; both are set to `'a` because
    // `ICU4XSentenceSegmenter::segment_utf16` borrows the segmenter and the input for the
    // same lifetime.
    #[diplomat::opaque]
    /// An iterator over the sentence breakpoints of a UTF-16 string, as returned by
    /// `ICU4XSentenceSegmenter::segment_utf16`.
    #[diplomat::rust_link(icu::segmenter::SentenceBreakIterator, Struct)]
    #[diplomat::rust_link(icu::segmenter::SentenceBreakIteratorUtf16, Typedef, hidden)]
    pub struct ICU4XSentenceBreakIteratorUtf16<'a>(SentenceBreakIteratorUtf16<'a, 'a>);
37
    // The wrapped iterator takes two lifetime parameters; both are set to `'a` because
    // `ICU4XSentenceSegmenter::segment_latin1` borrows the segmenter and the input for the
    // same lifetime.
    #[diplomat::opaque]
    /// An iterator over the sentence breakpoints of a Latin-1 string, as returned by
    /// `ICU4XSentenceSegmenter::segment_latin1`.
    #[diplomat::rust_link(icu::segmenter::SentenceBreakIterator, Struct)]
    #[diplomat::rust_link(icu::segmenter::SentenceBreakIteratorLatin1, Typedef, hidden)]
    pub struct ICU4XSentenceBreakIteratorLatin1<'a>(SentenceBreakIteratorLatin1<'a, 'a>);
42
43    impl ICU4XSentenceSegmenter {
44        /// Construct an [`ICU4XSentenceSegmenter`].
45        #[diplomat::rust_link(icu::segmenter::SentenceSegmenter::new, FnInStruct)]
46        #[diplomat::attr(all(supports = constructors, supports = fallible_constructors), constructor)]
47        pub fn create(
48            provider: &ICU4XDataProvider,
49        ) -> Result<Box<ICU4XSentenceSegmenter>, ICU4XError> {
50            Ok(Box::new(ICU4XSentenceSegmenter(call_constructor!(
51                SentenceSegmenter::new [r => Ok(r)],
52                SentenceSegmenter::try_new_with_any_provider,
53                SentenceSegmenter::try_new_with_buffer_provider,
54                provider,
55            )?)))
56        }
57
58        /// Segments a string.
59        ///
60        /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
61        /// to the WHATWG Encoding Standard.
62        #[diplomat::rust_link(icu::segmenter::SentenceSegmenter::segment_utf8, FnInStruct)]
63        #[diplomat::rust_link(icu::segmenter::SentenceSegmenter::segment_str, FnInStruct, hidden)]
64        #[diplomat::attr(dart, disable)]
65        pub fn segment_utf8<'a>(
66            &'a self,
67            input: &'a DiplomatStr,
68        ) -> Box<ICU4XSentenceBreakIteratorUtf8<'a>> {
69            Box::new(ICU4XSentenceBreakIteratorUtf8(self.0.segment_utf8(input)))
70        }
71
72        /// Segments a string.
73        ///
74        /// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
75        /// to the WHATWG Encoding Standard.
76        #[diplomat::rust_link(icu::segmenter::SentenceSegmenter::segment_utf16, FnInStruct)]
77        #[diplomat::attr(dart, rename = "segment")]
78        pub fn segment_utf16<'a>(
79            &'a self,
80            input: &'a DiplomatStr16,
81        ) -> Box<ICU4XSentenceBreakIteratorUtf16<'a>> {
82            Box::new(ICU4XSentenceBreakIteratorUtf16(self.0.segment_utf16(input)))
83        }
84
85        /// Segments a Latin-1 string.
86        #[diplomat::rust_link(icu::segmenter::SentenceSegmenter::segment_latin1, FnInStruct)]
87        #[diplomat::attr(dart, disable)]
88        pub fn segment_latin1<'a>(
89            &'a self,
90            input: &'a [u8],
91        ) -> Box<ICU4XSentenceBreakIteratorLatin1<'a>> {
92            Box::new(ICU4XSentenceBreakIteratorLatin1(
93                self.0.segment_latin1(input),
94            ))
95        }
96    }
97
98    impl<'a> ICU4XSentenceBreakIteratorUtf8<'a> {
99        /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
100        /// out of range of a 32-bit signed integer.
101        #[diplomat::rust_link(icu::segmenter::SentenceBreakIterator::next, FnInStruct)]
102        #[diplomat::rust_link(
103            icu::segmenter::SentenceBreakIterator::Item,
104            AssociatedTypeInStruct,
105            hidden
106        )]
107        pub fn next(&mut self) -> i32 {
108            self.0
109                .next()
110                .and_then(|u| i32::try_from(u).ok())
111                .unwrap_or(-1)
112        }
113    }
114
115    impl<'a> ICU4XSentenceBreakIteratorUtf16<'a> {
116        /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
117        /// out of range of a 32-bit signed integer.
118        #[diplomat::rust_link(icu::segmenter::SentenceBreakIterator::next, FnInStruct)]
119        #[diplomat::rust_link(
120            icu::segmenter::SentenceBreakIterator::Item,
121            AssociatedTypeInStruct,
122            hidden
123        )]
124        pub fn next(&mut self) -> i32 {
125            self.0
126                .next()
127                .and_then(|u| i32::try_from(u).ok())
128                .unwrap_or(-1)
129        }
130    }
131
132    impl<'a> ICU4XSentenceBreakIteratorLatin1<'a> {
133        /// Finds the next breakpoint. Returns -1 if at the end of the string or if the index is
134        /// out of range of a 32-bit signed integer.
135        #[diplomat::rust_link(icu::segmenter::SentenceBreakIterator::next, FnInStruct)]
136        #[diplomat::rust_link(
137            icu::segmenter::SentenceBreakIterator::Item,
138            AssociatedTypeInStruct,
139            hidden
140        )]
141        pub fn next(&mut self) -> i32 {
142            self.0
143                .next()
144                .and_then(|u| i32::try_from(u).ok())
145                .unwrap_or(-1)
146        }
147    }
148}