icu_properties/
bidi.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
//! This module exposes tooling for running the [Unicode Bidirectional Algorithm](https://unicode.org/reports/tr9/) using ICU4X data.
6//!
7//! `BidiClassAdapter` enables ICU4X to provide data to [`unicode-bidi`], an external crate implementing UAX #9.
8//!
9//! ✨ *Enabled with the `bidi` Cargo feature.*
10//!
11//! # Examples
12//!
//! ```
14//! use icu::properties::bidi::BidiClassAdapter;
15//! use icu::properties::maps;
16//! use unicode_bidi::BidiInfo;
17//! // This example text is defined using `concat!` because some browsers
18//! // and text editors have trouble displaying bidi strings.
19//! let text =  concat!["א", // RTL#1
20//!                     "ב", // RTL#2
21//!                     "ג", // RTL#3
22//!                     "a", // LTR#1
23//!                     "b", // LTR#2
24//!                     "c", // LTR#3
25//!                     ]; //
26//!
27//!
28//! let adapter = BidiClassAdapter::new(maps::bidi_class());
29//! // Resolve embedding levels within the text.  Pass `None` to detect the
30//! // paragraph level automatically.
31//!
32//! let bidi_info = BidiInfo::new_with_data_source(&adapter, text, None);
33//!
34//! // This paragraph has embedding level 1 because its first strong character is RTL.
35//! assert_eq!(bidi_info.paragraphs.len(), 1);
36//! let para = &bidi_info.paragraphs[0];
37//! assert_eq!(para.level.number(), 1);
38//! assert!(para.level.is_rtl());
39//!
40//! // Re-ordering is done after wrapping each paragraph into a sequence of
41//! // lines. For this example, I'll just use a single line that spans the
42//! // entire paragraph.
43//! let line = para.range.clone();
44//!
45//! let display = bidi_info.reorder_line(para, line);
46//! assert_eq!(display, concat!["a", // LTR#1
47//!                             "b", // LTR#2
48//!                             "c", // LTR#3
49//!                             "ג", // RTL#3
50//!                             "ב", // RTL#2
51//!                             "א", // RTL#1
52//!                             ]);
53//! ```
54
55use crate::maps::CodePointMapDataBorrowed;
56use crate::props::BidiClass;
57use unicode_bidi::data_source::BidiDataSource;
58use unicode_bidi::BidiClass as DataSourceBidiClass;
59
60/// An adapter to convert from icu4x `BidiClass` to `unicode_bidi::BidiClass`.
61///
62/// ✨ *Enabled with the `bidi` Cargo feature.*
63///
64/// # Example
65///
66/// ```
67/// use icu::collections::codepointtrie::CodePointTrie;
68/// use icu::properties::bidi::BidiClassAdapter;
69/// use icu::properties::{maps, BidiClass};
70/// use unicode_bidi::BidiClass as DataSourceBidiClass;
71/// use unicode_bidi::BidiDataSource;
72///
73/// let adapter = BidiClassAdapter::new(maps::bidi_class());
74/// assert_eq!(adapter.bidi_class('a'), DataSourceBidiClass::L);
75/// assert_eq!(adapter.bidi_class('ع'), DataSourceBidiClass::AL);
76/// ```
77#[derive(Debug)]
78pub struct BidiClassAdapter<'a> {
79    data: CodePointMapDataBorrowed<'a, BidiClass>,
80}
81
82impl<'a> BidiClassAdapter<'a> {
83    /// Creates new instance of `BidiClassAdapter`.
84    pub const fn new(data: CodePointMapDataBorrowed<'a, BidiClass>) -> BidiClassAdapter<'a> {
85        BidiClassAdapter { data }
86    }
87}
88
89impl<'a> BidiDataSource for BidiClassAdapter<'a> {
90    /// Returns a [`DataSourceBidiClass`] given a unicode character.
91    ///
92    /// # Example
93    ///
94    /// ```
95    /// use icu::collections::codepointtrie::CodePointTrie;
96    /// use icu::properties::bidi::BidiClassAdapter;
97    /// use icu::properties::{maps, BidiClass};
98    /// use unicode_bidi::BidiClass as DataSourceBidiClass;
99    /// use unicode_bidi::BidiDataSource;
100    ///
101    /// let adapter = BidiClassAdapter::new(maps::bidi_class());
102    /// assert_eq!(adapter.bidi_class('a'), DataSourceBidiClass::L);
103    /// ```
104    ///
105    /// [`CodePointTrie`]: icu::collections::codepointtrie::CodePointTrie
106    fn bidi_class(&self, c: char) -> DataSourceBidiClass {
107        let bidi_class = self.data.get(c);
108        match bidi_class {
109            BidiClass::LeftToRight => DataSourceBidiClass::L,
110            BidiClass::RightToLeft => DataSourceBidiClass::R,
111            BidiClass::EuropeanNumber => DataSourceBidiClass::EN,
112            BidiClass::EuropeanSeparator => DataSourceBidiClass::ES,
113            BidiClass::EuropeanTerminator => DataSourceBidiClass::ET,
114            BidiClass::ArabicNumber => DataSourceBidiClass::AN,
115            BidiClass::CommonSeparator => DataSourceBidiClass::CS,
116            BidiClass::ParagraphSeparator => DataSourceBidiClass::B,
117            BidiClass::SegmentSeparator => DataSourceBidiClass::S,
118            BidiClass::WhiteSpace => DataSourceBidiClass::WS,
119            BidiClass::OtherNeutral => DataSourceBidiClass::ON,
120            BidiClass::LeftToRightEmbedding => DataSourceBidiClass::LRE,
121            BidiClass::LeftToRightOverride => DataSourceBidiClass::LRO,
122            BidiClass::ArabicLetter => DataSourceBidiClass::AL,
123            BidiClass::RightToLeftEmbedding => DataSourceBidiClass::RLE,
124            BidiClass::RightToLeftOverride => DataSourceBidiClass::RLO,
125            BidiClass::PopDirectionalFormat => DataSourceBidiClass::PDF,
126            BidiClass::NonspacingMark => DataSourceBidiClass::NSM,
127            BidiClass::BoundaryNeutral => DataSourceBidiClass::BN,
128            BidiClass::FirstStrongIsolate => DataSourceBidiClass::FSI,
129            BidiClass::LeftToRightIsolate => DataSourceBidiClass::LRI,
130            BidiClass::RightToLeftIsolate => DataSourceBidiClass::RLI,
131            BidiClass::PopDirectionalIsolate => DataSourceBidiClass::PDI,
132            _ =>
133            // This must not happen.
134            {
135                DataSourceBidiClass::ON
136            }
137        }
138    }
139}