icu_properties/bidi.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
//! This module exposes tooling for running the [Unicode Bidirectional Algorithm](https://unicode.org/reports/tr9/) using ICU4X data.
6//!
7//! `BidiClassAdapter` enables ICU4X to provide data to [`unicode-bidi`], an external crate implementing UAX #9.
8//!
9//! ✨ *Enabled with the `bidi` Cargo feature.*
10//!
11//! # Examples
12//!
13//!```
14//! use icu::properties::bidi::BidiClassAdapter;
15//! use icu::properties::maps;
16//! use unicode_bidi::BidiInfo;
17//! // This example text is defined using `concat!` because some browsers
18//! // and text editors have trouble displaying bidi strings.
19//! let text = concat!["א", // RTL#1
20//! "ב", // RTL#2
21//! "ג", // RTL#3
22//! "a", // LTR#1
23//! "b", // LTR#2
24//! "c", // LTR#3
25//! ]; //
26//!
27//!
28//! let adapter = BidiClassAdapter::new(maps::bidi_class());
29//! // Resolve embedding levels within the text. Pass `None` to detect the
30//! // paragraph level automatically.
31//!
32//! let bidi_info = BidiInfo::new_with_data_source(&adapter, text, None);
33//!
34//! // This paragraph has embedding level 1 because its first strong character is RTL.
35//! assert_eq!(bidi_info.paragraphs.len(), 1);
36//! let para = &bidi_info.paragraphs[0];
37//! assert_eq!(para.level.number(), 1);
38//! assert!(para.level.is_rtl());
39//!
40//! // Re-ordering is done after wrapping each paragraph into a sequence of
41//! // lines. For this example, I'll just use a single line that spans the
42//! // entire paragraph.
43//! let line = para.range.clone();
44//!
45//! let display = bidi_info.reorder_line(para, line);
46//! assert_eq!(display, concat!["a", // LTR#1
47//! "b", // LTR#2
48//! "c", // LTR#3
49//! "ג", // RTL#3
50//! "ב", // RTL#2
51//! "א", // RTL#1
52//! ]);
53//! ```
54
55use crate::maps::CodePointMapDataBorrowed;
56use crate::props::BidiClass;
57use unicode_bidi::data_source::BidiDataSource;
58use unicode_bidi::BidiClass as DataSourceBidiClass;
59
60/// An adapter to convert from icu4x `BidiClass` to `unicode_bidi::BidiClass`.
61///
62/// ✨ *Enabled with the `bidi` Cargo feature.*
63///
64/// # Example
65///
66/// ```
67/// use icu::collections::codepointtrie::CodePointTrie;
68/// use icu::properties::bidi::BidiClassAdapter;
69/// use icu::properties::{maps, BidiClass};
70/// use unicode_bidi::BidiClass as DataSourceBidiClass;
71/// use unicode_bidi::BidiDataSource;
72///
73/// let adapter = BidiClassAdapter::new(maps::bidi_class());
74/// assert_eq!(adapter.bidi_class('a'), DataSourceBidiClass::L);
75/// assert_eq!(adapter.bidi_class('ع'), DataSourceBidiClass::AL);
76/// ```
77#[derive(Debug)]
78pub struct BidiClassAdapter<'a> {
79 data: CodePointMapDataBorrowed<'a, BidiClass>,
80}
81
82impl<'a> BidiClassAdapter<'a> {
83 /// Creates new instance of `BidiClassAdapter`.
84 pub const fn new(data: CodePointMapDataBorrowed<'a, BidiClass>) -> BidiClassAdapter<'a> {
85 BidiClassAdapter { data }
86 }
87}
88
89impl<'a> BidiDataSource for BidiClassAdapter<'a> {
90 /// Returns a [`DataSourceBidiClass`] given a unicode character.
91 ///
92 /// # Example
93 ///
94 /// ```
95 /// use icu::collections::codepointtrie::CodePointTrie;
96 /// use icu::properties::bidi::BidiClassAdapter;
97 /// use icu::properties::{maps, BidiClass};
98 /// use unicode_bidi::BidiClass as DataSourceBidiClass;
99 /// use unicode_bidi::BidiDataSource;
100 ///
101 /// let adapter = BidiClassAdapter::new(maps::bidi_class());
102 /// assert_eq!(adapter.bidi_class('a'), DataSourceBidiClass::L);
103 /// ```
104 ///
105 /// [`CodePointTrie`]: icu::collections::codepointtrie::CodePointTrie
106 fn bidi_class(&self, c: char) -> DataSourceBidiClass {
107 let bidi_class = self.data.get(c);
108 match bidi_class {
109 BidiClass::LeftToRight => DataSourceBidiClass::L,
110 BidiClass::RightToLeft => DataSourceBidiClass::R,
111 BidiClass::EuropeanNumber => DataSourceBidiClass::EN,
112 BidiClass::EuropeanSeparator => DataSourceBidiClass::ES,
113 BidiClass::EuropeanTerminator => DataSourceBidiClass::ET,
114 BidiClass::ArabicNumber => DataSourceBidiClass::AN,
115 BidiClass::CommonSeparator => DataSourceBidiClass::CS,
116 BidiClass::ParagraphSeparator => DataSourceBidiClass::B,
117 BidiClass::SegmentSeparator => DataSourceBidiClass::S,
118 BidiClass::WhiteSpace => DataSourceBidiClass::WS,
119 BidiClass::OtherNeutral => DataSourceBidiClass::ON,
120 BidiClass::LeftToRightEmbedding => DataSourceBidiClass::LRE,
121 BidiClass::LeftToRightOverride => DataSourceBidiClass::LRO,
122 BidiClass::ArabicLetter => DataSourceBidiClass::AL,
123 BidiClass::RightToLeftEmbedding => DataSourceBidiClass::RLE,
124 BidiClass::RightToLeftOverride => DataSourceBidiClass::RLO,
125 BidiClass::PopDirectionalFormat => DataSourceBidiClass::PDF,
126 BidiClass::NonspacingMark => DataSourceBidiClass::NSM,
127 BidiClass::BoundaryNeutral => DataSourceBidiClass::BN,
128 BidiClass::FirstStrongIsolate => DataSourceBidiClass::FSI,
129 BidiClass::LeftToRightIsolate => DataSourceBidiClass::LRI,
130 BidiClass::RightToLeftIsolate => DataSourceBidiClass::RLI,
131 BidiClass::PopDirectionalIsolate => DataSourceBidiClass::PDI,
132 _ =>
133 // This must not happen.
134 {
135 DataSourceBidiClass::ON
136 }
137 }
138 }
139}