icu_collator/
lib.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

// Various collation-related algorithms and constants in this file are
// adapted from ICU4C and, therefore, are subject to the ICU license as
// described in LICENSE.

// https://github.com/unicode-org/icu4x/blob/main/documents/process/boilerplate.md#library-annotations
#![cfg_attr(not(any(test, feature = "std")), no_std)]
#![cfg_attr(
    not(test),
    deny(
        clippy::indexing_slicing,
        clippy::unwrap_used,
        clippy::expect_used,
        clippy::panic,
        clippy::exhaustive_structs,
        clippy::exhaustive_enums,
        missing_debug_implementations,
    )
)]
#![warn(missing_docs)]

//! Comparing strings according to language-dependent conventions.
//!
//! This module is published as its own crate ([`icu_collator`](https://docs.rs/icu_collator/latest/icu_collator/))
//! and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project.
//! `Collator` is the main structure of the component. It accepts a set of arguments
//! which allow it to collect necessary data from the data provider, and once
//! instantiated, can be used to compare strings.
//!
//! Refer to the ICU User Guide sections for Collation that give an
//! [introduction](https://unicode-org.github.io/icu/userguide/collation/) and explain
//! [basic concepts](https://unicode-org.github.io/icu/userguide/collation/concepts.html).
//!
//! # Examples
//!
//! As its most basic purpose, `Collator` offers locale-aware ordering:
//!
//! ```
//! use core::cmp::Ordering;
//! use icu::collator::*;
//! use icu::locid::locale;
//!
//! let locale_es = locale!("es-u-co-trad").into();
//! let mut options = CollatorOptions::new();
//! options.strength = Some(Strength::Primary);
//! let collator_es: Collator = Collator::try_new(&locale_es, options).unwrap();
//!
//! // "pollo" > "polvo" in traditional Spanish
//! assert_eq!(collator_es.compare("pollo", "polvo"), Ordering::Greater);
//!
//! let locale_en = locale!("en").into();
//! let mut options = CollatorOptions::new();
//! options.strength = Some(Strength::Primary);
//! let collator_en: Collator = Collator::try_new(&locale_en, options).unwrap();
//!
//! // "pollo" < "polvo" according to English rules
//! assert_eq!(collator_en.compare("pollo", "polvo"), Ordering::Less);
//! ```
//!
//! ## Examples of `CollatorOptions`
//!
//! The [`CollatorOptions`] struct configures specific custom behavior for the `Collator`.  See docs
//! for [`CollatorOptions`] for more details.  Some basic descriptions and examples are below.
//!
//! ## Strength
//!
//! The degree of sensitivity in how to determine that strings are distinct.
//!
//! ```
//! use core::cmp::Ordering;
//! use icu::collator::*;
//!
//! // Primary Level
//!
//! let mut options_l1 = CollatorOptions::new();
//! options_l1.strength = Some(Strength::Primary);
//! let collator_l1: Collator =
//!     Collator::try_new(&Default::default(), options_l1).unwrap();
//!
//! assert_eq!(collator_l1.compare("a", "b"), Ordering::Less); // primary
//! assert_eq!(collator_l1.compare("as", "às"), Ordering::Equal); // secondary
//! assert_eq!(collator_l1.compare("às", "at"), Ordering::Less);
//! assert_eq!(collator_l1.compare("ao", "Ao"), Ordering::Equal); // tertiary
//! assert_eq!(collator_l1.compare("Ao", "aò"), Ordering::Equal);
//! assert_eq!(collator_l1.compare("A", "Ⓐ"), Ordering::Equal);
//!
//! // Secondary Level
//!
//! let mut options_l2 = CollatorOptions::new();
//! options_l2.strength = Some(Strength::Secondary);
//! let collator_l2: Collator =
//!     Collator::try_new(&Default::default(), options_l2).unwrap();
//!
//! assert_eq!(collator_l2.compare("a", "b"), Ordering::Less); // primary
//! assert_eq!(collator_l2.compare("as", "às"), Ordering::Less); // secondary
//! assert_eq!(collator_l2.compare("às", "at"), Ordering::Less);
//! assert_eq!(collator_l2.compare("ao", "Ao"), Ordering::Equal); // tertiary
//! assert_eq!(collator_l2.compare("Ao", "aò"), Ordering::Less);
//! assert_eq!(collator_l2.compare("A", "Ⓐ"), Ordering::Equal);
//!
//! // Tertiary Level
//!
//! let mut options_l3 = CollatorOptions::new();
//! options_l3.strength = Some(Strength::Tertiary);
//! let collator_l3: Collator =
//!     Collator::try_new(&Default::default(), options_l3).unwrap();
//!
//! assert_eq!(collator_l3.compare("a", "b"), Ordering::Less); // primary
//! assert_eq!(collator_l3.compare("as", "às"), Ordering::Less); // secondary
//! assert_eq!(collator_l3.compare("às", "at"), Ordering::Less);
//! assert_eq!(collator_l3.compare("ao", "Ao"), Ordering::Less); // tertiary
//! assert_eq!(collator_l3.compare("Ao", "aò"), Ordering::Less);
//! assert_eq!(collator_l3.compare("A", "Ⓐ"), Ordering::Less);
//! ```
//!
//! ## Alternate Handling
//!
//! Allows alternate handling for certain customized collation orderings, including the option to
//! ignore the special handling for the strings of such customizations.  Specifically,
//! alternate handling is used to control the handling of the so-called **variable** characters in the
//! Unicode Collation Algorithm: whitespace, punctuation and symbols.
//!
//! Note that `AlternateHandling::ShiftTrimmed` and `AlternateHandling::Blanked` are
//! unimplemented. The default is `AlternateHandling::NonIgnorable`, except
//! for Thai, whose default is `AlternateHandling::Shifted`.
//!
//! ```
//! use core::cmp::Ordering;
//! use icu::collator::*;
//!
//! // If alternate handling is set to `NonIgnorable`, then differences among
//! // these characters are of the same importance as differences among letters.
//!
//! let mut options_3n = CollatorOptions::new();
//! options_3n.strength = Some(Strength::Tertiary);
//! options_3n.alternate_handling = Some(AlternateHandling::NonIgnorable);
//! let collator_3n: Collator =
//!     Collator::try_new(&Default::default(), options_3n).unwrap();
//!
//! assert_eq!(collator_3n.compare("di Silva", "Di Silva"), Ordering::Less);
//! assert_eq!(collator_3n.compare("Di Silva", "diSilva"), Ordering::Less);
//! assert_eq!(collator_3n.compare("diSilva", "U.S.A."), Ordering::Less);
//! assert_eq!(collator_3n.compare("U.S.A.", "USA"), Ordering::Less);
//!
//! // If alternate handling is set to `Shifted`, then these characters are of only minor
//! // importance. The Shifted value is often used in combination with Strength
//! // set to Quaternary.
//!
//! let mut options_3s = CollatorOptions::new();
//! options_3s.strength = Some(Strength::Tertiary);
//! options_3s.alternate_handling = Some(AlternateHandling::Shifted);
//! let collator_3s: Collator =
//!     Collator::try_new(&Default::default(), options_3s).unwrap();
//!
//! assert_eq!(collator_3s.compare("di Silva", "diSilva"), Ordering::Equal);
//! assert_eq!(collator_3s.compare("diSilva", "Di Silva"), Ordering::Less);
//! assert_eq!(collator_3s.compare("Di Silva", "U.S.A."), Ordering::Less);
//! assert_eq!(collator_3s.compare("U.S.A.", "USA"), Ordering::Equal);
//!
//! let mut options_4s = CollatorOptions::new();
//! options_4s.strength = Some(Strength::Quaternary);
//! options_4s.alternate_handling = Some(AlternateHandling::Shifted);
//! let collator_4s: Collator =
//!     Collator::try_new(&Default::default(), options_4s).unwrap();
//!
//! assert_eq!(collator_4s.compare("di Silva", "diSilva"), Ordering::Less);
//! assert_eq!(collator_4s.compare("diSilva", "Di Silva"), Ordering::Less);
//! assert_eq!(collator_4s.compare("Di Silva", "U.S.A."), Ordering::Less);
//! assert_eq!(collator_4s.compare("U.S.A.", "USA"), Ordering::Less);
//! ```
//!
//! ## Case Level
//!
//! Whether to distinguish case in sorting even at strengths weaker than tertiary (primary or
//! secondary), without having to raise the strength to tertiary just to enable case differences.
//!
//! ```
//! use core::cmp::Ordering;
//! use icu::collator::*;
//!
//! // Primary
//!
//! let mut options = CollatorOptions::new();
//! options.strength = Some(Strength::Primary);
//! options.case_level = Some(CaseLevel::Off);
//! let primary =
//!   Collator::try_new(&Default::default(),
//!                     options).unwrap();
//!
//! assert_eq!(primary.compare("ⓓⓔⓐⓛ", "DEAL"), Ordering::Equal);
//! assert_eq!(primary.compare("dejavu", "dejAvu"), Ordering::Equal);
//! assert_eq!(primary.compare("dejavu", "déjavu"), Ordering::Equal);
//!
//! // Primary with case level on
//!
//! options.strength = Some(Strength::Primary);
//! options.case_level = Some(CaseLevel::On);
//! let primary_and_case =
//!   Collator::try_new(&Default::default(),
//!                     options).unwrap();
//!
//! assert_eq!(primary_and_case.compare("ⓓⓔⓐⓛ", "DEAL"), Ordering::Less);
//! assert_eq!(primary_and_case.compare("dejavu", "dejAvu"), Ordering::Less);
//! assert_eq!(primary_and_case.compare("dejavu", "déjavu"), Ordering::Equal);
//!
//! // Secondary with case level on
//!
//! options.strength = Some(Strength::Secondary);
//! options.case_level = Some(CaseLevel::On);
//! let secondary_and_case =
//!   Collator::try_new(&Default::default(),
//!                     options).unwrap();
//!
//! assert_eq!(secondary_and_case.compare("ⓓⓔⓐⓛ", "DEAL"), Ordering::Less);
//! assert_eq!(secondary_and_case.compare("dejavu", "dejAvu"), Ordering::Less);
//! assert_eq!(secondary_and_case.compare("dejavu", "déjavu"), Ordering::Less);  // secondary difference
//!
//! // Tertiary
//!
//! options.strength = Some(Strength::Tertiary);
//! options.case_level = Some(CaseLevel::Off);
//! let tertiary =
//!   Collator::try_new(&Default::default(),
//!                     options).unwrap();
//!
//! assert_eq!(tertiary.compare("ⓓⓔⓐⓛ", "DEAL"), Ordering::Less);
//! assert_eq!(tertiary.compare("dejavu", "dejAvu"), Ordering::Less);
//! assert_eq!(tertiary.compare("dejavu", "déjavu"), Ordering::Less);
//! ```
//!
//! ## Case First
//!
//! Whether to swap the ordering of uppercase and lowercase.
//!
//! ## Backward second level
//!
//! Compare the secondary level in backward order. The default is off, except for Canadian
//! French.
//!
//! ## Numeric
//!
//! When set to `Numeric::On`, any sequence of decimal
//! digits is sorted at the primary level according to its
//! numeric value.
//!
//! ```
//! use core::cmp::Ordering;
//! use icu::collator::*;
//!
//! // Numerical sorting off
//!
//! let mut options_num_off = CollatorOptions::new();
//! options_num_off.numeric = Some(Numeric::Off);
//! let collator_num_off: Collator =
//!     Collator::try_new(&Default::default(), options_num_off).unwrap();
//! assert_eq!(collator_num_off.compare("a10b", "a2b"), Ordering::Less);
//!
//! // Numerical sorting on
//!
//! let mut options_num_on = CollatorOptions::new();
//! options_num_on.numeric = Some(Numeric::On);
//! let collator_num_on: Collator =
//!     Collator::try_new(&Default::default(), options_num_on).unwrap();
//! assert_eq!(collator_num_on.compare("a10b", "a2b"), Ordering::Greater);
//! ```

// Private module; source of the `Collator` type re-exported below.
mod comparison;
// Only compiled when rustdoc is building the documentation (`cfg(doc)`).
#[cfg(doc)]
pub mod docs;

// NOTE: The Pernosco debugger has special knowledge
// of the `CharacterAndClass` struct inside the `elements`
// module. Please do not change the crate-module-qualified
// name of that struct without coordination.
mod elements;

// Private module; source of the `CollatorError` type re-exported below.
mod error;
// Private module; source of the option types (`CollatorOptions`, `Strength`,
// `AlternateHandling`, ...) re-exported below.
mod options;
// Public module, reachable as `icu_collator::provider`.
// NOTE(review): presumably holds the data-provider structs for this crate — confirm.
pub mod provider;

// Makes the `alloc` crate nameable from this crate.
// NOTE(review): presumably needed by the submodules for heap-backed types in
// `no_std` builds (see the `no_std` attribute at the top of the file) — confirm.
extern crate alloc;

// Public API surface: items from the private modules above are re-exported at
// the crate root, so users never name `comparison`, `error` or `options` directly.
pub use comparison::Collator;
pub use error::CollatorError;
pub use options::AlternateHandling;
pub use options::BackwardSecondLevel;
pub use options::CaseFirst;
pub use options::CaseLevel;
pub use options::CollatorOptions;
pub use options::MaxVariable;
pub use options::Numeric;
pub use options::ResolvedCollatorOptions;
pub use options::Strength;

// `Error` alias for the crate's error type. `#[doc(no_inline)]` makes rustdoc
// list it as a re-export instead of duplicating the `CollatorError` docs.
#[doc(no_inline)]
pub use CollatorError as Error;