encoding_c_mem/lib.rs
1// Copyright Mozilla Foundation. See the COPYRIGHT
2// file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10//! FFI bindings for `encoding_rs::mem`.
11//!
12//! _Note:_ "Latin1" in this module refers to the Unicode range from U+0000 to
13//! U+00FF, inclusive, and does not refer to the windows-1252 range. This
14//! in-memory encoding is sometimes used as a storage optimization of text
15//! when UTF-16 indexing and length semantics are exposed.
16
17use encoding_rs::mem::Latin1Bidi;
18
19/// Checks whether the buffer is all-ASCII.
20///
21/// May read the entire buffer even if it isn't all-ASCII. (I.e. the function
22/// is not guaranteed to fail fast.)
23///
24/// # Undefined behavior
25///
26/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory block
27/// or if `buffer` is `NULL`. (If `buffer_len` is `0`, `buffer` may be bogus but
28/// still has to be non-`NULL`.)
29#[no_mangle]
30pub unsafe extern "C" fn encoding_mem_is_ascii(buffer: *const u8, len: usize) -> bool {
31 encoding_rs::mem::is_ascii(::std::slice::from_raw_parts(buffer, len))
32}
33
34/// Checks whether the buffer is all-Basic Latin (i.e. UTF-16 representing
35/// only ASCII characters).
36///
37/// May read the entire buffer even if it isn't all-ASCII. (I.e. the function
38/// is not guaranteed to fail fast.)
39///
40/// # Undefined behavior
41///
42/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory block
43/// or if `buffer` is `NULL`. (If `buffer_len` is `0`, `buffer` may be bogus but
44/// still has to be non-`NULL` and aligned.)
45#[no_mangle]
46pub unsafe extern "C" fn encoding_mem_is_basic_latin(buffer: *const u16, len: usize) -> bool {
47 encoding_rs::mem::is_basic_latin(::std::slice::from_raw_parts(buffer, len))
48}
49
50/// Checks whether the buffer is valid UTF-8 representing only code points
51/// less than or equal to U+00FF.
52///
53/// Fails fast. (I.e. returns before having read the whole buffer if UTF-8
54/// invalidity or code points above U+00FF are discovered.
55///
56/// # Undefined behavior
57///
58/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory block
59/// or if `buffer` is `NULL`. (If `buffer_len` is `0`, `buffer` may be bogus but
60/// still has to be non-`NULL`.)
61#[no_mangle]
62pub unsafe extern "C" fn encoding_mem_is_utf8_latin1(buffer: *const u8, len: usize) -> bool {
63 encoding_rs::mem::is_utf8_latin1(::std::slice::from_raw_parts(buffer, len))
64}
65
66/// Checks whether the buffer represents only code points less than or equal
67/// to U+00FF.
68///
69/// Fails fast. (I.e. returns before having read the whole buffer if code
70/// points above U+00FF are discovered.
71///
72/// # Undefined behavior
73///
74/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory block,
75/// if `buffer` is `NULL`, or if the memory designated by `buffer` and `buffer_len`
76/// does not contain valid UTF-8. (If `buffer_len` is `0`, `buffer` may be bogus but
77/// still has to be non-`NULL`.)
78#[no_mangle]
79pub unsafe extern "C" fn encoding_mem_is_str_latin1(buffer: *const u8, len: usize) -> bool {
80 encoding_rs::mem::is_str_latin1(::std::str::from_utf8_unchecked(
81 ::std::slice::from_raw_parts(buffer, len),
82 ))
83}
84
85/// Checks whether the buffer represents only code point less than or equal
86/// to U+00FF.
87///
88/// May read the entire buffer even if it isn't all-Latin1. (I.e. the function
89/// is not guaranteed to fail fast.)
90///
91/// # Undefined behavior
92///
93/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory block
94/// or if `buffer` is `NULL`. (If `buffer_len` is `0`, `buffer` may be bogus but
95/// still has to be non-`NULL` and aligned.)
96#[no_mangle]
97pub unsafe extern "C" fn encoding_mem_is_utf16_latin1(buffer: *const u16, len: usize) -> bool {
98 encoding_rs::mem::is_utf16_latin1(::std::slice::from_raw_parts(buffer, len))
99}
100
101/// Checks whether a potentially-invalid UTF-8 buffer contains code points
102/// that trigger right-to-left processing.
103///
104/// The check is done on a Unicode block basis without regard to assigned
105/// vs. unassigned code points in the block. Hebrew presentation forms in
106/// the Alphabetic Presentation Forms block are treated as if they formed
107/// a block on their own (i.e. it treated as right-to-left). Additionally,
108/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
109/// for. Control characters that are technically bidi controls but do not
110/// cause right-to-left behavior without the presence of right-to-left
111/// characters or right-to-left controls are not checked for. As a special
112/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
113///
114/// Returns `true` if the input is invalid UTF-8 or the input contains an
115/// RTL character. Returns `false` if the input is valid UTF-8 and contains
116/// no RTL characters.
117///
118/// # Undefined behavior
119///
120/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory block
121/// or if `buffer` is `NULL`. (If `buffer_len` is `0`, `buffer` may be bogus but
122/// still has to be non-`NULL`.)
123#[no_mangle]
124pub unsafe extern "C" fn encoding_mem_is_utf8_bidi(buffer: *const u8, len: usize) -> bool {
125 encoding_rs::mem::is_utf8_bidi(::std::slice::from_raw_parts(buffer, len))
126}
127
128/// Checks whether a valid UTF-8 buffer contains code points that trigger
129/// right-to-left processing.
130///
131/// The check is done on a Unicode block basis without regard to assigned
132/// vs. unassigned code points in the block. Hebrew presentation forms in
133/// the Alphabetic Presentation Forms block are treated as if they formed
134/// a block on their own (i.e. it treated as right-to-left). Additionally,
135/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
136/// for. Control characters that are technically bidi controls but do not
137/// cause right-to-left behavior without the presence of right-to-left
138/// characters or right-to-left controls are not checked for. As a special
139/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
140///
141/// # Undefined behavior
142///
143/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory block,
144/// if `buffer` is `NULL`, or if the memory designated by `buffer` and `buffer_len`
145/// does not contain valid UTF-8. (If `buffer_len` is `0`, `buffer` may be bogus but
146/// still has to be non-`NULL`.)
147#[no_mangle]
148pub unsafe extern "C" fn encoding_mem_is_str_bidi(buffer: *const u8, len: usize) -> bool {
149 encoding_rs::mem::is_str_bidi(::std::str::from_utf8_unchecked(
150 ::std::slice::from_raw_parts(buffer, len),
151 ))
152}
153
154/// Checks whether a UTF-16 buffer contains code points that trigger
155/// right-to-left processing.
156///
157/// The check is done on a Unicode block basis without regard to assigned
158/// vs. unassigned code points in the block. Hebrew presentation forms in
159/// the Alphabetic Presentation Forms block are treated as if they formed
160/// a block on their own (i.e. it treated as right-to-left). Additionally,
161/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
162/// for. Control characters that are technically bidi controls but do not
163/// cause right-to-left behavior without the presence of right-to-left
164/// characters or right-to-left controls are not checked for. As a special
165/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
166///
167/// Returns `true` if the input contains an RTL character or an unpaired
168/// high surrogate that could be the high half of an RTL character.
169/// Returns `false` if the input contains neither RTL characters nor
170/// unpaired high surrogates that could be higher halves of RTL characters.
171///
172/// # Undefined behavior
173///
174/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory block
175/// or if `buffer` is `NULL`. (If `buffer_len` is `0`, `buffer` may be bogus but
176/// still has to be non-`NULL` and aligned.)
177#[no_mangle]
178pub unsafe extern "C" fn encoding_mem_is_utf16_bidi(buffer: *const u16, len: usize) -> bool {
179 encoding_rs::mem::is_utf16_bidi(::std::slice::from_raw_parts(buffer, len))
180}
181
182/// Checks whether a scalar value triggers right-to-left processing.
183///
184/// The check is done on a Unicode block basis without regard to assigned
185/// vs. unassigned code points in the block. Hebrew presentation forms in
186/// the Alphabetic Presentation Forms block are treated as if they formed
187/// a block on their own (i.e. it treated as right-to-left). Additionally,
188/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
189/// for. Control characters that are technically bidi controls but do not
190/// cause right-to-left behavior without the presence of right-to-left
191/// characters or right-to-left controls are not checked for. As a special
192/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
193///
194/// # Undefined behavior
195///
196/// Undefined behavior ensues if `c` is not a valid Unicode Scalar Value.
197#[no_mangle]
198pub unsafe extern "C" fn encoding_mem_is_char_bidi(c: char) -> bool {
199 encoding_rs::mem::is_char_bidi(c)
200}
201
202/// Checks whether a UTF-16 code unit triggers right-to-left processing.
203///
204/// The check is done on a Unicode block basis without regard to assigned
205/// vs. unassigned code points in the block. Hebrew presentation forms in
206/// the Alphabetic Presentation Forms block are treated as if they formed
207/// a block on their own (i.e. it treated as right-to-left). Additionally,
208/// the four RIGHT-TO-LEFT FOO controls in General Punctuation are checked
209/// for. Control characters that are technically bidi controls but do not
210/// cause right-to-left behavior without the presence of right-to-left
211/// characters or right-to-left controls are not checked for. As a special
212/// case, U+FEFF is excluded from Arabic Presentation Forms-B.
213///
214/// Since supplementary-plane right-to-left blocks are identifiable from the
215/// high surrogate without examining the low surrogate, this function returns
216/// `true` for such high surrogates making the function suitable for handling
217/// supplementary-plane text without decoding surrogate pairs to scalar
218/// values. Obviously, such high surrogates are then reported as right-to-left
219/// even if actually unpaired.
220#[no_mangle]
221pub unsafe extern "C" fn encoding_mem_is_utf16_code_unit_bidi(u: u16) -> bool {
222 encoding_rs::mem::is_utf16_code_unit_bidi(u)
223}
224
225/// Checks whether a potentially invalid UTF-8 buffer contains code points
226/// that trigger right-to-left processing or is all-Latin1.
227///
228/// Possibly more efficient than performing the checks separately.
229///
230/// Returns `Latin1Bidi::Latin1` if `is_utf8_latin1()` would return `true`.
231/// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf8_bidi()` would return
232/// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`.
233///
234/// # Undefined behavior
235///
236/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory block
237/// or if `buffer` is `NULL`. (If `buffer_len` is `0`, `buffer` may be bogus but
238/// still has to be non-`NULL`.)
239#[no_mangle]
240pub unsafe extern "C" fn encoding_mem_check_utf8_for_latin1_and_bidi(
241 buffer: *const u8,
242 len: usize,
243) -> Latin1Bidi {
244 encoding_rs::mem::check_utf8_for_latin1_and_bidi(::std::slice::from_raw_parts(buffer, len))
245}
246
247/// Checks whether a valid UTF-8 buffer contains code points
248/// that trigger right-to-left processing or is all-Latin1.
249///
250/// Possibly more efficient than performing the checks separately.
251///
252/// Returns `Latin1Bidi::Latin1` if `is_str_latin1()` would return `true`.
253/// Otherwise, returns `Latin1Bidi::Bidi` if `is_str_bidi()` would return
254/// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`.
255///
256/// # Undefined behavior
257///
258/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory block,
259/// if `buffer` is `NULL`, or if the memory designated by `buffer` and `buffer_len`
260/// does not contain valid UTF-8. (If `buffer_len` is `0`, `buffer` may be bogus but
261/// still has to be non-`NULL`.)
262#[no_mangle]
263pub unsafe extern "C" fn encoding_mem_check_str_for_latin1_and_bidi(
264 buffer: *const u8,
265 len: usize,
266) -> Latin1Bidi {
267 encoding_rs::mem::check_str_for_latin1_and_bidi(::std::str::from_utf8_unchecked(
268 ::std::slice::from_raw_parts(buffer, len),
269 ))
270}
271
272/// Checks whether a potentially invalid UTF-16 buffer contains code points
273/// that trigger right-to-left processing or is all-Latin1.
274///
275/// Possibly more efficient than performing the checks separately.
276///
277/// Returns `Latin1Bidi::Latin1` if `is_utf16_latin1()` would return `true`.
278/// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf16_bidi()` would return
279/// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`.
280///
281/// # Undefined behavior
282///
283/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory block
284/// or if `buffer` is `NULL`. (If `buffer_len` is `0`, `buffer` may be bogus but
285/// still has to be non-`NULL` and aligned.)
286#[no_mangle]
287pub unsafe extern "C" fn encoding_mem_check_utf16_for_latin1_and_bidi(
288 buffer: *const u16,
289 len: usize,
290) -> Latin1Bidi {
291 encoding_rs::mem::check_utf16_for_latin1_and_bidi(::std::slice::from_raw_parts(buffer, len))
292}
293
294/// Converts potentially-invalid UTF-8 to valid UTF-16 with errors replaced
295/// with the REPLACEMENT CHARACTER.
296///
297/// The length of the destination buffer must be at least the length of the
298/// source buffer _plus one_.
299///
300/// Returns the number of `u16`s written.
301///
302/// # Panics
303///
304/// Panics if the destination buffer is shorter than stated above.
305///
306/// # Undefined behavior
307///
308/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
309/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
310/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
311/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
312/// aligned. Likewise for `dst` and `dst_len`.)
313#[no_mangle]
314pub unsafe extern "C" fn encoding_mem_convert_utf8_to_utf16(
315 src: *const u8,
316 src_len: usize,
317 dst: *mut u16,
318 dst_len: usize,
319) -> usize {
320 encoding_rs::mem::convert_utf8_to_utf16(
321 ::std::slice::from_raw_parts(src, src_len),
322 ::std::slice::from_raw_parts_mut(dst, dst_len),
323 )
324}
325
326/// Converts valid UTF-8 to valid UTF-16.
327///
328/// The length of the destination buffer must be at least the length of the
329/// source buffer.
330///
331/// Returns the number of `u16`s written.
332///
333/// # Panics
334///
335/// Panics if the destination buffer is shorter than stated above.
336///
337/// # Undefined behavior
338///
339/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
340/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
341/// block, if `dst` is `NULL`, if the two memory blocks overlap, of if the
342/// buffer designated by `src` and `src_len` does not contain valid UTF-8. (If
343/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
344/// aligned. Likewise for `dst` and `dst_len`.)
345#[no_mangle]
346pub unsafe extern "C" fn encoding_mem_convert_str_to_utf16(
347 src: *const u8,
348 src_len: usize,
349 dst: *mut u16,
350 dst_len: usize,
351) -> usize {
352 encoding_rs::mem::convert_str_to_utf16(
353 ::std::str::from_utf8_unchecked(::std::slice::from_raw_parts(src, src_len)),
354 ::std::slice::from_raw_parts_mut(dst, dst_len),
355 )
356}
357
358/// Converts potentially-invalid UTF-8 to valid UTF-16 signaling on error.
359///
360/// The length of the destination buffer must be at least the length of the
361/// source buffer.
362///
363/// Returns the number of `u16`s written or `SIZE_MAX` if the input was invalid.
364///
365/// When the input was invalid, some output may have been written.
366///
367/// # Panics
368///
369/// Panics if the destination buffer is shorter than stated above.
370///
371/// # Undefined behavior
372///
373/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
374/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
375/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
376/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
377/// aligned. Likewise for `dst` and `dst_len`.)
378#[no_mangle]
379pub unsafe extern "C" fn encoding_mem_convert_utf8_to_utf16_without_replacement(
380 src: *const u8,
381 src_len: usize,
382 dst: *mut u16,
383 dst_len: usize,
384) -> usize {
385 encoding_rs::mem::convert_utf8_to_utf16_without_replacement(
386 ::std::slice::from_raw_parts(src, src_len),
387 ::std::slice::from_raw_parts_mut(dst, dst_len),
388 ).unwrap_or(::std::usize::MAX)
389}
390
391/// Converts potentially-invalid UTF-16 to valid UTF-8 with errors replaced
392/// with the REPLACEMENT CHARACTER with potentially insufficient output
393/// space.
394///
395/// Writes the number of code units read into `*src_len` and the number of
396/// bytes written into `*dst_len`.
397///
398/// Guarantees that the bytes in the destination beyond the number of
399/// bytes claimed as written by the second item of the return tuple
400/// are left unmodified.
401///
402/// Not all code units are read if there isn't enough output space.
403///
404/// Note that this method isn't designed for general streamability but for
405/// not allocating memory for the worst case up front. Specifically,
406/// if the input starts with or ends with an unpaired surrogate, those are
407/// replaced with the REPLACEMENT CHARACTER.
408///
409/// Matches the semantics of `TextEncoder.encodeInto()` from the
410/// Encoding Standard.
411///
412/// # Safety
413///
414/// If you want to convert into a `&mut str`, use
415/// `convert_utf16_to_str_partial()` instead of using this function
416/// together with the `unsafe` method `as_bytes_mut()` on `&mut str`.
417///
418/// # Undefined behavior
419///
420/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
421/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
422/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
423/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
424/// aligned. Likewise for `dst` and `dst_len`.)
425#[no_mangle]
426pub unsafe extern "C" fn encoding_mem_convert_utf16_to_utf8_partial(
427 src: *const u16,
428 src_len: *mut usize,
429 dst: *mut u8,
430 dst_len: *mut usize,
431) {
432 let (read, written) = encoding_rs::mem::convert_utf16_to_utf8_partial(
433 ::std::slice::from_raw_parts(src, *src_len),
434 ::std::slice::from_raw_parts_mut(dst, *dst_len),
435 );
436 *src_len = read;
437 *dst_len = written;
438}
439
440/// Converts potentially-invalid UTF-16 to valid UTF-8 with errors replaced
441/// with the REPLACEMENT CHARACTER.
442///
443/// The length of the destination buffer must be at least the length of the
444/// source buffer times three.
445///
446/// Returns the number of bytes written.
447///
448/// # Panics
449///
450/// Panics if the destination buffer is shorter than stated above.
451///
452/// # Safety
453///
454/// If you want to convert into a `&mut str`, use `convert_utf16_to_str()`
455/// instead of using this function together with the `unsafe` method
456/// `as_bytes_mut()` on `&mut str`.
457///
458/// # Undefined behavior
459///
460/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
461/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
462/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
463/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
464/// aligned. Likewise for `dst` and `dst_len`.)
465#[no_mangle]
466pub unsafe extern "C" fn encoding_mem_convert_utf16_to_utf8(
467 src: *const u16,
468 src_len: usize,
469 dst: *mut u8,
470 dst_len: usize,
471) -> usize {
472 encoding_rs::mem::convert_utf16_to_utf8(
473 ::std::slice::from_raw_parts(src, src_len),
474 ::std::slice::from_raw_parts_mut(dst, dst_len),
475 )
476}
477
478/// Converts bytes whose unsigned value is interpreted as Unicode code point
479/// (i.e. U+0000 to U+00FF, inclusive) to UTF-16.
480///
481/// The length of the destination buffer must be at least the length of the
482/// source buffer.
483///
484/// The number of `u16`s written equals the length of the source buffer.
485///
486/// # Panics
487///
488/// Panics if the destination buffer is shorter than stated above.
489///
490/// # Undefined behavior
491///
492/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
493/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
494/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
495/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
496/// aligned. Likewise for `dst` and `dst_len`.)
497#[no_mangle]
498pub unsafe extern "C" fn encoding_mem_convert_latin1_to_utf16(
499 src: *const u8,
500 src_len: usize,
501 dst: *mut u16,
502 dst_len: usize,
503) {
504 encoding_rs::mem::convert_latin1_to_utf16(
505 ::std::slice::from_raw_parts(src, src_len),
506 ::std::slice::from_raw_parts_mut(dst, dst_len),
507 );
508}
509
510/// Converts bytes whose unsigned value is interpreted as Unicode code point
511/// (i.e. U+0000 to U+00FF, inclusive) to UTF-8 with potentially insufficient
512/// output space.
513///
514/// Writes the number of code units read into `*src_len` and the number of
515/// bytes written into `*dst_len`.
516///
517/// If the output isn't large enough, not all input is consumed.
518///
519/// # Safety
520///
521/// If you want to convert into a `&mut str`, use
522/// `encoding_mem_convert_latin1_to_str_partial()` instead of using this function
523/// together with the `unsafe` method `as_bytes_mut()` on `&mut str`.
524///
525/// # Undefined behavior
526///
527/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
528/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
529/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
530/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
531/// aligned. Likewise for `dst` and `dst_len`.)
532#[no_mangle]
533pub unsafe extern "C" fn encoding_mem_convert_latin1_to_utf8_partial(
534 src: *const u8,
535 src_len: *mut usize,
536 dst: *mut u8,
537 dst_len: *mut usize,
538) {
539 let (read, written) = encoding_rs::mem::convert_latin1_to_utf8_partial(
540 ::std::slice::from_raw_parts(src, *src_len),
541 ::std::slice::from_raw_parts_mut(dst, *dst_len),
542 );
543 *src_len = read;
544 *dst_len = written;
545}
546
547/// Converts bytes whose unsigned value is interpreted as Unicode code point
548/// (i.e. U+0000 to U+00FF, inclusive) to UTF-8.
549///
550/// The length of the destination buffer must be at least the length of the
551/// source buffer times two.
552///
553/// Returns the number of bytes written.
554///
555/// # Panics
556///
557/// Panics if the destination buffer is shorter than stated above.
558///
559/// # Safety
560///
561/// Note that this function may write garbage beyond the number of bytes
562/// indicated by the return value, so using a `&mut str` interpreted as
563/// `&mut [u8]` as the destination is not safe. If you want to convert into
564/// a `&mut str`, use `convert_utf16_to_str()` instead of this function.
565///
566/// # Undefined behavior
567///
568/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
569/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
570/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
571/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
572/// aligned. Likewise for `dst` and `dst_len`.)
573#[no_mangle]
574pub unsafe extern "C" fn encoding_mem_convert_latin1_to_utf8(
575 src: *const u8,
576 src_len: usize,
577 dst: *mut u8,
578 dst_len: usize,
579) -> usize {
580 encoding_rs::mem::convert_latin1_to_utf8(
581 ::std::slice::from_raw_parts(src, src_len),
582 ::std::slice::from_raw_parts_mut(dst, dst_len),
583 )
584}
585
586/// If the input is valid UTF-8 representing only Unicode code points from
587/// U+0000 to U+00FF, inclusive, converts the input into output that
588/// represents the value of each code point as the unsigned byte value of
589/// each output byte.
590///
591/// If the input does not fulfill the condition stated above, this function
592/// panics if debug assertions are enabled (and fuzzing isn't) and otherwise
593/// does something that is memory-safe without any promises about any
594/// properties of the output. In particular, callers shouldn't assume the
595/// output to be the same across crate versions or CPU architectures and
596/// should not assume that non-ASCII input can't map to ASCII output.
597///
598/// The length of the destination buffer must be at least the length of the
599/// source buffer.
600///
601/// Returns the number of bytes written.
602///
603/// # Panics
604///
605/// Panics if the destination buffer is shorter than stated above.
606///
607/// If debug assertions are enabled (and not fuzzing) and the input is
608/// not in the range U+0000 to U+00FF, inclusive.
609///
610/// # Undefined behavior
611///
612/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
613/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
614/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
615/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
616/// aligned. Likewise for `dst` and `dst_len`.)
617#[no_mangle]
618pub unsafe extern "C" fn encoding_mem_convert_utf8_to_latin1_lossy(
619 src: *const u8,
620 src_len: usize,
621 dst: *mut u8,
622 dst_len: usize,
623) -> usize {
624 encoding_rs::mem::convert_utf8_to_latin1_lossy(
625 ::std::slice::from_raw_parts(src, src_len),
626 ::std::slice::from_raw_parts_mut(dst, dst_len),
627 )
628}
629
630/// If the input is valid UTF-16 representing only Unicode code points from
631/// U+0000 to U+00FF, inclusive, converts the input into output that
632/// represents the value of each code point as the unsigned byte value of
633/// each output byte.
634///
635/// If the input does not fulfill the condition stated above, does something
636/// that is memory-safe without any promises about any properties of the
637/// output and will probably assert in debug builds in future versions.
638/// In particular, callers shouldn't assume the output to be the same across
639/// crate versions or CPU architectures and should not assume that non-ASCII
640/// input can't map to ASCII output.
641///
642/// The length of the destination buffer must be at least the length of the
643/// source buffer.
644///
645/// The number of bytes written equals the length of the source buffer.
646///
647/// # Panics
648///
649/// Panics if the destination buffer is shorter than stated above.
650///
651/// (Probably in future versions if debug assertions are enabled (and not
652/// fuzzing) and the input is not in the range U+0000 to U+00FF, inclusive.)
653///
654/// # Undefined behavior
655///
656/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
657/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
658/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
659/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
660/// aligned. Likewise for `dst` and `dst_len`.)
661#[no_mangle]
662pub unsafe extern "C" fn encoding_mem_convert_utf16_to_latin1_lossy(
663 src: *const u16,
664 src_len: usize,
665 dst: *mut u8,
666 dst_len: usize,
667) {
668 encoding_rs::mem::convert_utf16_to_latin1_lossy(
669 ::std::slice::from_raw_parts(src, src_len),
670 ::std::slice::from_raw_parts_mut(dst, dst_len),
671 );
672}
673
674/// Returns the index of the first unpaired surrogate or, if the input is
675/// valid UTF-16 in its entirety, the length of the input.
676///
677/// # Undefined behavior
678///
679/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory block
680/// or if `buffer` is `NULL`. (If `buffer_len` is `0`, `buffer` may be bogus but
681/// still has to be non-`NULL` and aligned.)
682#[no_mangle]
683pub unsafe extern "C" fn encoding_mem_utf16_valid_up_to(buffer: *const u16, len: usize) -> usize {
684 encoding_rs::mem::utf16_valid_up_to(::std::slice::from_raw_parts(buffer, len))
685}
686
687/// Returns the index of first byte that starts an invalid byte
688/// sequence or a non-Latin1 byte sequence, or the length of the
689/// string if there are neither.
690///
691/// # Undefined behavior
692///
693/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory block
694/// or if `buffer` is `NULL`. (If `buffer_len` is `0`, `buffer` may be bogus but
695/// still has to be non-`NULL` and aligned.)
696#[no_mangle]
697pub unsafe extern "C" fn encoding_mem_utf8_latin1_up_to(buffer: *const u8, len: usize) -> usize {
698 encoding_rs::mem::utf8_latin1_up_to(::std::slice::from_raw_parts(buffer, len))
699}
700
701/// Returns the index of first byte that starts a non-Latin1 byte
702/// sequence, or the length of the string if there are none.
703///
704/// # Undefined behavior
705///
706/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory block,
707/// if `buffer` is `NULL`, or if the memory block does not contain valid UTF-8.
708/// (If `buffer_len` is `0`, `buffer` may be bogus but still has to be non-`NULL`
709/// and aligned.)
710#[no_mangle]
711pub unsafe extern "C" fn encoding_mem_str_latin1_up_to(buffer: *const u8, len: usize) -> usize {
712 encoding_rs::mem::str_latin1_up_to(::std::str::from_utf8_unchecked(
713 ::std::slice::from_raw_parts(buffer, len),
714 ))
715}
716
717/// Replaces unpaired surrogates in the input with the REPLACEMENT CHARACTER.
718///
719/// # Undefined behavior
720///
721/// UB ensues if `buffer` and `buffer_len` don't designate a valid memory block
722/// or if `buffer` is `NULL`. (If `buffer_len` is `0`, `buffer` may be bogus but
723/// still has to be non-`NULL` and aligned.)
724#[no_mangle]
725pub unsafe extern "C" fn encoding_mem_ensure_utf16_validity(buffer: *mut u16, len: usize) {
726 encoding_rs::mem::ensure_utf16_validity(::std::slice::from_raw_parts_mut(buffer, len));
727}
728
729/// Copies ASCII from source to destination up to the first non-ASCII byte
730/// (or the end of the input if it is ASCII in its entirety).
731///
732/// The length of the destination buffer must be at least the length of the
733/// source buffer.
734///
735/// Returns the number of bytes written.
736///
737/// # Panics
738///
739/// Panics if the destination buffer is shorter than stated above.
740///
741/// # Undefined behavior
742///
743/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
744/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
745/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
746/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
747/// aligned. Likewise for `dst` and `dst_len`.)
748#[no_mangle]
749pub unsafe extern "C" fn encoding_mem_copy_ascii_to_ascii(
750 src: *const u8,
751 src_len: usize,
752 dst: *mut u8,
753 dst_len: usize,
754) -> usize {
755 encoding_rs::mem::copy_ascii_to_ascii(
756 ::std::slice::from_raw_parts(src, src_len),
757 ::std::slice::from_raw_parts_mut(dst, dst_len),
758 )
759}
760
761/// Copies ASCII from source to destination zero-extending it to UTF-16 up to
762/// the first non-ASCII byte (or the end of the input if it is ASCII in its
763/// entirety).
764///
765/// The length of the destination buffer must be at least the length of the
766/// source buffer.
767///
768/// Returns the number of `u16`s written.
769///
770/// # Panics
771///
772/// Panics if the destination buffer is shorter than stated above.
773///
774/// # Undefined behavior
775///
776/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
777/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
778/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
779/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
780/// aligned. Likewise for `dst` and `dst_len`.)
781#[no_mangle]
782pub unsafe extern "C" fn encoding_mem_copy_ascii_to_basic_latin(
783 src: *const u8,
784 src_len: usize,
785 dst: *mut u16,
786 dst_len: usize,
787) -> usize {
788 encoding_rs::mem::copy_ascii_to_basic_latin(
789 ::std::slice::from_raw_parts(src, src_len),
790 ::std::slice::from_raw_parts_mut(dst, dst_len),
791 )
792}
793
794/// Copies Basic Latin from source to destination narrowing it to ASCII up to
795/// the first non-Basic Latin code unit (or the end of the input if it is
796/// Basic Latin in its entirety).
797///
798/// The length of the destination buffer must be at least the length of the
799/// source buffer.
800///
801/// Returns the number of bytes written.
802///
803/// # Panics
804///
805/// Panics if the destination buffer is shorter than stated above.
806///
807/// # Undefined behavior
808///
809/// UB ensues if `src` and `src_len` don't designate a valid memory block, if
810/// `src` is `NULL`, if `dst` and `dst_len` don't designate a valid memory
811/// block, if `dst` is `NULL` or if the two memory blocks overlap. (If
812/// `src_len` is `0`, `src` may be bogus but still has to be non-`NULL` and
813/// aligned. Likewise for `dst` and `dst_len`.)
814#[no_mangle]
815pub unsafe extern "C" fn encoding_mem_copy_basic_latin_to_ascii(
816 src: *const u16,
817 src_len: usize,
818 dst: *mut u8,
819 dst_len: usize,
820) -> usize {
821 encoding_rs::mem::copy_basic_latin_to_ascii(
822 ::std::slice::from_raw_parts(src, src_len),
823 ::std::slice::from_raw_parts_mut(dst, dst_len),
824 )
825}