glib/
convert.rs

1// Take a look at the license at the top of the repository in the LICENSE file.
2
3use std::{fmt, io, os::raw::c_char, path::PathBuf, ptr};
4
5use crate::{ffi, translate::*, ConvertError, Error, GString, NormalizeMode, Slice};
6
7// rustdoc-stripper-ignore-next
8/// A wrapper for [`ConvertError`](crate::ConvertError) that can hold an offset into the input
9/// string.
10#[derive(Debug)]
11pub enum CvtError {
12    Convert(Error),
13    IllegalSequence { source: Error, offset: usize },
14}
15
16impl std::error::Error for CvtError {
17    fn source(&self) -> ::core::option::Option<&(dyn std::error::Error + 'static)> {
18        match self {
19            CvtError::Convert(err) => std::error::Error::source(err),
20            CvtError::IllegalSequence { source, .. } => Some(source),
21        }
22    }
23}
24
25impl fmt::Display for CvtError {
26    fn fmt(&self, fmt: &mut fmt::Formatter) -> ::core::fmt::Result {
27        match self {
28            CvtError::Convert(err) => fmt::Display::fmt(err, fmt),
29            CvtError::IllegalSequence { source, offset } => {
30                write!(fmt, "{source} at offset {offset}")
31            }
32        }
33    }
34}
35
36impl std::convert::From<Error> for CvtError {
37    fn from(err: Error) -> Self {
38        CvtError::Convert(err)
39    }
40}
41
42impl CvtError {
43    #[inline]
44    fn new(err: Error, bytes_read: usize) -> Self {
45        if err.kind::<ConvertError>() == Some(ConvertError::IllegalSequence) {
46            Self::IllegalSequence {
47                source: err,
48                offset: bytes_read,
49            }
50        } else {
51            err.into()
52        }
53    }
54}
55
56#[doc(alias = "g_convert")]
57pub fn convert(
58    str_: &[u8],
59    to_codeset: impl IntoGStr,
60    from_codeset: impl IntoGStr,
61) -> Result<(Slice<u8>, usize), CvtError> {
62    assert!(str_.len() <= isize::MAX as usize);
63    let mut bytes_read = 0;
64    let mut bytes_written = 0;
65    let mut error = ptr::null_mut();
66    let result = to_codeset.run_with_gstr(|to_codeset| {
67        from_codeset.run_with_gstr(|from_codeset| unsafe {
68            ffi::g_convert(
69                str_.as_ptr(),
70                str_.len() as isize,
71                to_codeset.to_glib_none().0,
72                from_codeset.to_glib_none().0,
73                &mut bytes_read,
74                &mut bytes_written,
75                &mut error,
76            )
77        })
78    });
79    if result.is_null() {
80        Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
81    } else {
82        let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
83        Ok((slice, bytes_read))
84    }
85}
86
87#[doc(alias = "g_convert_with_fallback")]
88pub fn convert_with_fallback(
89    str_: &[u8],
90    to_codeset: impl IntoGStr,
91    from_codeset: impl IntoGStr,
92    fallback: Option<impl IntoGStr>,
93) -> Result<(Slice<u8>, usize), CvtError> {
94    assert!(str_.len() <= isize::MAX as usize);
95    let mut bytes_read = 0;
96    let mut bytes_written = 0;
97    let mut error = ptr::null_mut();
98    let result = to_codeset.run_with_gstr(|to_codeset| {
99        from_codeset.run_with_gstr(|from_codeset| {
100            fallback.run_with_gstr(|fallback| unsafe {
101                ffi::g_convert_with_fallback(
102                    str_.as_ptr(),
103                    str_.len() as isize,
104                    to_codeset.to_glib_none().0,
105                    from_codeset.to_glib_none().0,
106                    fallback.to_glib_none().0,
107                    &mut bytes_read,
108                    &mut bytes_written,
109                    &mut error,
110                )
111            })
112        })
113    });
114    if result.is_null() {
115        Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
116    } else {
117        let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
118        Ok((slice, bytes_read))
119    }
120}
121
// rustdoc-stripper-ignore-next
/// Error type returned by [`IConv::iconv`]: a [`std::io::Error`] that may additionally carry
/// an offset into the input string.
#[derive(Debug)]
pub enum IConvError {
    // An OS error with no input offset attached.
    Error(io::Error),
    // An OS error together with the number of input bytes consumed before it occurred.
    WithOffset {
        source: io::Error,
        offset: usize,
    },
}
129
130impl std::error::Error for IConvError {
131    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
132        match self {
133            IConvError::Error(err) => std::error::Error::source(err),
134            IConvError::WithOffset { source, .. } => Some(source),
135        }
136    }
137}
138
139impl fmt::Display for IConvError {
140    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
141        match self {
142            IConvError::Error(err) => fmt::Display::fmt(err, fmt),
143            IConvError::WithOffset { source, offset } => write!(fmt, "{source} at offset {offset}"),
144        }
145    }
146}
147
148impl std::convert::From<io::Error> for IConvError {
149    fn from(err: io::Error) -> Self {
150        IConvError::Error(err)
151    }
152}
153
154#[derive(Debug)]
155#[repr(transparent)]
156#[doc(alias = "GIConv")]
157pub struct IConv(ffi::GIConv);
158
159unsafe impl Send for IConv {}
160
161impl IConv {
162    #[doc(alias = "g_iconv_open")]
163    #[allow(clippy::unnecessary_lazy_evaluations)]
164    pub fn new(to_codeset: impl IntoGStr, from_codeset: impl IntoGStr) -> Option<Self> {
165        let iconv = to_codeset.run_with_gstr(|to_codeset| {
166            from_codeset.run_with_gstr(|from_codeset| unsafe {
167                ffi::g_iconv_open(to_codeset.to_glib_none().0, from_codeset.to_glib_none().0)
168            })
169        });
170        (iconv as isize != -1).then(|| Self(iconv))
171    }
172    #[doc(alias = "g_convert_with_iconv")]
173    pub fn convert(&mut self, str_: &[u8]) -> Result<(Slice<u8>, usize), CvtError> {
174        assert!(str_.len() <= isize::MAX as usize);
175        let mut bytes_read = 0;
176        let mut bytes_written = 0;
177        let mut error = ptr::null_mut();
178        let result = unsafe {
179            ffi::g_convert_with_iconv(
180                str_.as_ptr(),
181                str_.len() as isize,
182                self.0,
183                &mut bytes_read,
184                &mut bytes_written,
185                &mut error,
186            )
187        };
188        if result.is_null() {
189            Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
190        } else {
191            let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
192            Ok((slice, bytes_read))
193        }
194    }
195    #[doc(alias = "g_iconv")]
196    pub fn iconv(
197        &mut self,
198        inbuf: Option<&[u8]>,
199        outbuf: Option<&mut [std::mem::MaybeUninit<u8>]>,
200    ) -> Result<(usize, usize, usize), IConvError> {
201        let input_len = inbuf.as_ref().map(|b| b.len()).unwrap_or_default();
202        let mut inbytes_left = input_len;
203        let mut outbytes_left = outbuf.as_ref().map(|b| b.len()).unwrap_or_default();
204        let mut inbuf = inbuf
205            .map(|b| mut_override(b.as_ptr()) as *mut c_char)
206            .unwrap_or_else(ptr::null_mut);
207        let mut outbuf = outbuf
208            .map(|b| b.as_mut_ptr() as *mut c_char)
209            .unwrap_or_else(ptr::null_mut);
210        let conversions = unsafe {
211            ffi::g_iconv(
212                self.0,
213                &mut inbuf,
214                &mut inbytes_left,
215                &mut outbuf,
216                &mut outbytes_left,
217            )
218        };
219        if conversions as isize == -1 {
220            let err = io::Error::last_os_error();
221            let code = err.raw_os_error().unwrap();
222            if code == libc::EILSEQ || code == libc::EINVAL {
223                Err(IConvError::WithOffset {
224                    source: err,
225                    offset: input_len - inbytes_left,
226                })
227            } else {
228                Err(err.into())
229            }
230        } else {
231            Ok((conversions, inbytes_left, outbytes_left))
232        }
233    }
234}
235
236impl Drop for IConv {
237    #[inline]
238    fn drop(&mut self) {
239        unsafe {
240            ffi::g_iconv_close(self.0);
241        }
242    }
243}
244
245#[doc(alias = "g_get_filename_charsets")]
246#[doc(alias = "get_filename_charsets")]
247pub fn filename_charsets() -> (bool, Vec<GString>) {
248    let mut filename_charsets = ptr::null_mut();
249    unsafe {
250        let is_utf8 = ffi::g_get_filename_charsets(&mut filename_charsets);
251        (
252            from_glib(is_utf8),
253            FromGlibPtrContainer::from_glib_none(filename_charsets),
254        )
255    }
256}
257
258#[doc(alias = "g_filename_from_utf8")]
259pub fn filename_from_utf8(utf8string: impl IntoGStr) -> Result<(PathBuf, usize), CvtError> {
260    let mut bytes_read = 0;
261    let mut bytes_written = std::mem::MaybeUninit::uninit();
262    let mut error = ptr::null_mut();
263    let ret = utf8string.run_with_gstr(|utf8string| {
264        assert!(utf8string.len() <= isize::MAX as usize);
265        let len = utf8string.len() as isize;
266        unsafe {
267            ffi::g_filename_from_utf8(
268                utf8string.to_glib_none().0,
269                len,
270                &mut bytes_read,
271                bytes_written.as_mut_ptr(),
272                &mut error,
273            )
274        }
275    });
276    if error.is_null() {
277        Ok(unsafe {
278            (
279                PathBuf::from_glib_full_num(ret, bytes_written.assume_init()),
280                bytes_read,
281            )
282        })
283    } else {
284        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
285    }
286}
287
288#[doc(alias = "g_filename_to_utf8")]
289pub fn filename_to_utf8(
290    opsysstring: impl AsRef<std::path::Path>,
291) -> Result<(crate::GString, usize), CvtError> {
292    let path = opsysstring.as_ref().to_glib_none();
293    let mut bytes_read = 0;
294    let mut bytes_written = std::mem::MaybeUninit::uninit();
295    let mut error = ptr::null_mut();
296    let ret = unsafe {
297        ffi::g_filename_to_utf8(
298            path.0,
299            path.1.as_bytes().len() as isize,
300            &mut bytes_read,
301            bytes_written.as_mut_ptr(),
302            &mut error,
303        )
304    };
305    if error.is_null() {
306        Ok(unsafe {
307            (
308                GString::from_glib_full_num(ret, bytes_written.assume_init()),
309                bytes_read,
310            )
311        })
312    } else {
313        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
314    }
315}
316
317#[doc(alias = "g_locale_from_utf8")]
318pub fn locale_from_utf8(utf8string: impl IntoGStr) -> Result<(Slice<u8>, usize), CvtError> {
319    let mut bytes_read = 0;
320    let mut bytes_written = std::mem::MaybeUninit::uninit();
321    let mut error = ptr::null_mut();
322    let ret = utf8string.run_with_gstr(|utf8string| {
323        assert!(utf8string.len() <= isize::MAX as usize);
324        unsafe {
325            ffi::g_locale_from_utf8(
326                utf8string.as_ptr(),
327                utf8string.len() as isize,
328                &mut bytes_read,
329                bytes_written.as_mut_ptr(),
330                &mut error,
331            )
332        }
333    });
334    if error.is_null() {
335        Ok(unsafe {
336            (
337                Slice::from_glib_full_num(ret, bytes_written.assume_init() + 1),
338                bytes_read,
339            )
340        })
341    } else {
342        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
343    }
344}
345
346#[doc(alias = "g_locale_to_utf8")]
347pub fn locale_to_utf8(opsysstring: &[u8]) -> Result<(crate::GString, usize), CvtError> {
348    let len = opsysstring.len() as isize;
349    let mut bytes_read = 0;
350    let mut bytes_written = std::mem::MaybeUninit::uninit();
351    let mut error = ptr::null_mut();
352    let ret = unsafe {
353        ffi::g_locale_to_utf8(
354            opsysstring.to_glib_none().0,
355            len,
356            &mut bytes_read,
357            bytes_written.as_mut_ptr(),
358            &mut error,
359        )
360    };
361    if error.is_null() {
362        Ok(unsafe {
363            (
364                GString::from_glib_full_num(ret, bytes_written.assume_init()),
365                bytes_read,
366            )
367        })
368    } else {
369        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
370    }
371}
372
373#[doc(alias = "g_utf8_to_ucs4")]
374#[doc(alias = "g_utf8_to_ucs4_fast")]
375#[doc(alias = "utf8_to_ucs4")]
376pub fn utf8_to_utf32(str: impl AsRef<str>) -> Slice<char> {
377    unsafe {
378        let mut items_written = 0;
379
380        let str_as_utf32 = ffi::g_utf8_to_ucs4_fast(
381            str.as_ref().as_ptr().cast::<c_char>(),
382            str.as_ref().len() as _,
383            &mut items_written,
384        );
385
386        // NOTE: We assume that u32 and char have the same layout and trust that glib won't give us
387        //       invalid UTF-32 codepoints
388        Slice::from_glib_full_num(str_as_utf32, items_written as usize)
389    }
390}
391
392#[doc(alias = "g_ucs4_to_utf8")]
393#[doc(alias = "ucs4_to_utf8")]
394pub fn utf32_to_utf8(str: impl AsRef<[char]>) -> GString {
395    let mut items_read = 0;
396    let mut items_written = 0;
397    let mut error = ptr::null_mut();
398
399    unsafe {
400        let str_as_utf8 = ffi::g_ucs4_to_utf8(
401            str.as_ref().as_ptr().cast::<u32>(),
402            str.as_ref().len() as _,
403            &mut items_read,
404            &mut items_written,
405            &mut error,
406        );
407
408        debug_assert!(
409            error.is_null(),
410            "Rust `char` should always be convertible to UTF-8"
411        );
412
413        GString::from_glib_full_num(str_as_utf8, items_written as usize)
414    }
415}
416
417#[doc(alias = "g_utf8_casefold")]
418#[doc(alias = "utf8_casefold")]
419pub fn casefold(str: impl AsRef<str>) -> GString {
420    unsafe {
421        let str = ffi::g_utf8_casefold(str.as_ref().as_ptr().cast(), str.as_ref().len() as isize);
422
423        from_glib_full(str)
424    }
425}
426
427#[doc(alias = "g_utf8_normalize")]
428#[doc(alias = "utf8_normalize")]
429pub fn normalize(str: impl AsRef<str>, mode: NormalizeMode) -> GString {
430    unsafe {
431        let str = ffi::g_utf8_normalize(
432            str.as_ref().as_ptr().cast(),
433            str.as_ref().len() as isize,
434            mode.into_glib(),
435        );
436
437        from_glib_full(str)
438    }
439}
440
#[cfg(test)]
mod tests {
    // Items are referenced through `super::` on purpose: several tests share their name with
    // the function under test.

    #[test]
    fn convert_ascii() {
        let plain = super::convert(b"Hello", "utf-8", "ascii");
        assert!(plain.is_ok());

        let invalid = super::convert(b"He\xaallo", "utf-8", "ascii");
        assert!(invalid.is_err());

        // Without a fallback string the unrepresentable character is escaped.
        let (escaped, _) =
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", crate::NONE_STR)
                .unwrap();
        assert_eq!(escaped.as_slice(), b"H\\u00e9llo");

        // With a fallback string the unrepresentable character is replaced by it.
        let (replaced, _) =
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", Some("_")).unwrap();
        assert_eq!(replaced.as_slice(), b"H_llo");
    }

    #[test]
    fn iconv() {
        let mut converter = super::IConv::new("utf-8", "ascii").unwrap();
        let plain = converter.convert(b"Hello");
        assert!(plain.is_ok());
        let invalid = converter.convert(b"He\xaallo");
        assert!(invalid.is_err());

        let unknown = super::IConv::new("utf-8", "badcharset123456789");
        assert!(unknown.is_none());
    }

    #[test]
    fn filename_charsets() {
        // Smoke test: only checks that the call completes.
        let _ = super::filename_charsets();
    }

    #[test]
    fn utf8_and_utf32() {
        let as_utf8 = super::utf32_to_utf8(['A', 'b', '🤔']);
        assert_eq!(as_utf8, "Ab🤔");

        let as_utf32 = super::utf8_to_utf32("🤔 ț");
        assert_eq!(as_utf32.as_slice(), &['🤔', ' ', 'ț']);
    }
}