glib/
convert.rs

1// Take a look at the license at the top of the repository in the LICENSE file.
2
3use std::{fmt, io, os::raw::c_char, path::PathBuf, ptr};
4
5use crate::{ffi, translate::*, ConvertError, Error, GString, NormalizeMode, Slice};
6
// rustdoc-stripper-ignore-next
/// A wrapper for [`ConvertError`](crate::ConvertError) that can hold an offset into the input
/// string.
#[derive(Debug)]
pub enum CvtError {
    // rustdoc-stripper-ignore-next
    /// A conversion error that carries no input offset.
    Convert(Error),
    // rustdoc-stripper-ignore-next
    /// An error whose kind is `ConvertError::IllegalSequence`; `offset` is the number of input
    /// bytes the failing conversion reported as read.
    IllegalSequence { source: Error, offset: usize },
}
15
16impl std::error::Error for CvtError {
17    fn source(&self) -> ::core::option::Option<&(dyn std::error::Error + 'static)> {
18        match self {
19            CvtError::Convert(err) => std::error::Error::source(err),
20            CvtError::IllegalSequence { source, .. } => Some(source),
21        }
22    }
23}
24
25impl fmt::Display for CvtError {
26    fn fmt(&self, fmt: &mut fmt::Formatter) -> ::core::fmt::Result {
27        match self {
28            CvtError::Convert(err) => fmt::Display::fmt(err, fmt),
29            CvtError::IllegalSequence { source, offset } => {
30                write!(fmt, "{source} at offset {offset}")
31            }
32        }
33    }
34}
35
36impl std::convert::From<Error> for CvtError {
37    fn from(err: Error) -> Self {
38        CvtError::Convert(err)
39    }
40}
41
42impl CvtError {
43    #[inline]
44    fn new(err: Error, bytes_read: usize) -> Self {
45        if err.kind::<ConvertError>() == Some(ConvertError::IllegalSequence) {
46            Self::IllegalSequence {
47                source: err,
48                offset: bytes_read,
49            }
50        } else {
51            err.into()
52        }
53    }
54}
55
56#[doc(alias = "g_convert")]
57pub fn convert(
58    str_: &[u8],
59    to_codeset: impl IntoGStr,
60    from_codeset: impl IntoGStr,
61) -> Result<(Slice<u8>, usize), CvtError> {
62    assert!(str_.len() <= isize::MAX as usize);
63    let mut bytes_read = 0;
64    let mut bytes_written = 0;
65    let mut error = ptr::null_mut();
66    let result = to_codeset.run_with_gstr(|to_codeset| {
67        from_codeset.run_with_gstr(|from_codeset| unsafe {
68            ffi::g_convert(
69                str_.as_ptr(),
70                str_.len() as isize,
71                to_codeset.to_glib_none().0,
72                from_codeset.to_glib_none().0,
73                &mut bytes_read,
74                &mut bytes_written,
75                &mut error,
76            )
77        })
78    });
79    if result.is_null() {
80        Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
81    } else {
82        let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
83        Ok((slice, bytes_read))
84    }
85}
86
87#[doc(alias = "g_convert_with_fallback")]
88pub fn convert_with_fallback(
89    str_: &[u8],
90    to_codeset: impl IntoGStr,
91    from_codeset: impl IntoGStr,
92    fallback: Option<impl IntoGStr>,
93) -> Result<(Slice<u8>, usize), CvtError> {
94    assert!(str_.len() <= isize::MAX as usize);
95    let mut bytes_read = 0;
96    let mut bytes_written = 0;
97    let mut error = ptr::null_mut();
98    let result = to_codeset.run_with_gstr(|to_codeset| {
99        from_codeset.run_with_gstr(|from_codeset| {
100            fallback.run_with_gstr(|fallback| unsafe {
101                ffi::g_convert_with_fallback(
102                    str_.as_ptr(),
103                    str_.len() as isize,
104                    to_codeset.to_glib_none().0,
105                    from_codeset.to_glib_none().0,
106                    fallback.to_glib_none().0,
107                    &mut bytes_read,
108                    &mut bytes_written,
109                    &mut error,
110                )
111            })
112        })
113    });
114    if result.is_null() {
115        Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
116    } else {
117        let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
118        Ok((slice, bytes_read))
119    }
120}
121
// rustdoc-stripper-ignore-next
/// A wrapper for [`std::io::Error`] that can hold an offset into an input string.
#[derive(Debug)]
pub enum IConvError {
    // rustdoc-stripper-ignore-next
    /// An I/O error that carries no input offset.
    Error(io::Error),
    // rustdoc-stripper-ignore-next
    /// An I/O error together with the byte offset into the input at which it was reported.
    WithOffset { source: io::Error, offset: usize },
}
129
130impl std::error::Error for IConvError {
131    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
132        match self {
133            IConvError::Error(err) => std::error::Error::source(err),
134            IConvError::WithOffset { source, .. } => Some(source),
135        }
136    }
137}
138
139impl fmt::Display for IConvError {
140    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
141        match self {
142            IConvError::Error(err) => fmt::Display::fmt(err, fmt),
143            IConvError::WithOffset { source, offset } => write!(fmt, "{source} at offset {offset}"),
144        }
145    }
146}
147
148impl std::convert::From<io::Error> for IConvError {
149    fn from(err: io::Error) -> Self {
150        IConvError::Error(err)
151    }
152}
153
// rustdoc-stripper-ignore-next
/// An owned `GIConv` conversion descriptor.
///
/// Created with [`IConv::new`]; the descriptor is closed with `g_iconv_close` when the value is
/// dropped.
// rustdoc-stripper-ignore-next-stop
/// The GIConv struct wraps an iconv() conversion descriptor. It contains
/// private data and should only be accessed using the following functions.
#[derive(Debug)]
#[repr(transparent)]
#[doc(alias = "GIConv")]
pub struct IConv(ffi::GIConv);

// NOTE(review): presumably sound because every conversion method takes `&mut self`, so a
// descriptor is never used from two threads at once — confirm against the GLib/iconv contract.
unsafe impl Send for IConv {}
165
166impl IConv {
167    /// Same as the standard UNIX routine iconv_open(), but
168    /// may be implemented via libiconv on UNIX flavors that lack
169    /// a native implementation.
170    ///
171    /// GLib provides g_convert() and g_locale_to_utf8() which are likely
172    /// more convenient than the raw iconv wrappers.
173    /// ## `to_codeset`
174    /// destination codeset
175    /// ## `from_codeset`
176    /// source codeset
177    ///
178    /// # Returns
179    ///
180    /// a "conversion descriptor", or (GIConv)-1 if
181    ///  opening the converter failed.
182    // rustdoc-stripper-ignore-next-stop
183    /// Same as the standard UNIX routine iconv_open(), but
184    /// may be implemented via libiconv on UNIX flavors that lack
185    /// a native implementation.
186    ///
187    /// GLib provides g_convert() and g_locale_to_utf8() which are likely
188    /// more convenient than the raw iconv wrappers.
189    /// ## `to_codeset`
190    /// destination codeset
191    /// ## `from_codeset`
192    /// source codeset
193    ///
194    /// # Returns
195    ///
196    /// a "conversion descriptor", or (GIConv)-1 if
197    ///  opening the converter failed.
198    #[doc(alias = "g_iconv_open")]
199    #[allow(clippy::unnecessary_lazy_evaluations)]
200    pub fn new(to_codeset: impl IntoGStr, from_codeset: impl IntoGStr) -> Option<Self> {
201        let iconv = to_codeset.run_with_gstr(|to_codeset| {
202            from_codeset.run_with_gstr(|from_codeset| unsafe {
203                ffi::g_iconv_open(to_codeset.to_glib_none().0, from_codeset.to_glib_none().0)
204            })
205        });
206        (iconv as isize != -1).then(|| Self(iconv))
207    }
208    #[doc(alias = "g_convert_with_iconv")]
209    pub fn convert(&mut self, str_: &[u8]) -> Result<(Slice<u8>, usize), CvtError> {
210        assert!(str_.len() <= isize::MAX as usize);
211        let mut bytes_read = 0;
212        let mut bytes_written = 0;
213        let mut error = ptr::null_mut();
214        let result = unsafe {
215            ffi::g_convert_with_iconv(
216                str_.as_ptr(),
217                str_.len() as isize,
218                self.0,
219                &mut bytes_read,
220                &mut bytes_written,
221                &mut error,
222            )
223        };
224        if result.is_null() {
225            Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
226        } else {
227            let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
228            Ok((slice, bytes_read))
229        }
230    }
231    #[doc(alias = "g_iconv")]
232    pub fn iconv(
233        &mut self,
234        inbuf: Option<&[u8]>,
235        outbuf: Option<&mut [std::mem::MaybeUninit<u8>]>,
236    ) -> Result<(usize, usize, usize), IConvError> {
237        let input_len = inbuf.as_ref().map(|b| b.len()).unwrap_or_default();
238        let mut inbytes_left = input_len;
239        let mut outbytes_left = outbuf.as_ref().map(|b| b.len()).unwrap_or_default();
240        let mut inbuf = inbuf
241            .map(|b| mut_override(b.as_ptr()) as *mut c_char)
242            .unwrap_or_else(ptr::null_mut);
243        let mut outbuf = outbuf
244            .map(|b| b.as_mut_ptr() as *mut c_char)
245            .unwrap_or_else(ptr::null_mut);
246        let conversions = unsafe {
247            ffi::g_iconv(
248                self.0,
249                &mut inbuf,
250                &mut inbytes_left,
251                &mut outbuf,
252                &mut outbytes_left,
253            )
254        };
255        if conversions as isize == -1 {
256            let err = io::Error::last_os_error();
257            let code = err.raw_os_error().unwrap();
258            if code == libc::EILSEQ || code == libc::EINVAL {
259                Err(IConvError::WithOffset {
260                    source: err,
261                    offset: input_len - inbytes_left,
262                })
263            } else {
264                Err(err.into())
265            }
266        } else {
267            Ok((conversions, inbytes_left, outbytes_left))
268        }
269    }
270}
271
272impl Drop for IConv {
273    #[inline]
274    fn drop(&mut self) {
275        unsafe {
276            ffi::g_iconv_close(self.0);
277        }
278    }
279}
280
281#[doc(alias = "g_get_filename_charsets")]
282#[doc(alias = "get_filename_charsets")]
283pub fn filename_charsets() -> (bool, Vec<GString>) {
284    let mut filename_charsets = ptr::null_mut();
285    unsafe {
286        let is_utf8 = ffi::g_get_filename_charsets(&mut filename_charsets);
287        (
288            from_glib(is_utf8),
289            FromGlibPtrContainer::from_glib_none(filename_charsets),
290        )
291    }
292}
293
294#[doc(alias = "g_filename_from_utf8")]
295pub fn filename_from_utf8(utf8string: impl IntoGStr) -> Result<(PathBuf, usize), CvtError> {
296    let mut bytes_read = 0;
297    let mut bytes_written = std::mem::MaybeUninit::uninit();
298    let mut error = ptr::null_mut();
299    let ret = utf8string.run_with_gstr(|utf8string| {
300        assert!(utf8string.len() <= isize::MAX as usize);
301        let len = utf8string.len() as isize;
302        unsafe {
303            ffi::g_filename_from_utf8(
304                utf8string.to_glib_none().0,
305                len,
306                &mut bytes_read,
307                bytes_written.as_mut_ptr(),
308                &mut error,
309            )
310        }
311    });
312    if error.is_null() {
313        Ok(unsafe {
314            (
315                PathBuf::from_glib_full_num(ret, bytes_written.assume_init()),
316                bytes_read,
317            )
318        })
319    } else {
320        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
321    }
322}
323
324#[doc(alias = "g_filename_to_utf8")]
325pub fn filename_to_utf8(
326    opsysstring: impl AsRef<std::path::Path>,
327) -> Result<(crate::GString, usize), CvtError> {
328    let path = opsysstring.as_ref().to_glib_none();
329    let mut bytes_read = 0;
330    let mut bytes_written = std::mem::MaybeUninit::uninit();
331    let mut error = ptr::null_mut();
332    let ret = unsafe {
333        ffi::g_filename_to_utf8(
334            path.0,
335            path.1.as_bytes().len() as isize,
336            &mut bytes_read,
337            bytes_written.as_mut_ptr(),
338            &mut error,
339        )
340    };
341    if error.is_null() {
342        Ok(unsafe {
343            (
344                GString::from_glib_full_num(ret, bytes_written.assume_init()),
345                bytes_read,
346            )
347        })
348    } else {
349        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
350    }
351}
352
353#[doc(alias = "g_locale_from_utf8")]
354pub fn locale_from_utf8(utf8string: impl IntoGStr) -> Result<(Slice<u8>, usize), CvtError> {
355    let mut bytes_read = 0;
356    let mut bytes_written = std::mem::MaybeUninit::uninit();
357    let mut error = ptr::null_mut();
358    let ret = utf8string.run_with_gstr(|utf8string| {
359        assert!(utf8string.len() <= isize::MAX as usize);
360        unsafe {
361            ffi::g_locale_from_utf8(
362                utf8string.as_ptr(),
363                utf8string.len() as isize,
364                &mut bytes_read,
365                bytes_written.as_mut_ptr(),
366                &mut error,
367            )
368        }
369    });
370    if error.is_null() {
371        Ok(unsafe {
372            (
373                Slice::from_glib_full_num(ret, bytes_written.assume_init() + 1),
374                bytes_read,
375            )
376        })
377    } else {
378        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
379    }
380}
381
382#[doc(alias = "g_locale_to_utf8")]
383pub fn locale_to_utf8(opsysstring: &[u8]) -> Result<(crate::GString, usize), CvtError> {
384    let len = opsysstring.len() as isize;
385    let mut bytes_read = 0;
386    let mut bytes_written = std::mem::MaybeUninit::uninit();
387    let mut error = ptr::null_mut();
388    let ret = unsafe {
389        ffi::g_locale_to_utf8(
390            opsysstring.to_glib_none().0,
391            len,
392            &mut bytes_read,
393            bytes_written.as_mut_ptr(),
394            &mut error,
395        )
396    };
397    if error.is_null() {
398        Ok(unsafe {
399            (
400                GString::from_glib_full_num(ret, bytes_written.assume_init()),
401                bytes_read,
402            )
403        })
404    } else {
405        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
406    }
407}
408
409#[doc(alias = "g_utf8_to_ucs4")]
410#[doc(alias = "g_utf8_to_ucs4_fast")]
411#[doc(alias = "utf8_to_ucs4")]
412pub fn utf8_to_utf32(str: impl AsRef<str>) -> Slice<char> {
413    unsafe {
414        let mut items_written = 0;
415
416        let str_as_utf32 = ffi::g_utf8_to_ucs4_fast(
417            str.as_ref().as_ptr().cast::<c_char>(),
418            str.as_ref().len() as _,
419            &mut items_written,
420        );
421
422        // NOTE: We assume that u32 and char have the same layout and trust that glib won't give us
423        //       invalid UTF-32 codepoints
424        Slice::from_glib_full_num(str_as_utf32, items_written as usize)
425    }
426}
427
428#[doc(alias = "g_ucs4_to_utf8")]
429#[doc(alias = "ucs4_to_utf8")]
430pub fn utf32_to_utf8(str: impl AsRef<[char]>) -> GString {
431    let mut items_read = 0;
432    let mut items_written = 0;
433    let mut error = ptr::null_mut();
434
435    unsafe {
436        let str_as_utf8 = ffi::g_ucs4_to_utf8(
437            str.as_ref().as_ptr().cast::<u32>(),
438            str.as_ref().len() as _,
439            &mut items_read,
440            &mut items_written,
441            &mut error,
442        );
443
444        debug_assert!(
445            error.is_null(),
446            "Rust `char` should always be convertible to UTF-8"
447        );
448
449        GString::from_glib_full_num(str_as_utf8, items_written as usize)
450    }
451}
452
453#[doc(alias = "g_utf8_casefold")]
454#[doc(alias = "utf8_casefold")]
455pub fn casefold(str: impl AsRef<str>) -> GString {
456    unsafe {
457        let str = ffi::g_utf8_casefold(str.as_ref().as_ptr().cast(), str.as_ref().len() as isize);
458
459        from_glib_full(str)
460    }
461}
462
463#[doc(alias = "g_utf8_normalize")]
464#[doc(alias = "utf8_normalize")]
465pub fn normalize(str: impl AsRef<str>, mode: NormalizeMode) -> GString {
466    unsafe {
467        let str = ffi::g_utf8_normalize(
468            str.as_ref().as_ptr().cast(),
469            str.as_ref().len() as isize,
470            mode.into_glib(),
471        );
472
473        from_glib_full(str)
474    }
475}
476
#[cfg(test)]
mod tests {
    // Plain ASCII converts, a non-ASCII byte is rejected, and the fallback argument decides how
    // an unrepresentable character is rendered.
    #[test]
    fn convert_ascii() {
        let valid = super::convert(b"Hello", "utf-8", "ascii");
        assert!(valid.is_ok());

        let invalid = super::convert(b"He\xaallo", "utf-8", "ascii");
        assert!(invalid.is_err());

        let (escaped, _) =
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", crate::NONE_STR)
                .unwrap();
        assert_eq!(escaped.as_slice(), b"H\\u00e9llo");

        let (replaced, _) =
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", Some("_")).unwrap();
        assert_eq!(replaced.as_slice(), b"H_llo");
    }

    // A descriptor can be reused across calls, and an unknown charset yields `None`.
    #[test]
    fn iconv() {
        let mut conv = super::IConv::new("utf-8", "ascii").unwrap();

        let valid = conv.convert(b"Hello");
        assert!(valid.is_ok());

        let invalid = conv.convert(b"He\xaallo");
        assert!(invalid.is_err());

        let unknown = super::IConv::new("utf-8", "badcharset123456789");
        assert!(unknown.is_none());
    }

    // Smoke test: the call must simply not crash.
    #[test]
    fn filename_charsets() {
        let _ = super::filename_charsets();
    }

    // Round-trips between UTF-8 and UTF-32, including characters outside the BMP.
    #[test]
    fn utf8_and_utf32() {
        let chars = ['A', 'b', '🤔'];
        let as_utf8 = super::utf32_to_utf8(chars);
        assert_eq!(as_utf8, "Ab🤔");

        let text = "🤔 ț";
        let as_utf32 = super::utf8_to_utf32(text);
        assert_eq!(as_utf32.as_slice(), &['🤔', ' ', 'ț']);
    }
}