glib/
convert.rs

1// Take a look at the license at the top of the repository in the LICENSE file.
2
3use std::{fmt, io, os::raw::c_char, path::PathBuf, ptr};
4
5use crate::{ffi, translate::*, ConvertError, Error, GString, NormalizeMode, Slice};
6
// rustdoc-stripper-ignore-next
/// A wrapper for [`ConvertError`](crate::ConvertError) that can hold an offset into the input
/// string.
#[derive(Debug)]
pub enum CvtError {
    // rustdoc-stripper-ignore-next
    /// A conversion error that carries no input offset.
    Convert(Error),
    // rustdoc-stripper-ignore-next
    /// An error paired with a byte offset into the input; `source` is the wrapped error and
    /// `offset` is the number of input bytes that had been read when it was reported.
    IllegalSequence { source: Error, offset: usize },
}
15
16impl std::error::Error for CvtError {
17    fn source(&self) -> ::core::option::Option<&(dyn std::error::Error + 'static)> {
18        match self {
19            CvtError::Convert(err) => std::error::Error::source(err),
20            CvtError::IllegalSequence { source, .. } => Some(source),
21        }
22    }
23}
24
25impl fmt::Display for CvtError {
26    fn fmt(&self, fmt: &mut fmt::Formatter) -> ::core::fmt::Result {
27        match self {
28            CvtError::Convert(err) => fmt::Display::fmt(err, fmt),
29            CvtError::IllegalSequence { source, offset } => {
30                write!(fmt, "{source} at offset {offset}")
31            }
32        }
33    }
34}
35
36impl std::convert::From<Error> for CvtError {
37    fn from(err: Error) -> Self {
38        CvtError::Convert(err)
39    }
40}
41
42impl CvtError {
43    #[inline]
44    fn new(err: Error, bytes_read: usize) -> Self {
45        if err.kind::<ConvertError>() == Some(ConvertError::IllegalSequence) {
46            Self::IllegalSequence {
47                source: err,
48                offset: bytes_read,
49            }
50        } else {
51            err.into()
52        }
53    }
54}
55
56#[doc(alias = "g_convert")]
57pub fn convert(
58    str_: &[u8],
59    to_codeset: impl IntoGStr,
60    from_codeset: impl IntoGStr,
61) -> Result<(Slice<u8>, usize), CvtError> {
62    assert!(str_.len() <= isize::MAX as usize);
63    let mut bytes_read = 0;
64    let mut bytes_written = 0;
65    let mut error = ptr::null_mut();
66    let result = to_codeset.run_with_gstr(|to_codeset| {
67        from_codeset.run_with_gstr(|from_codeset| unsafe {
68            ffi::g_convert(
69                str_.as_ptr(),
70                str_.len() as isize,
71                to_codeset.to_glib_none().0,
72                from_codeset.to_glib_none().0,
73                &mut bytes_read,
74                &mut bytes_written,
75                &mut error,
76            )
77        })
78    });
79    if result.is_null() {
80        Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
81    } else {
82        let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
83        Ok((slice, bytes_read))
84    }
85}
86
87#[doc(alias = "g_convert_with_fallback")]
88pub fn convert_with_fallback(
89    str_: &[u8],
90    to_codeset: impl IntoGStr,
91    from_codeset: impl IntoGStr,
92    fallback: Option<impl IntoGStr>,
93) -> Result<(Slice<u8>, usize), CvtError> {
94    assert!(str_.len() <= isize::MAX as usize);
95    let mut bytes_read = 0;
96    let mut bytes_written = 0;
97    let mut error = ptr::null_mut();
98    let result = to_codeset.run_with_gstr(|to_codeset| {
99        from_codeset.run_with_gstr(|from_codeset| {
100            fallback.run_with_gstr(|fallback| unsafe {
101                ffi::g_convert_with_fallback(
102                    str_.as_ptr(),
103                    str_.len() as isize,
104                    to_codeset.to_glib_none().0,
105                    from_codeset.to_glib_none().0,
106                    fallback.to_glib_none().0,
107                    &mut bytes_read,
108                    &mut bytes_written,
109                    &mut error,
110                )
111            })
112        })
113    });
114    if result.is_null() {
115        Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
116    } else {
117        let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
118        Ok((slice, bytes_read))
119    }
120}
121
// rustdoc-stripper-ignore-next
/// A wrapper for [`std::io::Error`] that may additionally record a byte offset into the
/// input string.
#[derive(Debug)]
pub enum IConvError {
    // rustdoc-stripper-ignore-next
    /// An I/O error without an associated input offset.
    Error(io::Error),
    // rustdoc-stripper-ignore-next
    /// An I/O error together with the input byte offset it was reported at.
    WithOffset { source: io::Error, offset: usize },
}

impl std::error::Error for IConvError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            // The offset-less variant is transparent: forward the wrapped error's own source.
            Self::Error(inner) => std::error::Error::source(inner),
            // The offset-carrying variant exposes the wrapped error itself.
            Self::WithOffset { source: inner, .. } => Some(inner),
        }
    }
}

impl fmt::Display for IConvError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Error(inner) => fmt::Display::fmt(inner, f),
            Self::WithOffset { source, offset } => write!(f, "{source} at offset {offset}"),
        }
    }
}

impl From<io::Error> for IConvError {
    fn from(inner: io::Error) -> Self {
        Self::Error(inner)
    }
}
153
/// The GIConv struct wraps an iconv() conversion descriptor. It contains
/// private data and should only be accessed using the following functions.
#[derive(Debug)]
#[repr(transparent)]
#[doc(alias = "GIConv")]
pub struct IConv(ffi::GIConv);
163
// NOTE(review): this asserts that a conversion descriptor may be moved to another thread.
// Presumably sound because the conversion methods take `&mut self` and no `Sync` impl is
// provided here — confirm against the GLib/iconv guarantees.
unsafe impl Send for IConv {}
165
166impl IConv {
167    /// Same as the standard UNIX routine iconv_open(), but
168    /// may be implemented via libiconv on UNIX flavors that lack
169    /// a native implementation.
170    ///
171    /// GLib provides g_convert() and g_locale_to_utf8() which are likely
172    /// more convenient than the raw iconv wrappers.
173    /// ## `to_codeset`
174    /// destination codeset
175    /// ## `from_codeset`
176    /// source codeset
177    ///
178    /// # Returns
179    ///
180    /// a "conversion descriptor", or (GIConv)-1 if
181    ///  opening the converter failed.
182    // rustdoc-stripper-ignore-next-stop
183    /// Same as the standard UNIX routine iconv_open(), but
184    /// may be implemented via libiconv on UNIX flavors that lack
185    /// a native implementation.
186    ///
187    /// GLib provides g_convert() and g_locale_to_utf8() which are likely
188    /// more convenient than the raw iconv wrappers.
189    /// ## `to_codeset`
190    /// destination codeset
191    /// ## `from_codeset`
192    /// source codeset
193    ///
194    /// # Returns
195    ///
196    /// a "conversion descriptor", or (GIConv)-1 if
197    ///  opening the converter failed.
198    #[doc(alias = "g_iconv_open")]
199    #[allow(clippy::unnecessary_lazy_evaluations)]
200    pub fn new(to_codeset: impl IntoGStr, from_codeset: impl IntoGStr) -> Option<Self> {
201        let iconv = to_codeset.run_with_gstr(|to_codeset| {
202            from_codeset.run_with_gstr(|from_codeset| unsafe {
203                ffi::g_iconv_open(to_codeset.to_glib_none().0, from_codeset.to_glib_none().0)
204            })
205        });
206        (iconv as isize != -1).then(|| Self(iconv))
207    }
208    #[doc(alias = "g_convert_with_iconv")]
209    pub fn convert(&mut self, str_: &[u8]) -> Result<(Slice<u8>, usize), CvtError> {
210        assert!(str_.len() <= isize::MAX as usize);
211        let mut bytes_read = 0;
212        let mut bytes_written = 0;
213        let mut error = ptr::null_mut();
214        let result = unsafe {
215            ffi::g_convert_with_iconv(
216                str_.as_ptr(),
217                str_.len() as isize,
218                self.0,
219                &mut bytes_read,
220                &mut bytes_written,
221                &mut error,
222            )
223        };
224        if result.is_null() {
225            Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
226        } else {
227            let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
228            Ok((slice, bytes_read))
229        }
230    }
231    /// Same as the standard UNIX routine iconv(), but
232    /// may be implemented via libiconv on UNIX flavors that lack
233    /// a native implementation.
234    ///
235    /// GLib provides g_convert() and g_locale_to_utf8() which are likely
236    /// more convenient than the raw iconv wrappers.
237    ///
238    /// Note that the behaviour of iconv() for characters which are valid in the
239    /// input character set, but which have no representation in the output character
240    /// set, is implementation defined. This function may return success (with a
241    /// positive number of non-reversible conversions as replacement characters were
242    /// used), or it may return -1 and set an error such as `EILSEQ`, in such a
243    /// situation.
244    /// ## `converter`
245    /// conversion descriptor from g_iconv_open()
246    /// ## `inbuf`
247    /// bytes to convert
248    /// ## `inbytes_left`
249    /// inout parameter, bytes remaining to convert in @inbuf
250    /// ## `outbuf`
251    /// converted output bytes
252    /// ## `outbytes_left`
253    /// inout parameter, bytes available to fill in @outbuf
254    ///
255    /// # Returns
256    ///
257    /// count of non-reversible conversions, or -1 on error
258    #[doc(alias = "g_iconv")]
259    pub fn iconv(
260        &mut self,
261        inbuf: Option<&[u8]>,
262        outbuf: Option<&mut [std::mem::MaybeUninit<u8>]>,
263    ) -> Result<(usize, usize, usize), IConvError> {
264        let input_len = inbuf.as_ref().map(|b| b.len()).unwrap_or_default();
265        let mut inbytes_left = input_len;
266        let mut outbytes_left = outbuf.as_ref().map(|b| b.len()).unwrap_or_default();
267        let mut inbuf = inbuf
268            .map(|b| mut_override(b.as_ptr()) as *mut c_char)
269            .unwrap_or_else(ptr::null_mut);
270        let mut outbuf = outbuf
271            .map(|b| b.as_mut_ptr() as *mut c_char)
272            .unwrap_or_else(ptr::null_mut);
273        let conversions = unsafe {
274            ffi::g_iconv(
275                self.0,
276                &mut inbuf,
277                &mut inbytes_left,
278                &mut outbuf,
279                &mut outbytes_left,
280            )
281        };
282        if conversions as isize == -1 {
283            let err = io::Error::last_os_error();
284            let code = err.raw_os_error().unwrap();
285            if code == libc::EILSEQ || code == libc::EINVAL {
286                Err(IConvError::WithOffset {
287                    source: err,
288                    offset: input_len - inbytes_left,
289                })
290            } else {
291                Err(err.into())
292            }
293        } else {
294            Ok((conversions, inbytes_left, outbytes_left))
295        }
296    }
297}
298
299impl Drop for IConv {
300    #[inline]
301    fn drop(&mut self) {
302        unsafe {
303            ffi::g_iconv_close(self.0);
304        }
305    }
306}
307
308#[doc(alias = "g_get_filename_charsets")]
309#[doc(alias = "get_filename_charsets")]
310pub fn filename_charsets() -> (bool, Vec<GString>) {
311    let mut filename_charsets = ptr::null_mut();
312    unsafe {
313        let is_utf8 = ffi::g_get_filename_charsets(&mut filename_charsets);
314        (
315            from_glib(is_utf8),
316            FromGlibPtrContainer::from_glib_none(filename_charsets),
317        )
318    }
319}
320
321#[doc(alias = "g_filename_from_utf8")]
322pub fn filename_from_utf8(utf8string: impl IntoGStr) -> Result<(PathBuf, usize), CvtError> {
323    let mut bytes_read = 0;
324    let mut bytes_written = std::mem::MaybeUninit::uninit();
325    let mut error = ptr::null_mut();
326    let ret = utf8string.run_with_gstr(|utf8string| {
327        assert!(utf8string.len() <= isize::MAX as usize);
328        let len = utf8string.len() as isize;
329        unsafe {
330            ffi::g_filename_from_utf8(
331                utf8string.to_glib_none().0,
332                len,
333                &mut bytes_read,
334                bytes_written.as_mut_ptr(),
335                &mut error,
336            )
337        }
338    });
339    if error.is_null() {
340        Ok(unsafe {
341            (
342                PathBuf::from_glib_full_num(ret, bytes_written.assume_init()),
343                bytes_read,
344            )
345        })
346    } else {
347        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
348    }
349}
350
351#[doc(alias = "g_filename_to_utf8")]
352pub fn filename_to_utf8(
353    opsysstring: impl AsRef<std::path::Path>,
354) -> Result<(crate::GString, usize), CvtError> {
355    let path = opsysstring.as_ref().to_glib_none();
356    let mut bytes_read = 0;
357    let mut bytes_written = std::mem::MaybeUninit::uninit();
358    let mut error = ptr::null_mut();
359    let ret = unsafe {
360        ffi::g_filename_to_utf8(
361            path.0,
362            path.1.as_bytes().len() as isize,
363            &mut bytes_read,
364            bytes_written.as_mut_ptr(),
365            &mut error,
366        )
367    };
368    if error.is_null() {
369        Ok(unsafe {
370            (
371                GString::from_glib_full_num(ret, bytes_written.assume_init()),
372                bytes_read,
373            )
374        })
375    } else {
376        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
377    }
378}
379
380/// Converts a string from UTF-8 to the encoding used for strings by
381/// the C runtime (usually the same as that used by the operating
382/// system) in the [current locale](running.html#locale).
383/// On Windows this means the system codepage.
384///
385/// The input string shall not contain nul characters even if the @len
386/// argument is positive. A nul character found inside the string will result
387/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. Use g_convert() to convert
388/// input that may contain embedded nul characters.
389/// ## `utf8string`
390/// a UTF-8 encoded string
391/// ## `len`
392/// the length of the string, or -1 if the string is
393///                 nul-terminated.
394///
395/// # Returns
396///
397///
398///          A newly-allocated buffer containing the converted string,
399///          or [`None`] on an error, and error will be set.
400///
401/// ## `bytes_read`
402/// location to store the number of bytes in the
403///                 input string that were successfully converted, or [`None`].
404///                 Even if the conversion was successful, this may be
405///                 less than @len if there were partial characters
406///                 at the end of the input. If the error
407///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
408///                 stored will be the byte offset after the last valid
409///                 input sequence.
410#[doc(alias = "g_locale_from_utf8")]
411pub fn locale_from_utf8(utf8string: impl IntoGStr) -> Result<(Slice<u8>, usize), CvtError> {
412    let mut bytes_read = 0;
413    let mut bytes_written = std::mem::MaybeUninit::uninit();
414    let mut error = ptr::null_mut();
415    let ret = utf8string.run_with_gstr(|utf8string| {
416        assert!(utf8string.len() <= isize::MAX as usize);
417        unsafe {
418            ffi::g_locale_from_utf8(
419                utf8string.as_ptr(),
420                utf8string.len() as isize,
421                &mut bytes_read,
422                bytes_written.as_mut_ptr(),
423                &mut error,
424            )
425        }
426    });
427    if error.is_null() {
428        Ok(unsafe {
429            (
430                Slice::from_glib_full_num(ret, bytes_written.assume_init() + 1),
431                bytes_read,
432            )
433        })
434    } else {
435        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
436    }
437}
438
439/// Converts a string which is in the encoding used for strings by
440/// the C runtime (usually the same as that used by the operating
441/// system) in the [current locale](running.html#locale) into a UTF-8 string.
442///
443/// If the source encoding is not UTF-8 and the conversion output contains a
444/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
445/// function returns [`None`].
446/// If the source encoding is UTF-8, an embedded nul character is treated with
447/// the [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error for backward compatibility with
448/// earlier versions of this library. Use g_convert() to produce output that
449/// may contain embedded nul characters.
450/// ## `opsysstring`
451/// a string in the
452///                 encoding of the current locale. On Windows
453///                 this means the system codepage.
454///
455/// # Returns
456///
457/// The converted string, or [`None`] on an error.
458///
459/// ## `bytes_read`
460/// location to store the number of bytes in the
461///                 input string that were successfully converted, or [`None`].
462///                 Even if the conversion was successful, this may be
463///                 less than @len if there were partial characters
464///                 at the end of the input. If the error
465///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
466///                 stored will be the byte offset after the last valid
467///                 input sequence.
468///
469/// ## `bytes_written`
470/// the number of bytes stored in the output
471///                 buffer (not including the terminating nul).
472#[doc(alias = "g_locale_to_utf8")]
473pub fn locale_to_utf8(opsysstring: &[u8]) -> Result<(crate::GString, usize), CvtError> {
474    let len = opsysstring.len() as isize;
475    let mut bytes_read = 0;
476    let mut bytes_written = std::mem::MaybeUninit::uninit();
477    let mut error = ptr::null_mut();
478    let ret = unsafe {
479        ffi::g_locale_to_utf8(
480            opsysstring.to_glib_none().0,
481            len,
482            &mut bytes_read,
483            bytes_written.as_mut_ptr(),
484            &mut error,
485        )
486    };
487    if error.is_null() {
488        Ok(unsafe {
489            (
490                GString::from_glib_full_num(ret, bytes_written.assume_init()),
491                bytes_read,
492            )
493        })
494    } else {
495        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
496    }
497}
498
499#[doc(alias = "g_utf8_to_ucs4")]
500#[doc(alias = "g_utf8_to_ucs4_fast")]
501#[doc(alias = "utf8_to_ucs4")]
502pub fn utf8_to_utf32(str: impl AsRef<str>) -> Slice<char> {
503    unsafe {
504        let mut items_written = 0;
505
506        let str_as_utf32 = ffi::g_utf8_to_ucs4_fast(
507            str.as_ref().as_ptr().cast::<c_char>(),
508            str.as_ref().len() as _,
509            &mut items_written,
510        );
511
512        // NOTE: We assume that u32 and char have the same layout and trust that glib won't give us
513        //       invalid UTF-32 codepoints
514        Slice::from_glib_full_num(str_as_utf32, items_written as usize)
515    }
516}
517
518#[doc(alias = "g_ucs4_to_utf8")]
519#[doc(alias = "ucs4_to_utf8")]
520pub fn utf32_to_utf8(str: impl AsRef<[char]>) -> GString {
521    let mut items_read = 0;
522    let mut items_written = 0;
523    let mut error = ptr::null_mut();
524
525    unsafe {
526        let str_as_utf8 = ffi::g_ucs4_to_utf8(
527            str.as_ref().as_ptr().cast::<u32>(),
528            str.as_ref().len() as _,
529            &mut items_read,
530            &mut items_written,
531            &mut error,
532        );
533
534        debug_assert!(
535            error.is_null(),
536            "Rust `char` should always be convertible to UTF-8"
537        );
538
539        GString::from_glib_full_num(str_as_utf8, items_written as usize)
540    }
541}
542
543#[doc(alias = "g_utf8_casefold")]
544#[doc(alias = "utf8_casefold")]
545pub fn casefold(str: impl AsRef<str>) -> GString {
546    unsafe {
547        let str = ffi::g_utf8_casefold(str.as_ref().as_ptr().cast(), str.as_ref().len() as isize);
548
549        from_glib_full(str)
550    }
551}
552
553#[doc(alias = "g_utf8_normalize")]
554#[doc(alias = "utf8_normalize")]
555pub fn normalize(str: impl AsRef<str>, mode: NormalizeMode) -> GString {
556    unsafe {
557        let str = ffi::g_utf8_normalize(
558            str.as_ref().as_ptr().cast(),
559            str.as_ref().len() as isize,
560            mode.into_glib(),
561        );
562
563        from_glib_full(str)
564    }
565}
566
#[cfg(test)]
mod tests {
    #[test]
    fn convert_ascii() {
        // Plain ASCII converts, a byte outside the ASCII range is rejected.
        let ascii_ok = super::convert(b"Hello", "utf-8", "ascii");
        assert!(ascii_ok.is_ok());
        let ascii_bad = super::convert(b"He\xaallo", "utf-8", "ascii");
        assert!(ascii_bad.is_err());

        // Without a fallback string the unrepresentable character is escaped.
        let (escaped, _) =
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", crate::NONE_STR)
                .unwrap();
        assert_eq!(escaped.as_slice(), b"H\\u00e9llo");

        // With a fallback string the unrepresentable character is replaced by it.
        let (replaced, _) =
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", Some("_")).unwrap();
        assert_eq!(replaced.as_slice(), b"H_llo");
    }

    #[test]
    fn iconv() {
        let mut conv = super::IConv::new("utf-8", "ascii").unwrap();
        let good = conv.convert(b"Hello");
        assert!(good.is_ok());
        let bad = conv.convert(b"He\xaallo");
        assert!(bad.is_err());

        // An unknown character set must not yield a descriptor.
        let unknown = super::IConv::new("utf-8", "badcharset123456789");
        assert!(unknown.is_none());
    }

    #[test]
    fn filename_charsets() {
        // Only checks that the call completes.
        let _ = super::filename_charsets();
    }

    #[test]
    fn utf8_and_utf32() {
        let encoded = super::utf32_to_utf8(['A', 'b', '🤔']);
        assert_eq!(encoded, "Ab🤔");

        let decoded = super::utf8_to_utf32("🤔 ț");
        assert_eq!(decoded.as_slice(), &['🤔', ' ', 'ț']);
    }
}