Skip to main content

glib/
convert.rs

1// Take a look at the license at the top of the repository in the LICENSE file.
2
3use std::{fmt, io, os::raw::c_char, path::PathBuf, ptr};
4
5use crate::{ConvertError, Error, GString, NormalizeMode, Slice, ffi, translate::*};
6
7// rustdoc-stripper-ignore-next
8/// A wrapper for [`ConvertError`](crate::ConvertError) that can hold an offset into the input
9/// string.
10#[derive(Debug)]
11pub enum CvtError {
12    Convert(Error),
13    IllegalSequence { source: Error, offset: usize },
14}
15
16impl std::error::Error for CvtError {
17    fn source(&self) -> ::core::option::Option<&(dyn std::error::Error + 'static)> {
18        match self {
19            CvtError::Convert(err) => std::error::Error::source(err),
20            CvtError::IllegalSequence { source, .. } => Some(source),
21        }
22    }
23}
24
25impl fmt::Display for CvtError {
26    fn fmt(&self, fmt: &mut fmt::Formatter) -> ::core::fmt::Result {
27        match self {
28            CvtError::Convert(err) => fmt::Display::fmt(err, fmt),
29            CvtError::IllegalSequence { source, offset } => {
30                write!(fmt, "{source} at offset {offset}")
31            }
32        }
33    }
34}
35
36impl std::convert::From<Error> for CvtError {
37    fn from(err: Error) -> Self {
38        CvtError::Convert(err)
39    }
40}
41
42impl CvtError {
43    #[inline]
44    fn new(err: Error, bytes_read: usize) -> Self {
45        if err.kind::<ConvertError>() == Some(ConvertError::IllegalSequence) {
46            Self::IllegalSequence {
47                source: err,
48                offset: bytes_read,
49            }
50        } else {
51            err.into()
52        }
53    }
54}
55
56/// Converts a string from one character set to another.
57///
58/// Note that you should use g_iconv() for streaming conversions.
59/// Despite the fact that @bytes_read can return information about partial
60/// characters, the g_convert_... functions are not generally suitable
61/// for streaming. If the underlying converter maintains internal state,
62/// then this won't be preserved across successive calls to g_convert(),
63/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
64/// this is the GNU C converter for CP1255 which does not emit a base
65/// character until it knows that the next character is not a mark that
66/// could combine with the base character.)
67///
68/// Using extensions such as "//TRANSLIT" may not work (or may not work
69/// well) on many platforms.  Consider using g_str_to_ascii() instead.
70/// ## `str`
71///
72///                 the string to convert.
73/// ## `to_codeset`
74/// name of character set into which to convert @str
75/// ## `from_codeset`
76/// character set of @str.
77///
78/// # Returns
79///
80///
81///          If the conversion was successful, a newly allocated buffer
82///          containing the converted string, which must be freed with g_free().
83///          Otherwise [`None`] and @error will be set.
84///
85/// ## `bytes_read`
86/// location to store the number of bytes in
87///                 the input string that were successfully converted, or [`None`].
88///                 Even if the conversion was successful, this may be
89///                 less than @len if there were partial characters
90///                 at the end of the input. If the error
91///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
92///                 stored will be the byte offset after the last valid
93///                 input sequence.
94#[doc(alias = "g_convert")]
95pub fn convert(
96    str_: &[u8],
97    to_codeset: impl IntoGStr,
98    from_codeset: impl IntoGStr,
99) -> Result<(Slice<u8>, usize), CvtError> {
100    assert!(str_.len() <= isize::MAX as usize);
101    let mut bytes_read = 0;
102    let mut bytes_written = 0;
103    let mut error = ptr::null_mut();
104    let result = to_codeset.run_with_gstr(|to_codeset| {
105        from_codeset.run_with_gstr(|from_codeset| unsafe {
106            ffi::g_convert(
107                str_.as_ptr(),
108                str_.len() as isize,
109                to_codeset.to_glib_none().0,
110                from_codeset.to_glib_none().0,
111                &mut bytes_read,
112                &mut bytes_written,
113                &mut error,
114            )
115        })
116    });
117    if result.is_null() {
118        Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
119    } else {
120        let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
121        Ok((slice, bytes_read))
122    }
123}
124
125/// Converts a string from one character set to another, possibly
126/// including fallback sequences for characters not representable
127/// in the output. Note that it is not guaranteed that the specification
128/// for the fallback sequences in @fallback will be honored. Some
129/// systems may do an approximate conversion from @from_codeset
130/// to @to_codeset in their iconv() functions,
131/// in which case GLib will simply return that approximate conversion.
132///
133/// Note that you should use g_iconv() for streaming conversions.
134/// Despite the fact that @bytes_read can return information about partial
135/// characters, the g_convert_... functions are not generally suitable
136/// for streaming. If the underlying converter maintains internal state,
137/// then this won't be preserved across successive calls to g_convert(),
138/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
139/// this is the GNU C converter for CP1255 which does not emit a base
140/// character until it knows that the next character is not a mark that
141/// could combine with the base character.)
142/// ## `str`
143///
144///                the string to convert.
145/// ## `to_codeset`
146/// name of character set into which to convert @str
147/// ## `from_codeset`
148/// character set of @str.
149/// ## `fallback`
150/// UTF-8 string to use in place of characters not
151///                present in the target encoding. (The string must be
152///                representable in the target encoding).
153///                If [`None`], characters not in the target encoding will
154///                be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
155///
156/// # Returns
157///
158///
159///          If the conversion was successful, a newly allocated buffer
160///          containing the converted string, which must be freed with g_free().
161///          Otherwise [`None`] and @error will be set.
162///
163/// ## `bytes_read`
164/// location to store the number of bytes in
165///                the input string that were successfully converted, or [`None`].
166///                Even if the conversion was successful, this may be
167///                less than @len if there were partial characters
168///                at the end of the input.
169#[doc(alias = "g_convert_with_fallback")]
170pub fn convert_with_fallback(
171    str_: &[u8],
172    to_codeset: impl IntoGStr,
173    from_codeset: impl IntoGStr,
174    fallback: Option<impl IntoGStr>,
175) -> Result<(Slice<u8>, usize), CvtError> {
176    assert!(str_.len() <= isize::MAX as usize);
177    let mut bytes_read = 0;
178    let mut bytes_written = 0;
179    let mut error = ptr::null_mut();
180    let result = to_codeset.run_with_gstr(|to_codeset| {
181        from_codeset.run_with_gstr(|from_codeset| {
182            fallback.run_with_gstr(|fallback| unsafe {
183                ffi::g_convert_with_fallback(
184                    str_.as_ptr(),
185                    str_.len() as isize,
186                    to_codeset.to_glib_none().0,
187                    from_codeset.to_glib_none().0,
188                    fallback.to_glib_none().0,
189                    &mut bytes_read,
190                    &mut bytes_written,
191                    &mut error,
192                )
193            })
194        })
195    });
196    if result.is_null() {
197        Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
198    } else {
199        let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
200        Ok((slice, bytes_read))
201    }
202}
203
// rustdoc-stripper-ignore-next
/// Error returned by [`IConv::iconv`]: a [`std::io::Error`] that may be
/// accompanied by an offset into the input buffer.
#[derive(Debug)]
pub enum IConvError {
    /// An OS error without offset information.
    Error(io::Error),
    /// An OS error together with the input position it refers to.
    WithOffset {
        /// The underlying OS error.
        source: io::Error,
        /// Byte offset into the input buffer.
        offset: usize,
    },
}
211
212impl std::error::Error for IConvError {
213    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
214        match self {
215            IConvError::Error(err) => std::error::Error::source(err),
216            IConvError::WithOffset { source, .. } => Some(source),
217        }
218    }
219}
220
221impl fmt::Display for IConvError {
222    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
223        match self {
224            IConvError::Error(err) => fmt::Display::fmt(err, fmt),
225            IConvError::WithOffset { source, offset } => write!(fmt, "{source} at offset {offset}"),
226        }
227    }
228}
229
230impl std::convert::From<io::Error> for IConvError {
231    fn from(err: io::Error) -> Self {
232        IConvError::Error(err)
233    }
234}
235
236/// The GIConv struct wraps an iconv() conversion descriptor. It contains
237/// private data and should only be accessed using the following functions.
238#[derive(Debug)]
239#[repr(transparent)]
240#[doc(alias = "GIConv")]
241pub struct IConv(ffi::GIConv);
242
243unsafe impl Send for IConv {}
244
245impl IConv {
246    /// Same as the standard UNIX routine iconv_open(), but
247    /// may be implemented via libiconv on UNIX flavors that lack
248    /// a native implementation.
249    ///
250    /// GLib provides g_convert() and g_locale_to_utf8() which are likely
251    /// more convenient than the raw iconv wrappers.
252    /// ## `to_codeset`
253    /// destination codeset
254    /// ## `from_codeset`
255    /// source codeset
256    ///
257    /// # Returns
258    ///
259    /// a "conversion descriptor", or (GIConv)-1 if
260    ///  opening the converter failed.
261    #[doc(alias = "g_iconv_open")]
262    #[allow(clippy::unnecessary_lazy_evaluations)]
263    pub fn new(to_codeset: impl IntoGStr, from_codeset: impl IntoGStr) -> Option<Self> {
264        let iconv = to_codeset.run_with_gstr(|to_codeset| {
265            from_codeset.run_with_gstr(|from_codeset| unsafe {
266                ffi::g_iconv_open(to_codeset.to_glib_none().0, from_codeset.to_glib_none().0)
267            })
268        });
269        (iconv.addr() as isize != -1).then(|| Self(iconv))
270    }
271    /// Converts a string from one character set to another.
272    ///
273    /// Note that you should use g_iconv() for streaming conversions.
274    /// Despite the fact that @bytes_read can return information about partial
275    /// characters, the g_convert_... functions are not generally suitable
276    /// for streaming. If the underlying converter maintains internal state,
277    /// then this won't be preserved across successive calls to g_convert(),
278    /// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
279    /// this is the GNU C converter for CP1255 which does not emit a base
280    /// character until it knows that the next character is not a mark that
281    /// could combine with the base character.)
282    ///
283    /// Characters which are valid in the input character set, but which have no
284    /// representation in the output character set will result in a
285    /// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error. This is in contrast to the iconv()
286    /// specification, which leaves this behaviour implementation defined. Note that
287    /// this is the same error code as is returned for an invalid byte sequence in
288    /// the input character set. To get defined behaviour for conversion of
289    /// unrepresentable characters, use g_convert_with_fallback().
290    /// ## `str`
291    ///
292    ///                 the string to convert.
293    /// ## `converter`
294    /// conversion descriptor from g_iconv_open()
295    ///
296    /// # Returns
297    ///
298    ///
299    ///               If the conversion was successful, a newly allocated buffer
300    ///               containing the converted string, which must be freed with
301    ///               g_free(). Otherwise [`None`] and @error will be set.
302    ///
303    /// ## `bytes_read`
304    /// location to store the number of bytes in
305    ///                 the input string that were successfully converted, or [`None`].
306    ///                 Even if the conversion was successful, this may be
307    ///                 less than @len if there were partial characters
308    ///                 at the end of the input. If the error
309    ///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
310    ///                 stored will be the byte offset after the last valid
311    ///                 input sequence.
312    #[doc(alias = "g_convert_with_iconv")]
313    pub fn convert(&mut self, str_: &[u8]) -> Result<(Slice<u8>, usize), CvtError> {
314        assert!(str_.len() <= isize::MAX as usize);
315        let mut bytes_read = 0;
316        let mut bytes_written = 0;
317        let mut error = ptr::null_mut();
318        let result = unsafe {
319            ffi::g_convert_with_iconv(
320                str_.as_ptr(),
321                str_.len() as isize,
322                self.0,
323                &mut bytes_read,
324                &mut bytes_written,
325                &mut error,
326            )
327        };
328        if result.is_null() {
329            Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
330        } else {
331            let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
332            Ok((slice, bytes_read))
333        }
334    }
335    /// Same as the standard UNIX routine iconv(), but
336    /// may be implemented via libiconv on UNIX flavors that lack
337    /// a native implementation.
338    ///
339    /// GLib provides g_convert() and g_locale_to_utf8() which are likely
340    /// more convenient than the raw iconv wrappers.
341    ///
342    /// Note that the behaviour of iconv() for characters which are valid in the
343    /// input character set, but which have no representation in the output character
344    /// set, is implementation defined. This function may return success (with a
345    /// positive number of non-reversible conversions as replacement characters were
346    /// used), or it may return -1 and set an error such as `EILSEQ`, in such a
347    /// situation.
348    ///
349    /// See [`iconv(3posix)`](man:iconv(3posix)) and [`iconv(3)`](man:iconv(3)) for more details about behavior when an
350    /// error occurs.
351    /// ## `converter`
352    /// conversion descriptor from g_iconv_open()
353    /// ## `inbuf`
354    /// bytes to convert
355    /// ## `outbuf`
356    /// converted output bytes
357    ///
358    /// # Returns
359    ///
360    /// count of non-reversible conversions, or -1 on error
361    ///
362    /// ## `inbytes_left`
363    /// inout parameter, bytes remaining to convert in @inbuf
364    ///
365    /// ## `outbytes_left`
366    /// inout parameter, bytes available to fill in @outbuf
367    #[doc(alias = "g_iconv")]
368    pub fn iconv(
369        &mut self,
370        inbuf: Option<&[u8]>,
371        outbuf: Option<&mut [std::mem::MaybeUninit<u8>]>,
372    ) -> Result<(usize, usize, usize), IConvError> {
373        let input_len = inbuf.as_ref().map(|b| b.len()).unwrap_or_default();
374        let mut inbytes_left = input_len;
375        let mut outbytes_left = outbuf.as_ref().map(|b| b.len()).unwrap_or_default();
376        let mut inbuf = inbuf
377            .map(|b| mut_override(b.as_ptr()) as *mut c_char)
378            .unwrap_or_else(ptr::null_mut);
379        let mut outbuf = outbuf
380            .map(|b| b.as_mut_ptr() as *mut c_char)
381            .unwrap_or_else(ptr::null_mut);
382        let conversions = unsafe {
383            ffi::g_iconv(
384                self.0,
385                &mut inbuf,
386                &mut inbytes_left,
387                &mut outbuf,
388                &mut outbytes_left,
389            )
390        };
391        if conversions as isize == -1 {
392            let err = io::Error::last_os_error();
393            let code = err.raw_os_error().unwrap();
394            if code == libc::EILSEQ || code == libc::EINVAL {
395                Err(IConvError::WithOffset {
396                    source: err,
397                    offset: input_len - inbytes_left,
398                })
399            } else {
400                Err(err.into())
401            }
402        } else {
403            Ok((conversions, inbytes_left, outbytes_left))
404        }
405    }
406}
407
408impl Drop for IConv {
409    #[inline]
410    fn drop(&mut self) {
411        unsafe {
412            ffi::g_iconv_close(self.0);
413        }
414    }
415}
416
417/// Determines the preferred character sets used for filenames.
418/// The first character set from the @charsets is the filename encoding, the
419/// subsequent character sets are used when trying to generate a displayable
420/// representation of a filename, see g_filename_display_name().
421///
422/// On Unix, the character sets are determined by consulting the
423/// environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
424/// On Windows, the character set used in the GLib API is always UTF-8
425/// and said environment variables have no effect.
426///
427/// `G_FILENAME_ENCODING` may be set to a comma-separated list of
428/// character set names. The special token `@locale` is taken to mean the
429/// character set for the [current locale](running.html#locale).
430/// If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
431/// the character set of the current locale is taken as the filename
432/// encoding. If neither environment variable  is set, UTF-8 is taken
433/// as the filename encoding, but the character set of the current locale
434/// is also put in the list of encodings.
435///
436/// The returned @charsets belong to GLib and must not be freed.
437///
438/// Note that on Unix, regardless of the locale character set or
439/// `G_FILENAME_ENCODING` value, the actual file names present
440/// on a system might be in any random encoding or just gibberish.
441///
442/// # Returns
443///
444/// [`true`] if the filename encoding is UTF-8.
445///
446/// ## `filename_charsets`
447///
448///    return location for the [`None`]-terminated list of encoding names
449#[doc(alias = "g_get_filename_charsets")]
450#[doc(alias = "get_filename_charsets")]
451pub fn filename_charsets() -> (bool, Vec<GString>) {
452    let mut filename_charsets = ptr::null_mut();
453    unsafe {
454        let is_utf8 = ffi::g_get_filename_charsets(&mut filename_charsets);
455        (
456            from_glib(is_utf8),
457            FromGlibPtrContainer::from_glib_none(filename_charsets),
458        )
459    }
460}
461
462/// Converts a string from UTF-8 to the encoding GLib uses for
463/// filenames. Note that on Windows GLib uses UTF-8 for filenames;
464/// on other platforms, this function indirectly depends on the
465/// [current locale](running.html#locale).
466///
467/// The input string shall not contain nul characters even if the @len
468/// argument is positive. A nul character found inside the string will result
469/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. If the filename encoding is
470/// not UTF-8 and the conversion output contains a nul character, the error
471/// [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the function returns [`None`].
472/// ## `utf8string`
473/// a UTF-8 encoded string.
474/// ## `len`
475/// the length of the string, or -1 if the string is
476///                 nul-terminated.
477///
478/// # Returns
479///
480///
481///               The converted string, or [`None`] on an error.
482///
483/// ## `bytes_read`
484/// location to store the number of bytes in
485///                 the input string that were successfully converted, or [`None`].
486///                 Even if the conversion was successful, this may be
487///                 less than @len if there were partial characters
488///                 at the end of the input. If the error
489///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
490///                 stored will be the byte offset after the last valid
491///                 input sequence.
492///
493/// ## `bytes_written`
494/// the number of bytes stored in
495///                 the output buffer (not including the terminating nul).
496#[doc(alias = "g_filename_from_utf8")]
497pub fn filename_from_utf8(utf8string: impl IntoGStr) -> Result<(PathBuf, usize), CvtError> {
498    let mut bytes_read = 0;
499    let mut bytes_written = std::mem::MaybeUninit::uninit();
500    let mut error = ptr::null_mut();
501    let ret = utf8string.run_with_gstr(|utf8string| {
502        assert!(utf8string.len() <= isize::MAX as usize);
503        let len = utf8string.len() as isize;
504        unsafe {
505            ffi::g_filename_from_utf8(
506                utf8string.to_glib_none().0,
507                len,
508                &mut bytes_read,
509                bytes_written.as_mut_ptr(),
510                &mut error,
511            )
512        }
513    });
514    if error.is_null() {
515        Ok(unsafe {
516            (
517                PathBuf::from_glib_full_num(ret, bytes_written.assume_init()),
518                bytes_read,
519            )
520        })
521    } else {
522        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
523    }
524}
525
526/// Converts a string which is in the encoding used by GLib for
527/// filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
528/// for filenames; on other platforms, this function indirectly depends on
529/// the [current locale](running.html#locale).
530///
531/// The input string shall not contain nul characters even if the @len
532/// argument is positive. A nul character found inside the string will result
533/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence].
534/// If the source encoding is not UTF-8 and the conversion output contains a
535/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
536/// function returns [`None`]. Use g_convert() to produce output that
537/// may contain embedded nul characters.
538/// ## `opsysstring`
539/// a string in the encoding for filenames
540/// ## `len`
541/// the length of the string, or -1 if the string is
542///                 nul-terminated (Note that some encodings may allow nul
543///                 bytes to occur inside strings. In that case, using -1
544///                 for the @len parameter is unsafe)
545///
546/// # Returns
547///
548/// The converted string, or [`None`] on an error.
549///
550/// ## `bytes_read`
551/// location to store the number of bytes in the
552///                 input string that were successfully converted, or [`None`].
553///                 Even if the conversion was successful, this may be
554///                 less than @len if there were partial characters
555///                 at the end of the input. If the error
556///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
557///                 stored will be the byte offset after the last valid
558///                 input sequence.
559///
560/// ## `bytes_written`
561/// the number of bytes stored in the output
562///                 buffer (not including the terminating nul).
563#[doc(alias = "g_filename_to_utf8")]
564pub fn filename_to_utf8(
565    opsysstring: impl AsRef<std::path::Path>,
566) -> Result<(crate::GString, usize), CvtError> {
567    let path = opsysstring.as_ref().to_glib_none();
568    let mut bytes_read = 0;
569    let mut bytes_written = std::mem::MaybeUninit::uninit();
570    let mut error = ptr::null_mut();
571    let ret = unsafe {
572        ffi::g_filename_to_utf8(
573            path.0,
574            path.1.as_bytes().len() as isize,
575            &mut bytes_read,
576            bytes_written.as_mut_ptr(),
577            &mut error,
578        )
579    };
580    if error.is_null() {
581        Ok(unsafe {
582            (
583                GString::from_glib_full_num(ret, bytes_written.assume_init()),
584                bytes_read,
585            )
586        })
587    } else {
588        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
589    }
590}
591
592/// Converts a string from UTF-8 to the encoding used for strings by
593/// the C runtime (usually the same as that used by the operating
594/// system) in the [current locale](running.html#locale).
595/// On Windows this means the system codepage.
596///
597/// The input string shall not contain nul characters even if the @len
598/// argument is positive. A nul character found inside the string will result
599/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. Use g_convert() to convert
600/// input that may contain embedded nul characters.
601/// ## `utf8string`
602/// a UTF-8 encoded string
603/// ## `len`
604/// the length of the string, or -1 if the string is
605///                 nul-terminated.
606///
607/// # Returns
608///
609///
610///          A newly-allocated buffer containing the converted string,
611///          or [`None`] on an error, and error will be set.
612///
613/// ## `bytes_read`
614/// location to store the number of bytes in the
615///                 input string that were successfully converted, or [`None`].
616///                 Even if the conversion was successful, this may be
617///                 less than @len if there were partial characters
618///                 at the end of the input. If the error
619///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
620///                 stored will be the byte offset after the last valid
621///                 input sequence.
622#[doc(alias = "g_locale_from_utf8")]
623pub fn locale_from_utf8(utf8string: impl IntoGStr) -> Result<(Slice<u8>, usize), CvtError> {
624    let mut bytes_read = 0;
625    let mut bytes_written = std::mem::MaybeUninit::uninit();
626    let mut error = ptr::null_mut();
627    let ret = utf8string.run_with_gstr(|utf8string| {
628        assert!(utf8string.len() <= isize::MAX as usize);
629        unsafe {
630            ffi::g_locale_from_utf8(
631                utf8string.as_ptr(),
632                utf8string.len() as isize,
633                &mut bytes_read,
634                bytes_written.as_mut_ptr(),
635                &mut error,
636            )
637        }
638    });
639    if error.is_null() {
640        Ok(unsafe {
641            (
642                Slice::from_glib_full_num(ret, bytes_written.assume_init() + 1),
643                bytes_read,
644            )
645        })
646    } else {
647        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
648    }
649}
650
651/// Converts a string which is in the encoding used for strings by
652/// the C runtime (usually the same as that used by the operating
653/// system) in the [current locale](running.html#locale) into a UTF-8 string.
654///
655/// If the source encoding is not UTF-8 and the conversion output contains a
656/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
657/// function returns [`None`].
658/// If the source encoding is UTF-8, an embedded nul character is treated with
659/// the [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error for backward compatibility with
660/// earlier versions of this library. Use g_convert() to produce output that
661/// may contain embedded nul characters.
662/// ## `opsysstring`
663/// a string in the
664///                 encoding of the current locale. On Windows
665///                 this means the system codepage.
666///
667/// # Returns
668///
669/// The converted string, or [`None`] on an error.
670///
671/// ## `bytes_read`
672/// location to store the number of bytes in the
673///                 input string that were successfully converted, or [`None`].
674///                 Even if the conversion was successful, this may be
675///                 less than @len if there were partial characters
676///                 at the end of the input. If the error
677///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
678///                 stored will be the byte offset after the last valid
679///                 input sequence.
680///
681/// ## `bytes_written`
682/// the number of bytes stored in the output
683///                 buffer (not including the terminating nul).
684#[doc(alias = "g_locale_to_utf8")]
685pub fn locale_to_utf8(opsysstring: &[u8]) -> Result<(crate::GString, usize), CvtError> {
686    let len = opsysstring.len() as isize;
687    let mut bytes_read = 0;
688    let mut bytes_written = std::mem::MaybeUninit::uninit();
689    let mut error = ptr::null_mut();
690    let ret = unsafe {
691        ffi::g_locale_to_utf8(
692            opsysstring.to_glib_none().0,
693            len,
694            &mut bytes_read,
695            bytes_written.as_mut_ptr(),
696            &mut error,
697        )
698    };
699    if error.is_null() {
700        Ok(unsafe {
701            (
702                GString::from_glib_full_num(ret, bytes_written.assume_init()),
703                bytes_read,
704            )
705        })
706    } else {
707        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
708    }
709}
710
711#[doc(alias = "g_utf8_to_ucs4")]
712#[doc(alias = "g_utf8_to_ucs4_fast")]
713#[doc(alias = "utf8_to_ucs4")]
714pub fn utf8_to_utf32(str: impl AsRef<str>) -> Slice<char> {
715    unsafe {
716        let mut items_written = 0;
717
718        let str_as_utf32 = ffi::g_utf8_to_ucs4_fast(
719            str.as_ref().as_ptr().cast::<c_char>(),
720            str.as_ref().len() as _,
721            &mut items_written,
722        );
723
724        // NOTE: We assume that u32 and char have the same layout and trust that glib won't give us
725        //       invalid UTF-32 codepoints
726        Slice::from_glib_full_num(str_as_utf32, items_written as usize)
727    }
728}
729
730#[doc(alias = "g_ucs4_to_utf8")]
731#[doc(alias = "ucs4_to_utf8")]
732pub fn utf32_to_utf8(str: impl AsRef<[char]>) -> GString {
733    let mut items_read = 0;
734    let mut items_written = 0;
735    let mut error = ptr::null_mut();
736
737    unsafe {
738        let str_as_utf8 = ffi::g_ucs4_to_utf8(
739            str.as_ref().as_ptr().cast::<u32>(),
740            str.as_ref().len() as _,
741            &mut items_read,
742            &mut items_written,
743            &mut error,
744        );
745
746        debug_assert!(
747            error.is_null(),
748            "Rust `char` should always be convertible to UTF-8"
749        );
750
751        GString::from_glib_full_num(str_as_utf8, items_written as usize)
752    }
753}
754
755#[doc(alias = "g_utf8_casefold")]
756#[doc(alias = "utf8_casefold")]
757pub fn casefold(str: impl AsRef<str>) -> GString {
758    unsafe {
759        let str = ffi::g_utf8_casefold(str.as_ref().as_ptr().cast(), str.as_ref().len() as isize);
760
761        from_glib_full(str)
762    }
763}
764
765#[doc(alias = "g_utf8_normalize")]
766#[doc(alias = "utf8_normalize")]
767pub fn normalize(str: impl AsRef<str>, mode: NormalizeMode) -> GString {
768    unsafe {
769        let str = ffi::g_utf8_normalize(
770            str.as_ref().as_ptr().cast(),
771            str.as_ref().len() as isize,
772            mode.into_glib(),
773        );
774
775        from_glib_full(str)
776    }
777}
778
#[cfg(test)]
mod tests {
    // One-shot conversions: valid input, invalid input, and both fallback
    // flavours (Unicode escape and explicit replacement string).
    #[test]
    fn convert_ascii() {
        let valid = super::convert(b"Hello", "utf-8", "ascii");
        assert!(valid.is_ok());

        let invalid = super::convert(b"He\xaallo", "utf-8", "ascii");
        assert!(invalid.is_err());

        let (escaped, _) =
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", crate::NONE_STR)
                .unwrap();
        assert_eq!(escaped.as_slice(), b"H\\u00e9llo");

        let (replaced, _) =
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", Some("_")).unwrap();
        assert_eq!(replaced.as_slice(), b"H_llo");
    }

    // Conversions through an explicit descriptor, plus the failure path of
    // opening one for an unknown character set.
    #[test]
    fn iconv() {
        let mut converter = super::IConv::new("utf-8", "ascii").unwrap();

        let valid = converter.convert(b"Hello");
        assert!(valid.is_ok());

        let invalid = converter.convert(b"He\xaallo");
        assert!(invalid.is_err());

        let unknown = super::IConv::new("utf-8", "badcharset123456789");
        assert!(unknown.is_none());
    }

    // Smoke test: only checks that the call completes.
    #[test]
    fn filename_charsets() {
        let _ = super::filename_charsets();
    }

    // Conversions in both directions between UTF-8 and UTF-32.
    #[test]
    fn utf8_and_utf32() {
        let chars = ['A', 'b', '🤔'];
        let as_utf8 = super::utf32_to_utf8(chars);
        assert_eq!(as_utf8, "Ab🤔");

        let text = "🤔 ț";
        let as_utf32 = super::utf8_to_utf32(text);
        assert_eq!(as_utf32.as_slice(), &['🤔', ' ', 'ț']);
    }
}
822}