glib/
convert.rs

1// Take a look at the license at the top of the repository in the LICENSE file.
2
3use std::{fmt, io, os::raw::c_char, path::PathBuf, ptr};
4
5use crate::{ffi, translate::*, ConvertError, Error, GString, NormalizeMode, Slice};
6
7// rustdoc-stripper-ignore-next
8/// A wrapper for [`ConvertError`](crate::ConvertError) that can hold an offset into the input
9/// string.
10#[derive(Debug)]
11pub enum CvtError {
12    Convert(Error),
13    IllegalSequence { source: Error, offset: usize },
14}
15
16impl std::error::Error for CvtError {
17    fn source(&self) -> ::core::option::Option<&(dyn std::error::Error + 'static)> {
18        match self {
19            CvtError::Convert(err) => std::error::Error::source(err),
20            CvtError::IllegalSequence { source, .. } => Some(source),
21        }
22    }
23}
24
25impl fmt::Display for CvtError {
26    fn fmt(&self, fmt: &mut fmt::Formatter) -> ::core::fmt::Result {
27        match self {
28            CvtError::Convert(err) => fmt::Display::fmt(err, fmt),
29            CvtError::IllegalSequence { source, offset } => {
30                write!(fmt, "{source} at offset {offset}")
31            }
32        }
33    }
34}
35
36impl std::convert::From<Error> for CvtError {
37    fn from(err: Error) -> Self {
38        CvtError::Convert(err)
39    }
40}
41
42impl CvtError {
43    #[inline]
44    fn new(err: Error, bytes_read: usize) -> Self {
45        if err.kind::<ConvertError>() == Some(ConvertError::IllegalSequence) {
46            Self::IllegalSequence {
47                source: err,
48                offset: bytes_read,
49            }
50        } else {
51            err.into()
52        }
53    }
54}
55
56/// Converts a string from one character set to another.
57///
58/// Note that you should use g_iconv() for streaming conversions.
59/// Despite the fact that @bytes_read can return information about partial
60/// characters, the g_convert_... functions are not generally suitable
61/// for streaming. If the underlying converter maintains internal state,
62/// then this won't be preserved across successive calls to g_convert(),
63/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
64/// this is the GNU C converter for CP1255 which does not emit a base
65/// character until it knows that the next character is not a mark that
66/// could combine with the base character.)
67///
68/// Using extensions such as "//TRANSLIT" may not work (or may not work
69/// well) on many platforms.  Consider using g_str_to_ascii() instead.
70/// ## `str`
71///
72///                 the string to convert.
73/// ## `to_codeset`
74/// name of character set into which to convert @str
75/// ## `from_codeset`
76/// character set of @str.
77///
78/// # Returns
79///
80///
81///          If the conversion was successful, a newly allocated buffer
82///          containing the converted string, which must be freed with g_free().
83///          Otherwise [`None`] and @error will be set.
84///
85/// ## `bytes_read`
86/// location to store the number of bytes in
87///                 the input string that were successfully converted, or [`None`].
88///                 Even if the conversion was successful, this may be
89///                 less than @len if there were partial characters
90///                 at the end of the input. If the error
91///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
92///                 stored will be the byte offset after the last valid
93///                 input sequence.
94// rustdoc-stripper-ignore-next-stop
95/// Converts a string from one character set to another.
96///
97/// Note that you should use g_iconv() for streaming conversions.
98/// Despite the fact that @bytes_read can return information about partial
99/// characters, the g_convert_... functions are not generally suitable
100/// for streaming. If the underlying converter maintains internal state,
101/// then this won't be preserved across successive calls to g_convert(),
102/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
103/// this is the GNU C converter for CP1255 which does not emit a base
104/// character until it knows that the next character is not a mark that
105/// could combine with the base character.)
106///
107/// Using extensions such as "//TRANSLIT" may not work (or may not work
108/// well) on many platforms.  Consider using g_str_to_ascii() instead.
109/// ## `str`
110///
111///                 the string to convert.
112/// ## `to_codeset`
113/// name of character set into which to convert @str
114/// ## `from_codeset`
115/// character set of @str.
116///
117/// # Returns
118///
119///
120///          If the conversion was successful, a newly allocated buffer
121///          containing the converted string, which must be freed with g_free().
122///          Otherwise [`None`] and @error will be set.
123///
124/// ## `bytes_read`
125/// location to store the number of bytes in
126///                 the input string that were successfully converted, or [`None`].
127///                 Even if the conversion was successful, this may be
128///                 less than @len if there were partial characters
129///                 at the end of the input. If the error
130///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
131///                 stored will be the byte offset after the last valid
132///                 input sequence.
133#[doc(alias = "g_convert")]
134pub fn convert(
135    str_: &[u8],
136    to_codeset: impl IntoGStr,
137    from_codeset: impl IntoGStr,
138) -> Result<(Slice<u8>, usize), CvtError> {
139    assert!(str_.len() <= isize::MAX as usize);
140    let mut bytes_read = 0;
141    let mut bytes_written = 0;
142    let mut error = ptr::null_mut();
143    let result = to_codeset.run_with_gstr(|to_codeset| {
144        from_codeset.run_with_gstr(|from_codeset| unsafe {
145            ffi::g_convert(
146                str_.as_ptr(),
147                str_.len() as isize,
148                to_codeset.to_glib_none().0,
149                from_codeset.to_glib_none().0,
150                &mut bytes_read,
151                &mut bytes_written,
152                &mut error,
153            )
154        })
155    });
156    if result.is_null() {
157        Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
158    } else {
159        let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
160        Ok((slice, bytes_read))
161    }
162}
163
164/// Converts a string from one character set to another, possibly
165/// including fallback sequences for characters not representable
166/// in the output. Note that it is not guaranteed that the specification
167/// for the fallback sequences in @fallback will be honored. Some
168/// systems may do an approximate conversion from @from_codeset
169/// to @to_codeset in their iconv() functions,
170/// in which case GLib will simply return that approximate conversion.
171///
172/// Note that you should use g_iconv() for streaming conversions.
173/// Despite the fact that @bytes_read can return information about partial
174/// characters, the g_convert_... functions are not generally suitable
175/// for streaming. If the underlying converter maintains internal state,
176/// then this won't be preserved across successive calls to g_convert(),
177/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
178/// this is the GNU C converter for CP1255 which does not emit a base
179/// character until it knows that the next character is not a mark that
180/// could combine with the base character.)
181/// ## `str`
182///
183///                the string to convert.
184/// ## `to_codeset`
185/// name of character set into which to convert @str
186/// ## `from_codeset`
187/// character set of @str.
188/// ## `fallback`
189/// UTF-8 string to use in place of characters not
190///                present in the target encoding. (The string must be
191///                representable in the target encoding).
192///                If [`None`], characters not in the target encoding will
193///                be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
194///
195/// # Returns
196///
197///
198///          If the conversion was successful, a newly allocated buffer
199///          containing the converted string, which must be freed with g_free().
200///          Otherwise [`None`] and @error will be set.
201///
202/// ## `bytes_read`
203/// location to store the number of bytes in
204///                the input string that were successfully converted, or [`None`].
205///                Even if the conversion was successful, this may be
206///                less than @len if there were partial characters
207///                at the end of the input.
208// rustdoc-stripper-ignore-next-stop
209/// Converts a string from one character set to another, possibly
210/// including fallback sequences for characters not representable
211/// in the output. Note that it is not guaranteed that the specification
212/// for the fallback sequences in @fallback will be honored. Some
213/// systems may do an approximate conversion from @from_codeset
214/// to @to_codeset in their iconv() functions,
215/// in which case GLib will simply return that approximate conversion.
216///
217/// Note that you should use g_iconv() for streaming conversions.
218/// Despite the fact that @bytes_read can return information about partial
219/// characters, the g_convert_... functions are not generally suitable
220/// for streaming. If the underlying converter maintains internal state,
221/// then this won't be preserved across successive calls to g_convert(),
222/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
223/// this is the GNU C converter for CP1255 which does not emit a base
224/// character until it knows that the next character is not a mark that
225/// could combine with the base character.)
226/// ## `str`
227///
228///                the string to convert.
229/// ## `to_codeset`
230/// name of character set into which to convert @str
231/// ## `from_codeset`
232/// character set of @str.
233/// ## `fallback`
234/// UTF-8 string to use in place of characters not
235///                present in the target encoding. (The string must be
236///                representable in the target encoding).
237///                If [`None`], characters not in the target encoding will
238///                be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
239///
240/// # Returns
241///
242///
243///          If the conversion was successful, a newly allocated buffer
244///          containing the converted string, which must be freed with g_free().
245///          Otherwise [`None`] and @error will be set.
246///
247/// ## `bytes_read`
248/// location to store the number of bytes in
249///                the input string that were successfully converted, or [`None`].
250///                Even if the conversion was successful, this may be
251///                less than @len if there were partial characters
252///                at the end of the input.
253#[doc(alias = "g_convert_with_fallback")]
254pub fn convert_with_fallback(
255    str_: &[u8],
256    to_codeset: impl IntoGStr,
257    from_codeset: impl IntoGStr,
258    fallback: Option<impl IntoGStr>,
259) -> Result<(Slice<u8>, usize), CvtError> {
260    assert!(str_.len() <= isize::MAX as usize);
261    let mut bytes_read = 0;
262    let mut bytes_written = 0;
263    let mut error = ptr::null_mut();
264    let result = to_codeset.run_with_gstr(|to_codeset| {
265        from_codeset.run_with_gstr(|from_codeset| {
266            fallback.run_with_gstr(|fallback| unsafe {
267                ffi::g_convert_with_fallback(
268                    str_.as_ptr(),
269                    str_.len() as isize,
270                    to_codeset.to_glib_none().0,
271                    from_codeset.to_glib_none().0,
272                    fallback.to_glib_none().0,
273                    &mut bytes_read,
274                    &mut bytes_written,
275                    &mut error,
276                )
277            })
278        })
279    });
280    if result.is_null() {
281        Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
282    } else {
283        let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
284        Ok((slice, bytes_read))
285    }
286}
287
// rustdoc-stripper-ignore-next
/// Error type for [`std::io::Error`]s that may additionally carry an offset into an input
/// string.
#[derive(Debug)]
pub enum IConvError {
    /// An I/O error with no associated input offset.
    Error(io::Error),
    /// An I/O error together with the byte offset into the input it relates to.
    WithOffset { source: io::Error, offset: usize },
}
295
296impl std::error::Error for IConvError {
297    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
298        match self {
299            IConvError::Error(err) => std::error::Error::source(err),
300            IConvError::WithOffset { source, .. } => Some(source),
301        }
302    }
303}
304
305impl fmt::Display for IConvError {
306    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
307        match self {
308            IConvError::Error(err) => fmt::Display::fmt(err, fmt),
309            IConvError::WithOffset { source, offset } => write!(fmt, "{source} at offset {offset}"),
310        }
311    }
312}
313
314impl std::convert::From<io::Error> for IConvError {
315    fn from(err: io::Error) -> Self {
316        IConvError::Error(err)
317    }
318}
319
320/// The GIConv struct wraps an iconv() conversion descriptor. It contains
321/// private data and should only be accessed using the following functions.
322#[derive(Debug)]
323#[repr(transparent)]
324#[doc(alias = "GIConv")]
325pub struct IConv(ffi::GIConv);
326
327unsafe impl Send for IConv {}
328
329impl IConv {
330    /// Same as the standard UNIX routine iconv_open(), but
331    /// may be implemented via libiconv on UNIX flavors that lack
332    /// a native implementation.
333    ///
334    /// GLib provides g_convert() and g_locale_to_utf8() which are likely
335    /// more convenient than the raw iconv wrappers.
336    /// ## `to_codeset`
337    /// destination codeset
338    /// ## `from_codeset`
339    /// source codeset
340    ///
341    /// # Returns
342    ///
343    /// a "conversion descriptor", or (GIConv)-1 if
344    ///  opening the converter failed.
345    #[doc(alias = "g_iconv_open")]
346    #[allow(clippy::unnecessary_lazy_evaluations)]
347    pub fn new(to_codeset: impl IntoGStr, from_codeset: impl IntoGStr) -> Option<Self> {
348        let iconv = to_codeset.run_with_gstr(|to_codeset| {
349            from_codeset.run_with_gstr(|from_codeset| unsafe {
350                ffi::g_iconv_open(to_codeset.to_glib_none().0, from_codeset.to_glib_none().0)
351            })
352        });
353        (iconv as isize != -1).then(|| Self(iconv))
354    }
355    /// Converts a string from one character set to another.
356    ///
357    /// Note that you should use g_iconv() for streaming conversions.
358    /// Despite the fact that @bytes_read can return information about partial
359    /// characters, the g_convert_... functions are not generally suitable
360    /// for streaming. If the underlying converter maintains internal state,
361    /// then this won't be preserved across successive calls to g_convert(),
362    /// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
363    /// this is the GNU C converter for CP1255 which does not emit a base
364    /// character until it knows that the next character is not a mark that
365    /// could combine with the base character.)
366    ///
367    /// Characters which are valid in the input character set, but which have no
368    /// representation in the output character set will result in a
369    /// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error. This is in contrast to the iconv()
370    /// specification, which leaves this behaviour implementation defined. Note that
371    /// this is the same error code as is returned for an invalid byte sequence in
372    /// the input character set. To get defined behaviour for conversion of
373    /// unrepresentable characters, use g_convert_with_fallback().
374    /// ## `str`
375    ///
376    ///                 the string to convert.
377    /// ## `converter`
378    /// conversion descriptor from g_iconv_open()
379    ///
380    /// # Returns
381    ///
382    ///
383    ///               If the conversion was successful, a newly allocated buffer
384    ///               containing the converted string, which must be freed with
385    ///               g_free(). Otherwise [`None`] and @error will be set.
386    ///
387    /// ## `bytes_read`
388    /// location to store the number of bytes in
389    ///                 the input string that were successfully converted, or [`None`].
390    ///                 Even if the conversion was successful, this may be
391    ///                 less than @len if there were partial characters
392    ///                 at the end of the input. If the error
393    ///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
394    ///                 stored will be the byte offset after the last valid
395    ///                 input sequence.
396    // rustdoc-stripper-ignore-next-stop
397    /// Converts a string from one character set to another.
398    ///
399    /// Note that you should use g_iconv() for streaming conversions.
400    /// Despite the fact that @bytes_read can return information about partial
401    /// characters, the g_convert_... functions are not generally suitable
402    /// for streaming. If the underlying converter maintains internal state,
403    /// then this won't be preserved across successive calls to g_convert(),
404    /// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
405    /// this is the GNU C converter for CP1255 which does not emit a base
406    /// character until it knows that the next character is not a mark that
407    /// could combine with the base character.)
408    ///
409    /// Characters which are valid in the input character set, but which have no
410    /// representation in the output character set will result in a
411    /// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error. This is in contrast to the iconv()
412    /// specification, which leaves this behaviour implementation defined. Note that
413    /// this is the same error code as is returned for an invalid byte sequence in
414    /// the input character set. To get defined behaviour for conversion of
415    /// unrepresentable characters, use g_convert_with_fallback().
416    /// ## `str`
417    ///
418    ///                 the string to convert.
419    /// ## `converter`
420    /// conversion descriptor from g_iconv_open()
421    ///
422    /// # Returns
423    ///
424    ///
425    ///               If the conversion was successful, a newly allocated buffer
426    ///               containing the converted string, which must be freed with
427    ///               g_free(). Otherwise [`None`] and @error will be set.
428    ///
429    /// ## `bytes_read`
430    /// location to store the number of bytes in
431    ///                 the input string that were successfully converted, or [`None`].
432    ///                 Even if the conversion was successful, this may be
433    ///                 less than @len if there were partial characters
434    ///                 at the end of the input. If the error
435    ///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
436    ///                 stored will be the byte offset after the last valid
437    ///                 input sequence.
438    #[doc(alias = "g_convert_with_iconv")]
439    pub fn convert(&mut self, str_: &[u8]) -> Result<(Slice<u8>, usize), CvtError> {
440        assert!(str_.len() <= isize::MAX as usize);
441        let mut bytes_read = 0;
442        let mut bytes_written = 0;
443        let mut error = ptr::null_mut();
444        let result = unsafe {
445            ffi::g_convert_with_iconv(
446                str_.as_ptr(),
447                str_.len() as isize,
448                self.0,
449                &mut bytes_read,
450                &mut bytes_written,
451                &mut error,
452            )
453        };
454        if result.is_null() {
455            Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
456        } else {
457            let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
458            Ok((slice, bytes_read))
459        }
460    }
461    /// Same as the standard UNIX routine iconv(), but
462    /// may be implemented via libiconv on UNIX flavors that lack
463    /// a native implementation.
464    ///
465    /// GLib provides g_convert() and g_locale_to_utf8() which are likely
466    /// more convenient than the raw iconv wrappers.
467    ///
468    /// Note that the behaviour of iconv() for characters which are valid in the
469    /// input character set, but which have no representation in the output character
470    /// set, is implementation defined. This function may return success (with a
471    /// positive number of non-reversible conversions as replacement characters were
472    /// used), or it may return -1 and set an error such as `EILSEQ`, in such a
473    /// situation.
474    ///
475    /// See [`iconv(3posix)`](man:iconv(3posix)) and [`iconv(3)`](man:iconv(3)) for more details about behavior when an
476    /// error occurs.
477    /// ## `converter`
478    /// conversion descriptor from g_iconv_open()
479    /// ## `inbuf`
480    /// bytes to convert
481    /// ## `inbytes_left`
482    /// inout parameter, bytes remaining to convert in @inbuf
483    /// ## `outbuf`
484    /// converted output bytes
485    /// ## `outbytes_left`
486    /// inout parameter, bytes available to fill in @outbuf
487    ///
488    /// # Returns
489    ///
490    /// count of non-reversible conversions, or -1 on error
491    // rustdoc-stripper-ignore-next-stop
492    /// Same as the standard UNIX routine iconv(), but
493    /// may be implemented via libiconv on UNIX flavors that lack
494    /// a native implementation.
495    ///
496    /// GLib provides g_convert() and g_locale_to_utf8() which are likely
497    /// more convenient than the raw iconv wrappers.
498    ///
499    /// Note that the behaviour of iconv() for characters which are valid in the
500    /// input character set, but which have no representation in the output character
501    /// set, is implementation defined. This function may return success (with a
502    /// positive number of non-reversible conversions as replacement characters were
503    /// used), or it may return -1 and set an error such as `EILSEQ`, in such a
504    /// situation.
505    ///
506    /// See [`iconv(3posix)`](man:iconv(3posix)) and [`iconv(3)`](man:iconv(3)) for more details about behavior when an
507    /// error occurs.
508    /// ## `converter`
509    /// conversion descriptor from g_iconv_open()
510    /// ## `inbuf`
511    /// bytes to convert
512    /// ## `inbytes_left`
513    /// inout parameter, bytes remaining to convert in @inbuf
514    /// ## `outbuf`
515    /// converted output bytes
516    /// ## `outbytes_left`
517    /// inout parameter, bytes available to fill in @outbuf
518    ///
519    /// # Returns
520    ///
521    /// count of non-reversible conversions, or -1 on error
522    #[doc(alias = "g_iconv")]
523    pub fn iconv(
524        &mut self,
525        inbuf: Option<&[u8]>,
526        outbuf: Option<&mut [std::mem::MaybeUninit<u8>]>,
527    ) -> Result<(usize, usize, usize), IConvError> {
528        let input_len = inbuf.as_ref().map(|b| b.len()).unwrap_or_default();
529        let mut inbytes_left = input_len;
530        let mut outbytes_left = outbuf.as_ref().map(|b| b.len()).unwrap_or_default();
531        let mut inbuf = inbuf
532            .map(|b| mut_override(b.as_ptr()) as *mut c_char)
533            .unwrap_or_else(ptr::null_mut);
534        let mut outbuf = outbuf
535            .map(|b| b.as_mut_ptr() as *mut c_char)
536            .unwrap_or_else(ptr::null_mut);
537        let conversions = unsafe {
538            ffi::g_iconv(
539                self.0,
540                &mut inbuf,
541                &mut inbytes_left,
542                &mut outbuf,
543                &mut outbytes_left,
544            )
545        };
546        if conversions as isize == -1 {
547            let err = io::Error::last_os_error();
548            let code = err.raw_os_error().unwrap();
549            if code == libc::EILSEQ || code == libc::EINVAL {
550                Err(IConvError::WithOffset {
551                    source: err,
552                    offset: input_len - inbytes_left,
553                })
554            } else {
555                Err(err.into())
556            }
557        } else {
558            Ok((conversions, inbytes_left, outbytes_left))
559        }
560    }
561}
562
563impl Drop for IConv {
564    #[inline]
565    fn drop(&mut self) {
566        unsafe {
567            ffi::g_iconv_close(self.0);
568        }
569    }
570}
571
572/// Determines the preferred character sets used for filenames.
573/// The first character set from the @charsets is the filename encoding, the
574/// subsequent character sets are used when trying to generate a displayable
575/// representation of a filename, see g_filename_display_name().
576///
577/// On Unix, the character sets are determined by consulting the
578/// environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
579/// On Windows, the character set used in the GLib API is always UTF-8
580/// and said environment variables have no effect.
581///
582/// `G_FILENAME_ENCODING` may be set to a comma-separated list of
583/// character set names. The special token `@locale` is taken to mean the
584/// character set for the [current locale](running.html#locale).
585/// If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
586/// the character set of the current locale is taken as the filename
587/// encoding. If neither environment variable  is set, UTF-8 is taken
588/// as the filename encoding, but the character set of the current locale
589/// is also put in the list of encodings.
590///
591/// The returned @charsets belong to GLib and must not be freed.
592///
593/// Note that on Unix, regardless of the locale character set or
594/// `G_FILENAME_ENCODING` value, the actual file names present
595/// on a system might be in any random encoding or just gibberish.
596///
597/// # Returns
598///
599/// [`true`] if the filename encoding is UTF-8.
600///
601/// ## `filename_charsets`
602///
603///    return location for the [`None`]-terminated list of encoding names
604// rustdoc-stripper-ignore-next-stop
605/// Determines the preferred character sets used for filenames.
606/// The first character set from the @charsets is the filename encoding, the
607/// subsequent character sets are used when trying to generate a displayable
608/// representation of a filename, see g_filename_display_name().
609///
610/// On Unix, the character sets are determined by consulting the
611/// environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
612/// On Windows, the character set used in the GLib API is always UTF-8
613/// and said environment variables have no effect.
614///
615/// `G_FILENAME_ENCODING` may be set to a comma-separated list of
616/// character set names. The special token `@locale` is taken to mean the
617/// character set for the [current locale](running.html#locale).
618/// If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
619/// the character set of the current locale is taken as the filename
620/// encoding. If neither environment variable  is set, UTF-8 is taken
621/// as the filename encoding, but the character set of the current locale
622/// is also put in the list of encodings.
623///
624/// The returned @charsets belong to GLib and must not be freed.
625///
626/// Note that on Unix, regardless of the locale character set or
627/// `G_FILENAME_ENCODING` value, the actual file names present
628/// on a system might be in any random encoding or just gibberish.
629///
630/// # Returns
631///
632/// [`true`] if the filename encoding is UTF-8.
633///
634/// ## `filename_charsets`
635///
636///    return location for the [`None`]-terminated list of encoding names
637#[doc(alias = "g_get_filename_charsets")]
638#[doc(alias = "get_filename_charsets")]
639pub fn filename_charsets() -> (bool, Vec<GString>) {
640    let mut filename_charsets = ptr::null_mut();
641    unsafe {
642        let is_utf8 = ffi::g_get_filename_charsets(&mut filename_charsets);
643        (
644            from_glib(is_utf8),
645            FromGlibPtrContainer::from_glib_none(filename_charsets),
646        )
647    }
648}
649
650/// Converts a string from UTF-8 to the encoding GLib uses for
651/// filenames. Note that on Windows GLib uses UTF-8 for filenames;
652/// on other platforms, this function indirectly depends on the
653/// [current locale](running.html#locale).
654///
655/// The input string shall not contain nul characters even if the @len
656/// argument is positive. A nul character found inside the string will result
657/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. If the filename encoding is
658/// not UTF-8 and the conversion output contains a nul character, the error
659/// [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the function returns [`None`].
660/// ## `utf8string`
661/// a UTF-8 encoded string.
662/// ## `len`
663/// the length of the string, or -1 if the string is
664///                 nul-terminated.
665///
666/// # Returns
667///
668///
669///               The converted string, or [`None`] on an error.
670///
671/// ## `bytes_read`
672/// location to store the number of bytes in
673///                 the input string that were successfully converted, or [`None`].
674///                 Even if the conversion was successful, this may be
675///                 less than @len if there were partial characters
676///                 at the end of the input. If the error
677///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
678///                 stored will be the byte offset after the last valid
679///                 input sequence.
680///
681/// ## `bytes_written`
682/// the number of bytes stored in
683///                 the output buffer (not including the terminating nul).
684// rustdoc-stripper-ignore-next-stop
685/// Converts a string from UTF-8 to the encoding GLib uses for
686/// filenames. Note that on Windows GLib uses UTF-8 for filenames;
687/// on other platforms, this function indirectly depends on the
688/// [current locale](running.html#locale).
689///
690/// The input string shall not contain nul characters even if the @len
691/// argument is positive. A nul character found inside the string will result
692/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. If the filename encoding is
693/// not UTF-8 and the conversion output contains a nul character, the error
694/// [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the function returns [`None`].
695/// ## `utf8string`
696/// a UTF-8 encoded string.
697/// ## `len`
698/// the length of the string, or -1 if the string is
699///                 nul-terminated.
700///
701/// # Returns
702///
703///
704///               The converted string, or [`None`] on an error.
705///
706/// ## `bytes_read`
707/// location to store the number of bytes in
708///                 the input string that were successfully converted, or [`None`].
709///                 Even if the conversion was successful, this may be
710///                 less than @len if there were partial characters
711///                 at the end of the input. If the error
712///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
713///                 stored will be the byte offset after the last valid
714///                 input sequence.
715///
716/// ## `bytes_written`
717/// the number of bytes stored in
718///                 the output buffer (not including the terminating nul).
719#[doc(alias = "g_filename_from_utf8")]
720pub fn filename_from_utf8(utf8string: impl IntoGStr) -> Result<(PathBuf, usize), CvtError> {
721    let mut bytes_read = 0;
722    let mut bytes_written = std::mem::MaybeUninit::uninit();
723    let mut error = ptr::null_mut();
724    let ret = utf8string.run_with_gstr(|utf8string| {
725        assert!(utf8string.len() <= isize::MAX as usize);
726        let len = utf8string.len() as isize;
727        unsafe {
728            ffi::g_filename_from_utf8(
729                utf8string.to_glib_none().0,
730                len,
731                &mut bytes_read,
732                bytes_written.as_mut_ptr(),
733                &mut error,
734            )
735        }
736    });
737    if error.is_null() {
738        Ok(unsafe {
739            (
740                PathBuf::from_glib_full_num(ret, bytes_written.assume_init()),
741                bytes_read,
742            )
743        })
744    } else {
745        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
746    }
747}
748
749/// Converts a string which is in the encoding used by GLib for
750/// filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
751/// for filenames; on other platforms, this function indirectly depends on
752/// the [current locale](running.html#locale).
753///
754/// The input string shall not contain nul characters even if the @len
755/// argument is positive. A nul character found inside the string will result
756/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence].
757/// If the source encoding is not UTF-8 and the conversion output contains a
758/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
759/// function returns [`None`]. Use g_convert() to produce output that
760/// may contain embedded nul characters.
761/// ## `opsysstring`
762/// a string in the encoding for filenames
763/// ## `len`
764/// the length of the string, or -1 if the string is
765///                 nul-terminated (Note that some encodings may allow nul
766///                 bytes to occur inside strings. In that case, using -1
767///                 for the @len parameter is unsafe)
768///
769/// # Returns
770///
771/// The converted string, or [`None`] on an error.
772///
773/// ## `bytes_read`
774/// location to store the number of bytes in the
775///                 input string that were successfully converted, or [`None`].
776///                 Even if the conversion was successful, this may be
777///                 less than @len if there were partial characters
778///                 at the end of the input. If the error
779///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
780///                 stored will be the byte offset after the last valid
781///                 input sequence.
782///
783/// ## `bytes_written`
784/// the number of bytes stored in the output
785///                 buffer (not including the terminating nul).
786// rustdoc-stripper-ignore-next-stop
787/// Converts a string which is in the encoding used by GLib for
788/// filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
789/// for filenames; on other platforms, this function indirectly depends on
790/// the [current locale](running.html#locale).
791///
792/// The input string shall not contain nul characters even if the @len
793/// argument is positive. A nul character found inside the string will result
794/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence].
795/// If the source encoding is not UTF-8 and the conversion output contains a
796/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
797/// function returns [`None`]. Use g_convert() to produce output that
798/// may contain embedded nul characters.
799/// ## `opsysstring`
800/// a string in the encoding for filenames
801/// ## `len`
802/// the length of the string, or -1 if the string is
803///                 nul-terminated (Note that some encodings may allow nul
804///                 bytes to occur inside strings. In that case, using -1
805///                 for the @len parameter is unsafe)
806///
807/// # Returns
808///
809/// The converted string, or [`None`] on an error.
810///
811/// ## `bytes_read`
812/// location to store the number of bytes in the
813///                 input string that were successfully converted, or [`None`].
814///                 Even if the conversion was successful, this may be
815///                 less than @len if there were partial characters
816///                 at the end of the input. If the error
817///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
818///                 stored will be the byte offset after the last valid
819///                 input sequence.
820///
821/// ## `bytes_written`
822/// the number of bytes stored in the output
823///                 buffer (not including the terminating nul).
824#[doc(alias = "g_filename_to_utf8")]
825pub fn filename_to_utf8(
826    opsysstring: impl AsRef<std::path::Path>,
827) -> Result<(crate::GString, usize), CvtError> {
828    let path = opsysstring.as_ref().to_glib_none();
829    let mut bytes_read = 0;
830    let mut bytes_written = std::mem::MaybeUninit::uninit();
831    let mut error = ptr::null_mut();
832    let ret = unsafe {
833        ffi::g_filename_to_utf8(
834            path.0,
835            path.1.as_bytes().len() as isize,
836            &mut bytes_read,
837            bytes_written.as_mut_ptr(),
838            &mut error,
839        )
840    };
841    if error.is_null() {
842        Ok(unsafe {
843            (
844                GString::from_glib_full_num(ret, bytes_written.assume_init()),
845                bytes_read,
846            )
847        })
848    } else {
849        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
850    }
851}
852
853/// Converts a string from UTF-8 to the encoding used for strings by
854/// the C runtime (usually the same as that used by the operating
855/// system) in the [current locale](running.html#locale).
856/// On Windows this means the system codepage.
857///
858/// The input string shall not contain nul characters even if the @len
859/// argument is positive. A nul character found inside the string will result
860/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. Use g_convert() to convert
861/// input that may contain embedded nul characters.
862/// ## `utf8string`
863/// a UTF-8 encoded string
864/// ## `len`
865/// the length of the string, or -1 if the string is
866///                 nul-terminated.
867///
868/// # Returns
869///
870///
871///          A newly-allocated buffer containing the converted string,
872///          or [`None`] on an error, and error will be set.
873///
874/// ## `bytes_read`
875/// location to store the number of bytes in the
876///                 input string that were successfully converted, or [`None`].
877///                 Even if the conversion was successful, this may be
878///                 less than @len if there were partial characters
879///                 at the end of the input. If the error
880///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
881///                 stored will be the byte offset after the last valid
882///                 input sequence.
883// rustdoc-stripper-ignore-next-stop
884/// Converts a string from UTF-8 to the encoding used for strings by
885/// the C runtime (usually the same as that used by the operating
886/// system) in the [current locale](running.html#locale).
887/// On Windows this means the system codepage.
888///
889/// The input string shall not contain nul characters even if the @len
890/// argument is positive. A nul character found inside the string will result
891/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. Use g_convert() to convert
892/// input that may contain embedded nul characters.
893/// ## `utf8string`
894/// a UTF-8 encoded string
895/// ## `len`
896/// the length of the string, or -1 if the string is
897///                 nul-terminated.
898///
899/// # Returns
900///
901///
902///          A newly-allocated buffer containing the converted string,
903///          or [`None`] on an error, and error will be set.
904///
905/// ## `bytes_read`
906/// location to store the number of bytes in the
907///                 input string that were successfully converted, or [`None`].
908///                 Even if the conversion was successful, this may be
909///                 less than @len if there were partial characters
910///                 at the end of the input. If the error
911///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
912///                 stored will be the byte offset after the last valid
913///                 input sequence.
914#[doc(alias = "g_locale_from_utf8")]
915pub fn locale_from_utf8(utf8string: impl IntoGStr) -> Result<(Slice<u8>, usize), CvtError> {
916    let mut bytes_read = 0;
917    let mut bytes_written = std::mem::MaybeUninit::uninit();
918    let mut error = ptr::null_mut();
919    let ret = utf8string.run_with_gstr(|utf8string| {
920        assert!(utf8string.len() <= isize::MAX as usize);
921        unsafe {
922            ffi::g_locale_from_utf8(
923                utf8string.as_ptr(),
924                utf8string.len() as isize,
925                &mut bytes_read,
926                bytes_written.as_mut_ptr(),
927                &mut error,
928            )
929        }
930    });
931    if error.is_null() {
932        Ok(unsafe {
933            (
934                Slice::from_glib_full_num(ret, bytes_written.assume_init() + 1),
935                bytes_read,
936            )
937        })
938    } else {
939        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
940    }
941}
942
943/// Converts a string which is in the encoding used for strings by
944/// the C runtime (usually the same as that used by the operating
945/// system) in the [current locale](running.html#locale) into a UTF-8 string.
946///
947/// If the source encoding is not UTF-8 and the conversion output contains a
948/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
949/// function returns [`None`].
950/// If the source encoding is UTF-8, an embedded nul character is treated with
951/// the [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error for backward compatibility with
952/// earlier versions of this library. Use g_convert() to produce output that
953/// may contain embedded nul characters.
954/// ## `opsysstring`
955/// a string in the
956///                 encoding of the current locale. On Windows
957///                 this means the system codepage.
958///
959/// # Returns
960///
961/// The converted string, or [`None`] on an error.
962///
963/// ## `bytes_read`
964/// location to store the number of bytes in the
965///                 input string that were successfully converted, or [`None`].
966///                 Even if the conversion was successful, this may be
967///                 less than @len if there were partial characters
968///                 at the end of the input. If the error
969///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
970///                 stored will be the byte offset after the last valid
971///                 input sequence.
972///
973/// ## `bytes_written`
974/// the number of bytes stored in the output
975///                 buffer (not including the terminating nul).
976// rustdoc-stripper-ignore-next-stop
977/// Converts a string which is in the encoding used for strings by
978/// the C runtime (usually the same as that used by the operating
979/// system) in the [current locale](running.html#locale) into a UTF-8 string.
980///
981/// If the source encoding is not UTF-8 and the conversion output contains a
982/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
983/// function returns [`None`].
984/// If the source encoding is UTF-8, an embedded nul character is treated with
985/// the [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error for backward compatibility with
986/// earlier versions of this library. Use g_convert() to produce output that
987/// may contain embedded nul characters.
988/// ## `opsysstring`
989/// a string in the
990///                 encoding of the current locale. On Windows
991///                 this means the system codepage.
992///
993/// # Returns
994///
995/// The converted string, or [`None`] on an error.
996///
997/// ## `bytes_read`
998/// location to store the number of bytes in the
999///                 input string that were successfully converted, or [`None`].
1000///                 Even if the conversion was successful, this may be
1001///                 less than @len if there were partial characters
1002///                 at the end of the input. If the error
1003///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
1004///                 stored will be the byte offset after the last valid
1005///                 input sequence.
1006///
1007/// ## `bytes_written`
1008/// the number of bytes stored in the output
1009///                 buffer (not including the terminating nul).
1010#[doc(alias = "g_locale_to_utf8")]
1011pub fn locale_to_utf8(opsysstring: &[u8]) -> Result<(crate::GString, usize), CvtError> {
1012    let len = opsysstring.len() as isize;
1013    let mut bytes_read = 0;
1014    let mut bytes_written = std::mem::MaybeUninit::uninit();
1015    let mut error = ptr::null_mut();
1016    let ret = unsafe {
1017        ffi::g_locale_to_utf8(
1018            opsysstring.to_glib_none().0,
1019            len,
1020            &mut bytes_read,
1021            bytes_written.as_mut_ptr(),
1022            &mut error,
1023        )
1024    };
1025    if error.is_null() {
1026        Ok(unsafe {
1027            (
1028                GString::from_glib_full_num(ret, bytes_written.assume_init()),
1029                bytes_read,
1030            )
1031        })
1032    } else {
1033        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
1034    }
1035}
1036
1037#[doc(alias = "g_utf8_to_ucs4")]
1038#[doc(alias = "g_utf8_to_ucs4_fast")]
1039#[doc(alias = "utf8_to_ucs4")]
1040pub fn utf8_to_utf32(str: impl AsRef<str>) -> Slice<char> {
1041    unsafe {
1042        let mut items_written = 0;
1043
1044        let str_as_utf32 = ffi::g_utf8_to_ucs4_fast(
1045            str.as_ref().as_ptr().cast::<c_char>(),
1046            str.as_ref().len() as _,
1047            &mut items_written,
1048        );
1049
1050        // NOTE: We assume that u32 and char have the same layout and trust that glib won't give us
1051        //       invalid UTF-32 codepoints
1052        Slice::from_glib_full_num(str_as_utf32, items_written as usize)
1053    }
1054}
1055
1056#[doc(alias = "g_ucs4_to_utf8")]
1057#[doc(alias = "ucs4_to_utf8")]
1058pub fn utf32_to_utf8(str: impl AsRef<[char]>) -> GString {
1059    let mut items_read = 0;
1060    let mut items_written = 0;
1061    let mut error = ptr::null_mut();
1062
1063    unsafe {
1064        let str_as_utf8 = ffi::g_ucs4_to_utf8(
1065            str.as_ref().as_ptr().cast::<u32>(),
1066            str.as_ref().len() as _,
1067            &mut items_read,
1068            &mut items_written,
1069            &mut error,
1070        );
1071
1072        debug_assert!(
1073            error.is_null(),
1074            "Rust `char` should always be convertible to UTF-8"
1075        );
1076
1077        GString::from_glib_full_num(str_as_utf8, items_written as usize)
1078    }
1079}
1080
1081#[doc(alias = "g_utf8_casefold")]
1082#[doc(alias = "utf8_casefold")]
1083pub fn casefold(str: impl AsRef<str>) -> GString {
1084    unsafe {
1085        let str = ffi::g_utf8_casefold(str.as_ref().as_ptr().cast(), str.as_ref().len() as isize);
1086
1087        from_glib_full(str)
1088    }
1089}
1090
1091#[doc(alias = "g_utf8_normalize")]
1092#[doc(alias = "utf8_normalize")]
1093pub fn normalize(str: impl AsRef<str>, mode: NormalizeMode) -> GString {
1094    unsafe {
1095        let str = ffi::g_utf8_normalize(
1096            str.as_ref().as_ptr().cast(),
1097            str.as_ref().len() as isize,
1098            mode.into_glib(),
1099        );
1100
1101        from_glib_full(str)
1102    }
1103}
1104
#[cfg(test)]
mod tests {
    // One-shot conversion: succeeds on plain input, fails on the `\xaa` byte, and the
    // fallback variant substitutes the character that cannot be represented.
    #[test]
    fn convert_ascii() {
        let plain = super::convert(b"Hello", "utf-8", "ascii");
        assert!(plain.is_ok());

        let invalid = super::convert(b"He\xaallo", "utf-8", "ascii");
        assert!(invalid.is_err());

        // Without an explicit fallback the character shows up as a `\u00e9` escape.
        let escaped =
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", crate::NONE_STR)
                .unwrap();
        assert_eq!(escaped.0.as_slice(), b"H\\u00e9llo");

        // With a fallback string the character is replaced by that string.
        let replaced =
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", Some("_")).unwrap();
        assert_eq!(replaced.0.as_slice(), b"H_llo");
    }

    // Reusable converter: same expectations as above, plus an unknown charset yields `None`.
    #[test]
    fn iconv() {
        let mut converter = super::IConv::new("utf-8", "ascii").unwrap();

        let plain = converter.convert(b"Hello");
        assert!(plain.is_ok());

        let invalid = converter.convert(b"He\xaallo");
        assert!(invalid.is_err());

        let unknown = super::IConv::new("utf-8", "badcharset123456789");
        assert!(unknown.is_none());
    }

    // Smoke test: only checks that the call returns.
    #[test]
    fn filename_charsets() {
        let _charsets = super::filename_charsets();
    }

    // Conversion between UTF-8 and UTF-32 in both directions, including non-ASCII characters.
    #[test]
    fn utf8_and_utf32() {
        let code_points = ['A', 'b', '🤔'];
        let encoded = super::utf32_to_utf8(code_points);
        assert_eq!(encoded, "Ab🤔");

        let text = "🤔 ț";
        let decoded = super::utf8_to_utf32(text);
        assert_eq!(decoded.as_slice(), &['🤔', ' ', 'ț']);
    }
}