glib/
convert.rs

1// Take a look at the license at the top of the repository in the LICENSE file.
2
3use std::{fmt, io, os::raw::c_char, path::PathBuf, ptr};
4
5use crate::{ffi, translate::*, ConvertError, Error, GString, NormalizeMode, Slice};
6
// rustdoc-stripper-ignore-next
/// A wrapper for [`ConvertError`](crate::ConvertError) that can hold an offset into the input
/// string.
///
/// The `From<Error>` conversion always produces [`CvtError::Convert`]; the offset-carrying
/// variant is only produced when the wrapped error's kind is
/// [`ConvertError::IllegalSequence`](crate::ConvertError::IllegalSequence).
#[derive(Debug)]
pub enum CvtError {
    // rustdoc-stripper-ignore-next
    /// A conversion failure without any associated input offset.
    Convert(Error),
    // rustdoc-stripper-ignore-next
    /// An illegal-sequence failure; `offset` is the number of input bytes the failing
    /// conversion call reported as read.
    IllegalSequence { source: Error, offset: usize },
}
15
16impl std::error::Error for CvtError {
17    fn source(&self) -> ::core::option::Option<&(dyn std::error::Error + 'static)> {
18        match self {
19            CvtError::Convert(err) => std::error::Error::source(err),
20            CvtError::IllegalSequence { source, .. } => Some(source),
21        }
22    }
23}
24
25impl fmt::Display for CvtError {
26    fn fmt(&self, fmt: &mut fmt::Formatter) -> ::core::fmt::Result {
27        match self {
28            CvtError::Convert(err) => fmt::Display::fmt(err, fmt),
29            CvtError::IllegalSequence { source, offset } => {
30                write!(fmt, "{source} at offset {offset}")
31            }
32        }
33    }
34}
35
36impl std::convert::From<Error> for CvtError {
37    fn from(err: Error) -> Self {
38        CvtError::Convert(err)
39    }
40}
41
42impl CvtError {
43    #[inline]
44    fn new(err: Error, bytes_read: usize) -> Self {
45        if err.kind::<ConvertError>() == Some(ConvertError::IllegalSequence) {
46            Self::IllegalSequence {
47                source: err,
48                offset: bytes_read,
49            }
50        } else {
51            err.into()
52        }
53    }
54}
55
56/// Converts a string from one character set to another.
57///
58/// Note that you should use g_iconv() for streaming conversions.
59/// Despite the fact that @bytes_read can return information about partial
60/// characters, the g_convert_... functions are not generally suitable
61/// for streaming. If the underlying converter maintains internal state,
62/// then this won't be preserved across successive calls to g_convert(),
63/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
64/// this is the GNU C converter for CP1255 which does not emit a base
65/// character until it knows that the next character is not a mark that
66/// could combine with the base character.)
67///
68/// Using extensions such as "//TRANSLIT" may not work (or may not work
69/// well) on many platforms.  Consider using g_str_to_ascii() instead.
70/// ## `str`
71///
72///                 the string to convert.
73/// ## `to_codeset`
74/// name of character set into which to convert @str
75/// ## `from_codeset`
76/// character set of @str.
77///
78/// # Returns
79///
80///
81///          If the conversion was successful, a newly allocated buffer
82///          containing the converted string, which must be freed with g_free().
83///          Otherwise [`None`] and @error will be set.
84///
85/// ## `bytes_read`
86/// location to store the number of bytes in
87///                 the input string that were successfully converted, or [`None`].
88///                 Even if the conversion was successful, this may be
89///                 less than @len if there were partial characters
90///                 at the end of the input. If the error
91///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
92///                 stored will be the byte offset after the last valid
93///                 input sequence.
94// rustdoc-stripper-ignore-next-stop
95/// Converts a string from one character set to another.
96///
97/// Note that you should use g_iconv() for streaming conversions.
98/// Despite the fact that @bytes_read can return information about partial
99/// characters, the g_convert_... functions are not generally suitable
100/// for streaming. If the underlying converter maintains internal state,
101/// then this won't be preserved across successive calls to g_convert(),
102/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
103/// this is the GNU C converter for CP1255 which does not emit a base
104/// character until it knows that the next character is not a mark that
105/// could combine with the base character.)
106///
107/// Using extensions such as "//TRANSLIT" may not work (or may not work
108/// well) on many platforms.  Consider using g_str_to_ascii() instead.
109/// ## `str`
110///
111///                 the string to convert.
112/// ## `to_codeset`
113/// name of character set into which to convert @str
114/// ## `from_codeset`
115/// character set of @str.
116///
117/// # Returns
118///
119///
120///          If the conversion was successful, a newly allocated buffer
121///          containing the converted string, which must be freed with g_free().
122///          Otherwise [`None`] and @error will be set.
123///
124/// ## `bytes_read`
125/// location to store the number of bytes in
126///                 the input string that were successfully converted, or [`None`].
127///                 Even if the conversion was successful, this may be
128///                 less than @len if there were partial characters
129///                 at the end of the input. If the error
130///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
131///                 stored will be the byte offset after the last valid
132///                 input sequence.
133#[doc(alias = "g_convert")]
134pub fn convert(
135    str_: &[u8],
136    to_codeset: impl IntoGStr,
137    from_codeset: impl IntoGStr,
138) -> Result<(Slice<u8>, usize), CvtError> {
139    assert!(str_.len() <= isize::MAX as usize);
140    let mut bytes_read = 0;
141    let mut bytes_written = 0;
142    let mut error = ptr::null_mut();
143    let result = to_codeset.run_with_gstr(|to_codeset| {
144        from_codeset.run_with_gstr(|from_codeset| unsafe {
145            ffi::g_convert(
146                str_.as_ptr(),
147                str_.len() as isize,
148                to_codeset.to_glib_none().0,
149                from_codeset.to_glib_none().0,
150                &mut bytes_read,
151                &mut bytes_written,
152                &mut error,
153            )
154        })
155    });
156    if result.is_null() {
157        Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
158    } else {
159        let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
160        Ok((slice, bytes_read))
161    }
162}
163
164/// Converts a string from one character set to another, possibly
165/// including fallback sequences for characters not representable
166/// in the output. Note that it is not guaranteed that the specification
167/// for the fallback sequences in @fallback will be honored. Some
168/// systems may do an approximate conversion from @from_codeset
169/// to @to_codeset in their iconv() functions,
170/// in which case GLib will simply return that approximate conversion.
171///
172/// Note that you should use g_iconv() for streaming conversions.
173/// Despite the fact that @bytes_read can return information about partial
174/// characters, the g_convert_... functions are not generally suitable
175/// for streaming. If the underlying converter maintains internal state,
176/// then this won't be preserved across successive calls to g_convert(),
177/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
178/// this is the GNU C converter for CP1255 which does not emit a base
179/// character until it knows that the next character is not a mark that
180/// could combine with the base character.)
181/// ## `str`
182///
183///                the string to convert.
184/// ## `to_codeset`
185/// name of character set into which to convert @str
186/// ## `from_codeset`
187/// character set of @str.
188/// ## `fallback`
189/// UTF-8 string to use in place of characters not
190///                present in the target encoding. (The string must be
191///                representable in the target encoding).
192///                If [`None`], characters not in the target encoding will
193///                be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
194///
195/// # Returns
196///
197///
198///          If the conversion was successful, a newly allocated buffer
199///          containing the converted string, which must be freed with g_free().
200///          Otherwise [`None`] and @error will be set.
201///
202/// ## `bytes_read`
203/// location to store the number of bytes in
204///                the input string that were successfully converted, or [`None`].
205///                Even if the conversion was successful, this may be
206///                less than @len if there were partial characters
207///                at the end of the input.
208// rustdoc-stripper-ignore-next-stop
209/// Converts a string from one character set to another, possibly
210/// including fallback sequences for characters not representable
211/// in the output. Note that it is not guaranteed that the specification
212/// for the fallback sequences in @fallback will be honored. Some
213/// systems may do an approximate conversion from @from_codeset
214/// to @to_codeset in their iconv() functions,
215/// in which case GLib will simply return that approximate conversion.
216///
217/// Note that you should use g_iconv() for streaming conversions.
218/// Despite the fact that @bytes_read can return information about partial
219/// characters, the g_convert_... functions are not generally suitable
220/// for streaming. If the underlying converter maintains internal state,
221/// then this won't be preserved across successive calls to g_convert(),
222/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
223/// this is the GNU C converter for CP1255 which does not emit a base
224/// character until it knows that the next character is not a mark that
225/// could combine with the base character.)
226/// ## `str`
227///
228///                the string to convert.
229/// ## `to_codeset`
230/// name of character set into which to convert @str
231/// ## `from_codeset`
232/// character set of @str.
233/// ## `fallback`
234/// UTF-8 string to use in place of characters not
235///                present in the target encoding. (The string must be
236///                representable in the target encoding).
237///                If [`None`], characters not in the target encoding will
238///                be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
239///
240/// # Returns
241///
242///
243///          If the conversion was successful, a newly allocated buffer
244///          containing the converted string, which must be freed with g_free().
245///          Otherwise [`None`] and @error will be set.
246///
247/// ## `bytes_read`
248/// location to store the number of bytes in
249///                the input string that were successfully converted, or [`None`].
250///                Even if the conversion was successful, this may be
251///                less than @len if there were partial characters
252///                at the end of the input.
253#[doc(alias = "g_convert_with_fallback")]
254pub fn convert_with_fallback(
255    str_: &[u8],
256    to_codeset: impl IntoGStr,
257    from_codeset: impl IntoGStr,
258    fallback: Option<impl IntoGStr>,
259) -> Result<(Slice<u8>, usize), CvtError> {
260    assert!(str_.len() <= isize::MAX as usize);
261    let mut bytes_read = 0;
262    let mut bytes_written = 0;
263    let mut error = ptr::null_mut();
264    let result = to_codeset.run_with_gstr(|to_codeset| {
265        from_codeset.run_with_gstr(|from_codeset| {
266            fallback.run_with_gstr(|fallback| unsafe {
267                ffi::g_convert_with_fallback(
268                    str_.as_ptr(),
269                    str_.len() as isize,
270                    to_codeset.to_glib_none().0,
271                    from_codeset.to_glib_none().0,
272                    fallback.to_glib_none().0,
273                    &mut bytes_read,
274                    &mut bytes_written,
275                    &mut error,
276                )
277            })
278        })
279    });
280    if result.is_null() {
281        Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
282    } else {
283        let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
284        Ok((slice, bytes_read))
285    }
286}
287
// rustdoc-stripper-ignore-next
/// A wrapper for [`std::io::Error`] that can hold an offset into an input string.
///
/// The `From<io::Error>` conversion always produces [`IConvError::Error`].
#[derive(Debug)]
pub enum IConvError {
    // rustdoc-stripper-ignore-next
    /// An OS error reported without an input offset.
    Error(io::Error),
    // rustdoc-stripper-ignore-next
    /// An OS error together with the number of input bytes consumed before it occurred.
    WithOffset { source: io::Error, offset: usize },
}
295
296impl std::error::Error for IConvError {
297    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
298        match self {
299            IConvError::Error(err) => std::error::Error::source(err),
300            IConvError::WithOffset { source, .. } => Some(source),
301        }
302    }
303}
304
305impl fmt::Display for IConvError {
306    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
307        match self {
308            IConvError::Error(err) => fmt::Display::fmt(err, fmt),
309            IConvError::WithOffset { source, offset } => write!(fmt, "{source} at offset {offset}"),
310        }
311    }
312}
313
314impl std::convert::From<io::Error> for IConvError {
315    fn from(err: io::Error) -> Self {
316        IConvError::Error(err)
317    }
318}
319
320/// The GIConv struct wraps an iconv() conversion descriptor. It contains
321/// private data and should only be accessed using the following functions.
322// rustdoc-stripper-ignore-next-stop
323/// The GIConv struct wraps an iconv() conversion descriptor. It contains
324/// private data and should only be accessed using the following functions.
// Newtype over the raw conversion descriptor. `#[repr(transparent)]` gives it the same layout
// as `ffi::GIConv`; the descriptor is closed with `g_iconv_close` when the value is dropped.
#[derive(Debug)]
#[repr(transparent)]
#[doc(alias = "GIConv")]
pub struct IConv(ffi::GIConv);
329
// NOTE(review): asserts that a descriptor may be moved to another thread. The methods that use
// the descriptor take `&mut self`, so access is exclusive; confirm the underlying iconv
// implementation has no thread affinity.
unsafe impl Send for IConv {}
331
332impl IConv {
333    /// Same as the standard UNIX routine iconv_open(), but
334    /// may be implemented via libiconv on UNIX flavors that lack
335    /// a native implementation.
336    ///
337    /// GLib provides g_convert() and g_locale_to_utf8() which are likely
338    /// more convenient than the raw iconv wrappers.
339    /// ## `to_codeset`
340    /// destination codeset
341    /// ## `from_codeset`
342    /// source codeset
343    ///
344    /// # Returns
345    ///
346    /// a "conversion descriptor", or (GIConv)-1 if
347    ///  opening the converter failed.
348    // rustdoc-stripper-ignore-next-stop
349    /// Same as the standard UNIX routine iconv_open(), but
350    /// may be implemented via libiconv on UNIX flavors that lack
351    /// a native implementation.
352    ///
353    /// GLib provides g_convert() and g_locale_to_utf8() which are likely
354    /// more convenient than the raw iconv wrappers.
355    /// ## `to_codeset`
356    /// destination codeset
357    /// ## `from_codeset`
358    /// source codeset
359    ///
360    /// # Returns
361    ///
362    /// a "conversion descriptor", or (GIConv)-1 if
363    ///  opening the converter failed.
364    #[doc(alias = "g_iconv_open")]
365    #[allow(clippy::unnecessary_lazy_evaluations)]
366    pub fn new(to_codeset: impl IntoGStr, from_codeset: impl IntoGStr) -> Option<Self> {
367        let iconv = to_codeset.run_with_gstr(|to_codeset| {
368            from_codeset.run_with_gstr(|from_codeset| unsafe {
369                ffi::g_iconv_open(to_codeset.to_glib_none().0, from_codeset.to_glib_none().0)
370            })
371        });
372        (iconv as isize != -1).then(|| Self(iconv))
373    }
374    /// Converts a string from one character set to another.
375    ///
376    /// Note that you should use g_iconv() for streaming conversions.
377    /// Despite the fact that @bytes_read can return information about partial
378    /// characters, the g_convert_... functions are not generally suitable
379    /// for streaming. If the underlying converter maintains internal state,
380    /// then this won't be preserved across successive calls to g_convert(),
381    /// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
382    /// this is the GNU C converter for CP1255 which does not emit a base
383    /// character until it knows that the next character is not a mark that
384    /// could combine with the base character.)
385    ///
386    /// Characters which are valid in the input character set, but which have no
387    /// representation in the output character set will result in a
388    /// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error. This is in contrast to the iconv()
389    /// specification, which leaves this behaviour implementation defined. Note that
390    /// this is the same error code as is returned for an invalid byte sequence in
391    /// the input character set. To get defined behaviour for conversion of
392    /// unrepresentable characters, use g_convert_with_fallback().
393    /// ## `str`
394    ///
395    ///                 the string to convert.
396    /// ## `converter`
397    /// conversion descriptor from g_iconv_open()
398    ///
399    /// # Returns
400    ///
401    ///
402    ///               If the conversion was successful, a newly allocated buffer
403    ///               containing the converted string, which must be freed with
404    ///               g_free(). Otherwise [`None`] and @error will be set.
405    ///
406    /// ## `bytes_read`
407    /// location to store the number of bytes in
408    ///                 the input string that were successfully converted, or [`None`].
409    ///                 Even if the conversion was successful, this may be
410    ///                 less than @len if there were partial characters
411    ///                 at the end of the input. If the error
412    ///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
413    ///                 stored will be the byte offset after the last valid
414    ///                 input sequence.
415    // rustdoc-stripper-ignore-next-stop
416    /// Converts a string from one character set to another.
417    ///
418    /// Note that you should use g_iconv() for streaming conversions.
419    /// Despite the fact that @bytes_read can return information about partial
420    /// characters, the g_convert_... functions are not generally suitable
421    /// for streaming. If the underlying converter maintains internal state,
422    /// then this won't be preserved across successive calls to g_convert(),
423    /// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
424    /// this is the GNU C converter for CP1255 which does not emit a base
425    /// character until it knows that the next character is not a mark that
426    /// could combine with the base character.)
427    ///
428    /// Characters which are valid in the input character set, but which have no
429    /// representation in the output character set will result in a
430    /// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error. This is in contrast to the iconv()
431    /// specification, which leaves this behaviour implementation defined. Note that
432    /// this is the same error code as is returned for an invalid byte sequence in
433    /// the input character set. To get defined behaviour for conversion of
434    /// unrepresentable characters, use g_convert_with_fallback().
435    /// ## `str`
436    ///
437    ///                 the string to convert.
438    /// ## `converter`
439    /// conversion descriptor from g_iconv_open()
440    ///
441    /// # Returns
442    ///
443    ///
444    ///               If the conversion was successful, a newly allocated buffer
445    ///               containing the converted string, which must be freed with
446    ///               g_free(). Otherwise [`None`] and @error will be set.
447    ///
448    /// ## `bytes_read`
449    /// location to store the number of bytes in
450    ///                 the input string that were successfully converted, or [`None`].
451    ///                 Even if the conversion was successful, this may be
452    ///                 less than @len if there were partial characters
453    ///                 at the end of the input. If the error
454    ///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
455    ///                 stored will be the byte offset after the last valid
456    ///                 input sequence.
457    #[doc(alias = "g_convert_with_iconv")]
458    pub fn convert(&mut self, str_: &[u8]) -> Result<(Slice<u8>, usize), CvtError> {
459        assert!(str_.len() <= isize::MAX as usize);
460        let mut bytes_read = 0;
461        let mut bytes_written = 0;
462        let mut error = ptr::null_mut();
463        let result = unsafe {
464            ffi::g_convert_with_iconv(
465                str_.as_ptr(),
466                str_.len() as isize,
467                self.0,
468                &mut bytes_read,
469                &mut bytes_written,
470                &mut error,
471            )
472        };
473        if result.is_null() {
474            Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
475        } else {
476            let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
477            Ok((slice, bytes_read))
478        }
479    }
480    /// Same as the standard UNIX routine iconv(), but
481    /// may be implemented via libiconv on UNIX flavors that lack
482    /// a native implementation.
483    ///
484    /// GLib provides g_convert() and g_locale_to_utf8() which are likely
485    /// more convenient than the raw iconv wrappers.
486    ///
487    /// Note that the behaviour of iconv() for characters which are valid in the
488    /// input character set, but which have no representation in the output character
489    /// set, is implementation defined. This function may return success (with a
490    /// positive number of non-reversible conversions as replacement characters were
491    /// used), or it may return -1 and set an error such as `EILSEQ`, in such a
492    /// situation.
493    ///
494    /// See [`iconv(3posix)`](man:iconv(3posix)) and [`iconv(3)`](man:iconv(3)) for more details about behavior when an
495    /// error occurs.
496    /// ## `converter`
497    /// conversion descriptor from g_iconv_open()
498    /// ## `inbuf`
499    /// bytes to convert
500    /// ## `inbytes_left`
501    /// inout parameter, bytes remaining to convert in @inbuf
502    /// ## `outbuf`
503    /// converted output bytes
504    /// ## `outbytes_left`
505    /// inout parameter, bytes available to fill in @outbuf
506    ///
507    /// # Returns
508    ///
509    /// count of non-reversible conversions, or -1 on error
510    // rustdoc-stripper-ignore-next-stop
511    /// Same as the standard UNIX routine iconv(), but
512    /// may be implemented via libiconv on UNIX flavors that lack
513    /// a native implementation.
514    ///
515    /// GLib provides g_convert() and g_locale_to_utf8() which are likely
516    /// more convenient than the raw iconv wrappers.
517    ///
518    /// Note that the behaviour of iconv() for characters which are valid in the
519    /// input character set, but which have no representation in the output character
520    /// set, is implementation defined. This function may return success (with a
521    /// positive number of non-reversible conversions as replacement characters were
522    /// used), or it may return -1 and set an error such as `EILSEQ`, in such a
523    /// situation.
524    ///
525    /// See [`iconv(3posix)`](man:iconv(3posix)) and [`iconv(3)`](man:iconv(3)) for more details about behavior when an
526    /// error occurs.
527    /// ## `converter`
528    /// conversion descriptor from g_iconv_open()
529    /// ## `inbuf`
530    /// bytes to convert
531    /// ## `inbytes_left`
532    /// inout parameter, bytes remaining to convert in @inbuf
533    /// ## `outbuf`
534    /// converted output bytes
535    /// ## `outbytes_left`
536    /// inout parameter, bytes available to fill in @outbuf
537    ///
538    /// # Returns
539    ///
540    /// count of non-reversible conversions, or -1 on error
541    #[doc(alias = "g_iconv")]
542    pub fn iconv(
543        &mut self,
544        inbuf: Option<&[u8]>,
545        outbuf: Option<&mut [std::mem::MaybeUninit<u8>]>,
546    ) -> Result<(usize, usize, usize), IConvError> {
547        let input_len = inbuf.as_ref().map(|b| b.len()).unwrap_or_default();
548        let mut inbytes_left = input_len;
549        let mut outbytes_left = outbuf.as_ref().map(|b| b.len()).unwrap_or_default();
550        let mut inbuf = inbuf
551            .map(|b| mut_override(b.as_ptr()) as *mut c_char)
552            .unwrap_or_else(ptr::null_mut);
553        let mut outbuf = outbuf
554            .map(|b| b.as_mut_ptr() as *mut c_char)
555            .unwrap_or_else(ptr::null_mut);
556        let conversions = unsafe {
557            ffi::g_iconv(
558                self.0,
559                &mut inbuf,
560                &mut inbytes_left,
561                &mut outbuf,
562                &mut outbytes_left,
563            )
564        };
565        if conversions as isize == -1 {
566            let err = io::Error::last_os_error();
567            let code = err.raw_os_error().unwrap();
568            if code == libc::EILSEQ || code == libc::EINVAL {
569                Err(IConvError::WithOffset {
570                    source: err,
571                    offset: input_len - inbytes_left,
572                })
573            } else {
574                Err(err.into())
575            }
576        } else {
577            Ok((conversions, inbytes_left, outbytes_left))
578        }
579    }
580}
581
582impl Drop for IConv {
583    #[inline]
584    fn drop(&mut self) {
585        unsafe {
586            ffi::g_iconv_close(self.0);
587        }
588    }
589}
590
591/// Determines the preferred character sets used for filenames.
592/// The first character set from the @charsets is the filename encoding, the
593/// subsequent character sets are used when trying to generate a displayable
594/// representation of a filename, see g_filename_display_name().
595///
596/// On Unix, the character sets are determined by consulting the
597/// environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
598/// On Windows, the character set used in the GLib API is always UTF-8
599/// and said environment variables have no effect.
600///
601/// `G_FILENAME_ENCODING` may be set to a comma-separated list of
602/// character set names. The special token `@locale` is taken to mean the
603/// character set for the [current locale](running.html#locale).
604/// If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
605/// the character set of the current locale is taken as the filename
606/// encoding. If neither environment variable  is set, UTF-8 is taken
607/// as the filename encoding, but the character set of the current locale
608/// is also put in the list of encodings.
609///
610/// The returned @charsets belong to GLib and must not be freed.
611///
612/// Note that on Unix, regardless of the locale character set or
613/// `G_FILENAME_ENCODING` value, the actual file names present
614/// on a system might be in any random encoding or just gibberish.
615///
616/// # Returns
617///
618/// [`true`] if the filename encoding is UTF-8.
619///
620/// ## `filename_charsets`
621///
622///    return location for the [`None`]-terminated list of encoding names
623// rustdoc-stripper-ignore-next-stop
624/// Determines the preferred character sets used for filenames.
625/// The first character set from the @charsets is the filename encoding, the
626/// subsequent character sets are used when trying to generate a displayable
627/// representation of a filename, see g_filename_display_name().
628///
629/// On Unix, the character sets are determined by consulting the
630/// environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
631/// On Windows, the character set used in the GLib API is always UTF-8
632/// and said environment variables have no effect.
633///
634/// `G_FILENAME_ENCODING` may be set to a comma-separated list of
635/// character set names. The special token `@locale` is taken to mean the
636/// character set for the [current locale](running.html#locale).
637/// If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
638/// the character set of the current locale is taken as the filename
639/// encoding. If neither environment variable  is set, UTF-8 is taken
640/// as the filename encoding, but the character set of the current locale
641/// is also put in the list of encodings.
642///
643/// The returned @charsets belong to GLib and must not be freed.
644///
645/// Note that on Unix, regardless of the locale character set or
646/// `G_FILENAME_ENCODING` value, the actual file names present
647/// on a system might be in any random encoding or just gibberish.
648///
649/// # Returns
650///
651/// [`true`] if the filename encoding is UTF-8.
652///
653/// ## `filename_charsets`
654///
655///    return location for the [`None`]-terminated list of encoding names
656#[doc(alias = "g_get_filename_charsets")]
657#[doc(alias = "get_filename_charsets")]
658pub fn filename_charsets() -> (bool, Vec<GString>) {
659    let mut filename_charsets = ptr::null_mut();
660    unsafe {
661        let is_utf8 = ffi::g_get_filename_charsets(&mut filename_charsets);
662        (
663            from_glib(is_utf8),
664            FromGlibPtrContainer::from_glib_none(filename_charsets),
665        )
666    }
667}
668
669/// Converts a string from UTF-8 to the encoding GLib uses for
670/// filenames. Note that on Windows GLib uses UTF-8 for filenames;
671/// on other platforms, this function indirectly depends on the
672/// [current locale](running.html#locale).
673///
674/// The input string shall not contain nul characters even if the @len
675/// argument is positive. A nul character found inside the string will result
676/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. If the filename encoding is
677/// not UTF-8 and the conversion output contains a nul character, the error
678/// [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the function returns [`None`].
679/// ## `utf8string`
680/// a UTF-8 encoded string.
681/// ## `len`
682/// the length of the string, or -1 if the string is
683///                 nul-terminated.
684///
685/// # Returns
686///
687///
688///               The converted string, or [`None`] on an error.
689///
690/// ## `bytes_read`
691/// location to store the number of bytes in
692///                 the input string that were successfully converted, or [`None`].
693///                 Even if the conversion was successful, this may be
694///                 less than @len if there were partial characters
695///                 at the end of the input. If the error
696///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
697///                 stored will be the byte offset after the last valid
698///                 input sequence.
699///
700/// ## `bytes_written`
701/// the number of bytes stored in
702///                 the output buffer (not including the terminating nul).
703// rustdoc-stripper-ignore-next-stop
704/// Converts a string from UTF-8 to the encoding GLib uses for
705/// filenames. Note that on Windows GLib uses UTF-8 for filenames;
706/// on other platforms, this function indirectly depends on the
707/// [current locale](running.html#locale).
708///
709/// The input string shall not contain nul characters even if the @len
710/// argument is positive. A nul character found inside the string will result
711/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. If the filename encoding is
712/// not UTF-8 and the conversion output contains a nul character, the error
713/// [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the function returns [`None`].
714/// ## `utf8string`
715/// a UTF-8 encoded string.
716/// ## `len`
717/// the length of the string, or -1 if the string is
718///                 nul-terminated.
719///
720/// # Returns
721///
722///
723///               The converted string, or [`None`] on an error.
724///
725/// ## `bytes_read`
726/// location to store the number of bytes in
727///                 the input string that were successfully converted, or [`None`].
728///                 Even if the conversion was successful, this may be
729///                 less than @len if there were partial characters
730///                 at the end of the input. If the error
731///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
732///                 stored will be the byte offset after the last valid
733///                 input sequence.
734///
735/// ## `bytes_written`
736/// the number of bytes stored in
737///                 the output buffer (not including the terminating nul).
738#[doc(alias = "g_filename_from_utf8")]
739pub fn filename_from_utf8(utf8string: impl IntoGStr) -> Result<(PathBuf, usize), CvtError> {
740    let mut bytes_read = 0;
741    let mut bytes_written = std::mem::MaybeUninit::uninit();
742    let mut error = ptr::null_mut();
743    let ret = utf8string.run_with_gstr(|utf8string| {
744        assert!(utf8string.len() <= isize::MAX as usize);
745        let len = utf8string.len() as isize;
746        unsafe {
747            ffi::g_filename_from_utf8(
748                utf8string.to_glib_none().0,
749                len,
750                &mut bytes_read,
751                bytes_written.as_mut_ptr(),
752                &mut error,
753            )
754        }
755    });
756    if error.is_null() {
757        Ok(unsafe {
758            (
759                PathBuf::from_glib_full_num(ret, bytes_written.assume_init()),
760                bytes_read,
761            )
762        })
763    } else {
764        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
765    }
766}
767
768/// Converts a string which is in the encoding used by GLib for
769/// filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
770/// for filenames; on other platforms, this function indirectly depends on
771/// the [current locale](running.html#locale).
772///
773/// The input string shall not contain nul characters even if the @len
774/// argument is positive. A nul character found inside the string will result
775/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence].
776/// If the source encoding is not UTF-8 and the conversion output contains a
777/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
778/// function returns [`None`]. Use g_convert() to produce output that
779/// may contain embedded nul characters.
780/// ## `opsysstring`
781/// a string in the encoding for filenames
782/// ## `len`
783/// the length of the string, or -1 if the string is
784///                 nul-terminated (Note that some encodings may allow nul
785///                 bytes to occur inside strings. In that case, using -1
786///                 for the @len parameter is unsafe)
787///
788/// # Returns
789///
790/// The converted string, or [`None`] on an error.
791///
792/// ## `bytes_read`
793/// location to store the number of bytes in the
794///                 input string that were successfully converted, or [`None`].
795///                 Even if the conversion was successful, this may be
796///                 less than @len if there were partial characters
797///                 at the end of the input. If the error
798///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
799///                 stored will be the byte offset after the last valid
800///                 input sequence.
801///
802/// ## `bytes_written`
803/// the number of bytes stored in the output
804///                 buffer (not including the terminating nul).
805// rustdoc-stripper-ignore-next-stop
806/// Converts a string which is in the encoding used by GLib for
807/// filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
808/// for filenames; on other platforms, this function indirectly depends on
809/// the [current locale](running.html#locale).
810///
811/// The input string shall not contain nul characters even if the @len
812/// argument is positive. A nul character found inside the string will result
813/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence].
814/// If the source encoding is not UTF-8 and the conversion output contains a
815/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
816/// function returns [`None`]. Use g_convert() to produce output that
817/// may contain embedded nul characters.
818/// ## `opsysstring`
819/// a string in the encoding for filenames
820/// ## `len`
821/// the length of the string, or -1 if the string is
822///                 nul-terminated (Note that some encodings may allow nul
823///                 bytes to occur inside strings. In that case, using -1
824///                 for the @len parameter is unsafe)
825///
826/// # Returns
827///
828/// The converted string, or [`None`] on an error.
829///
830/// ## `bytes_read`
831/// location to store the number of bytes in the
832///                 input string that were successfully converted, or [`None`].
833///                 Even if the conversion was successful, this may be
834///                 less than @len if there were partial characters
835///                 at the end of the input. If the error
836///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
837///                 stored will be the byte offset after the last valid
838///                 input sequence.
839///
840/// ## `bytes_written`
841/// the number of bytes stored in the output
842///                 buffer (not including the terminating nul).
843#[doc(alias = "g_filename_to_utf8")]
844pub fn filename_to_utf8(
845    opsysstring: impl AsRef<std::path::Path>,
846) -> Result<(crate::GString, usize), CvtError> {
847    let path = opsysstring.as_ref().to_glib_none();
848    let mut bytes_read = 0;
849    let mut bytes_written = std::mem::MaybeUninit::uninit();
850    let mut error = ptr::null_mut();
851    let ret = unsafe {
852        ffi::g_filename_to_utf8(
853            path.0,
854            path.1.as_bytes().len() as isize,
855            &mut bytes_read,
856            bytes_written.as_mut_ptr(),
857            &mut error,
858        )
859    };
860    if error.is_null() {
861        Ok(unsafe {
862            (
863                GString::from_glib_full_num(ret, bytes_written.assume_init()),
864                bytes_read,
865            )
866        })
867    } else {
868        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
869    }
870}
871
872/// Converts a string from UTF-8 to the encoding used for strings by
873/// the C runtime (usually the same as that used by the operating
874/// system) in the [current locale](running.html#locale).
875/// On Windows this means the system codepage.
876///
877/// The input string shall not contain nul characters even if the @len
878/// argument is positive. A nul character found inside the string will result
879/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. Use g_convert() to convert
880/// input that may contain embedded nul characters.
881/// ## `utf8string`
882/// a UTF-8 encoded string
883/// ## `len`
884/// the length of the string, or -1 if the string is
885///                 nul-terminated.
886///
887/// # Returns
888///
889///
890///          A newly-allocated buffer containing the converted string,
891///          or [`None`] on an error, and error will be set.
892///
893/// ## `bytes_read`
894/// location to store the number of bytes in the
895///                 input string that were successfully converted, or [`None`].
896///                 Even if the conversion was successful, this may be
897///                 less than @len if there were partial characters
898///                 at the end of the input. If the error
899///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
900///                 stored will be the byte offset after the last valid
901///                 input sequence.
902// rustdoc-stripper-ignore-next-stop
903/// Converts a string from UTF-8 to the encoding used for strings by
904/// the C runtime (usually the same as that used by the operating
905/// system) in the [current locale](running.html#locale).
906/// On Windows this means the system codepage.
907///
908/// The input string shall not contain nul characters even if the @len
909/// argument is positive. A nul character found inside the string will result
910/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. Use g_convert() to convert
911/// input that may contain embedded nul characters.
912/// ## `utf8string`
913/// a UTF-8 encoded string
914/// ## `len`
915/// the length of the string, or -1 if the string is
916///                 nul-terminated.
917///
918/// # Returns
919///
920///
921///          A newly-allocated buffer containing the converted string,
922///          or [`None`] on an error, and error will be set.
923///
924/// ## `bytes_read`
925/// location to store the number of bytes in the
926///                 input string that were successfully converted, or [`None`].
927///                 Even if the conversion was successful, this may be
928///                 less than @len if there were partial characters
929///                 at the end of the input. If the error
930///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
931///                 stored will be the byte offset after the last valid
932///                 input sequence.
933#[doc(alias = "g_locale_from_utf8")]
934pub fn locale_from_utf8(utf8string: impl IntoGStr) -> Result<(Slice<u8>, usize), CvtError> {
935    let mut bytes_read = 0;
936    let mut bytes_written = std::mem::MaybeUninit::uninit();
937    let mut error = ptr::null_mut();
938    let ret = utf8string.run_with_gstr(|utf8string| {
939        assert!(utf8string.len() <= isize::MAX as usize);
940        unsafe {
941            ffi::g_locale_from_utf8(
942                utf8string.as_ptr(),
943                utf8string.len() as isize,
944                &mut bytes_read,
945                bytes_written.as_mut_ptr(),
946                &mut error,
947            )
948        }
949    });
950    if error.is_null() {
951        Ok(unsafe {
952            (
953                Slice::from_glib_full_num(ret, bytes_written.assume_init() + 1),
954                bytes_read,
955            )
956        })
957    } else {
958        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
959    }
960}
961
962/// Converts a string which is in the encoding used for strings by
963/// the C runtime (usually the same as that used by the operating
964/// system) in the [current locale](running.html#locale) into a UTF-8 string.
965///
966/// If the source encoding is not UTF-8 and the conversion output contains a
967/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
968/// function returns [`None`].
969/// If the source encoding is UTF-8, an embedded nul character is treated with
970/// the [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error for backward compatibility with
971/// earlier versions of this library. Use g_convert() to produce output that
972/// may contain embedded nul characters.
973/// ## `opsysstring`
974/// a string in the
975///                 encoding of the current locale. On Windows
976///                 this means the system codepage.
977///
978/// # Returns
979///
980/// The converted string, or [`None`] on an error.
981///
982/// ## `bytes_read`
983/// location to store the number of bytes in the
984///                 input string that were successfully converted, or [`None`].
985///                 Even if the conversion was successful, this may be
986///                 less than @len if there were partial characters
987///                 at the end of the input. If the error
988///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
989///                 stored will be the byte offset after the last valid
990///                 input sequence.
991///
992/// ## `bytes_written`
993/// the number of bytes stored in the output
994///                 buffer (not including the terminating nul).
995// rustdoc-stripper-ignore-next-stop
996/// Converts a string which is in the encoding used for strings by
997/// the C runtime (usually the same as that used by the operating
998/// system) in the [current locale](running.html#locale) into a UTF-8 string.
999///
1000/// If the source encoding is not UTF-8 and the conversion output contains a
1001/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
1002/// function returns [`None`].
1003/// If the source encoding is UTF-8, an embedded nul character is treated with
1004/// the [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error for backward compatibility with
1005/// earlier versions of this library. Use g_convert() to produce output that
1006/// may contain embedded nul characters.
1007/// ## `opsysstring`
1008/// a string in the
1009///                 encoding of the current locale. On Windows
1010///                 this means the system codepage.
1011///
1012/// # Returns
1013///
1014/// The converted string, or [`None`] on an error.
1015///
1016/// ## `bytes_read`
1017/// location to store the number of bytes in the
1018///                 input string that were successfully converted, or [`None`].
1019///                 Even if the conversion was successful, this may be
1020///                 less than @len if there were partial characters
1021///                 at the end of the input. If the error
1022///                 [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
1023///                 stored will be the byte offset after the last valid
1024///                 input sequence.
1025///
1026/// ## `bytes_written`
1027/// the number of bytes stored in the output
1028///                 buffer (not including the terminating nul).
1029#[doc(alias = "g_locale_to_utf8")]
1030pub fn locale_to_utf8(opsysstring: &[u8]) -> Result<(crate::GString, usize), CvtError> {
1031    let len = opsysstring.len() as isize;
1032    let mut bytes_read = 0;
1033    let mut bytes_written = std::mem::MaybeUninit::uninit();
1034    let mut error = ptr::null_mut();
1035    let ret = unsafe {
1036        ffi::g_locale_to_utf8(
1037            opsysstring.to_glib_none().0,
1038            len,
1039            &mut bytes_read,
1040            bytes_written.as_mut_ptr(),
1041            &mut error,
1042        )
1043    };
1044    if error.is_null() {
1045        Ok(unsafe {
1046            (
1047                GString::from_glib_full_num(ret, bytes_written.assume_init()),
1048                bytes_read,
1049            )
1050        })
1051    } else {
1052        Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
1053    }
1054}
1055
1056#[doc(alias = "g_utf8_to_ucs4")]
1057#[doc(alias = "g_utf8_to_ucs4_fast")]
1058#[doc(alias = "utf8_to_ucs4")]
1059pub fn utf8_to_utf32(str: impl AsRef<str>) -> Slice<char> {
1060    unsafe {
1061        let mut items_written = 0;
1062
1063        let str_as_utf32 = ffi::g_utf8_to_ucs4_fast(
1064            str.as_ref().as_ptr().cast::<c_char>(),
1065            str.as_ref().len() as _,
1066            &mut items_written,
1067        );
1068
1069        // NOTE: We assume that u32 and char have the same layout and trust that glib won't give us
1070        //       invalid UTF-32 codepoints
1071        Slice::from_glib_full_num(str_as_utf32, items_written as usize)
1072    }
1073}
1074
1075#[doc(alias = "g_ucs4_to_utf8")]
1076#[doc(alias = "ucs4_to_utf8")]
1077pub fn utf32_to_utf8(str: impl AsRef<[char]>) -> GString {
1078    let mut items_read = 0;
1079    let mut items_written = 0;
1080    let mut error = ptr::null_mut();
1081
1082    unsafe {
1083        let str_as_utf8 = ffi::g_ucs4_to_utf8(
1084            str.as_ref().as_ptr().cast::<u32>(),
1085            str.as_ref().len() as _,
1086            &mut items_read,
1087            &mut items_written,
1088            &mut error,
1089        );
1090
1091        debug_assert!(
1092            error.is_null(),
1093            "Rust `char` should always be convertible to UTF-8"
1094        );
1095
1096        GString::from_glib_full_num(str_as_utf8, items_written as usize)
1097    }
1098}
1099
1100#[doc(alias = "g_utf8_casefold")]
1101#[doc(alias = "utf8_casefold")]
1102pub fn casefold(str: impl AsRef<str>) -> GString {
1103    unsafe {
1104        let str = ffi::g_utf8_casefold(str.as_ref().as_ptr().cast(), str.as_ref().len() as isize);
1105
1106        from_glib_full(str)
1107    }
1108}
1109
1110#[doc(alias = "g_utf8_normalize")]
1111#[doc(alias = "utf8_normalize")]
1112pub fn normalize(str: impl AsRef<str>, mode: NormalizeMode) -> GString {
1113    unsafe {
1114        let str = ffi::g_utf8_normalize(
1115            str.as_ref().as_ptr().cast(),
1116            str.as_ref().len() as isize,
1117            mode.into_glib(),
1118        );
1119
1120        from_glib_full(str)
1121    }
1122}
1123
#[cfg(test)]
mod tests {
    // One-shot conversion: plain input succeeds, input containing the byte 0xAA fails, and
    // `convert_with_fallback` substitutes the unrepresentable character.
    #[test]
    fn convert_ascii() {
        let plain = super::convert(b"Hello", "utf-8", "ascii");
        assert!(plain.is_ok());

        let rejected = super::convert(b"He\xaallo", "utf-8", "ascii");
        assert!(rejected.is_err());

        // Without an explicit fallback the character shows up as a `\u00e9` escape.
        let escaped =
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", crate::NONE_STR)
                .unwrap();
        assert_eq!(escaped.0.as_slice(), b"H\\u00e9llo");

        // With an explicit fallback string that string is used instead.
        let replaced =
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", Some("_")).unwrap();
        assert_eq!(replaced.0.as_slice(), b"H_llo");
    }

    // Same checks through a reusable `IConv` converter, plus an unknown charset name.
    #[test]
    fn iconv() {
        let mut converter = super::IConv::new("utf-8", "ascii").unwrap();

        let plain = converter.convert(b"Hello");
        assert!(plain.is_ok());

        let rejected = converter.convert(b"He\xaallo");
        assert!(rejected.is_err());

        let unknown = super::IConv::new("utf-8", "badcharset123456789");
        assert!(unknown.is_none());
    }

    // Smoke test: only checks that the call completes; the result is discarded.
    #[test]
    fn filename_charsets() {
        let _ = super::filename_charsets();
    }

    // Conversions between UTF-8 and UTF-32, including characters outside ASCII.
    #[test]
    fn utf8_and_utf32() {
        let code_points = ['A', 'b', '🤔'];
        let encoded = super::utf32_to_utf8(code_points);
        assert_eq!(encoded, "Ab🤔");

        let text = "🤔 ț";
        let decoded = super::utf8_to_utf32(text);
        assert_eq!(decoded.as_slice(), &['🤔', ' ', 'ț']);
    }
}