glib/convert.rs
1// Take a look at the license at the top of the repository in the LICENSE file.
2
3use std::{fmt, io, os::raw::c_char, path::PathBuf, ptr};
4
5use crate::{ffi, translate::*, ConvertError, Error, GString, NormalizeMode, Slice};
6
7// rustdoc-stripper-ignore-next
8/// A wrapper for [`ConvertError`](crate::ConvertError) that can hold an offset into the input
9/// string.
10#[derive(Debug)]
11pub enum CvtError {
12 Convert(Error),
13 IllegalSequence { source: Error, offset: usize },
14}
15
16impl std::error::Error for CvtError {
17 fn source(&self) -> ::core::option::Option<&(dyn std::error::Error + 'static)> {
18 match self {
19 CvtError::Convert(err) => std::error::Error::source(err),
20 CvtError::IllegalSequence { source, .. } => Some(source),
21 }
22 }
23}
24
25impl fmt::Display for CvtError {
26 fn fmt(&self, fmt: &mut fmt::Formatter) -> ::core::fmt::Result {
27 match self {
28 CvtError::Convert(err) => fmt::Display::fmt(err, fmt),
29 CvtError::IllegalSequence { source, offset } => {
30 write!(fmt, "{source} at offset {offset}")
31 }
32 }
33 }
34}
35
36impl std::convert::From<Error> for CvtError {
37 fn from(err: Error) -> Self {
38 CvtError::Convert(err)
39 }
40}
41
42impl CvtError {
43 #[inline]
44 fn new(err: Error, bytes_read: usize) -> Self {
45 if err.kind::<ConvertError>() == Some(ConvertError::IllegalSequence) {
46 Self::IllegalSequence {
47 source: err,
48 offset: bytes_read,
49 }
50 } else {
51 err.into()
52 }
53 }
54}
55
56/// Converts a string from one character set to another.
57///
58/// Note that you should use g_iconv() for streaming conversions.
59/// Despite the fact that @bytes_read can return information about partial
60/// characters, the g_convert_... functions are not generally suitable
61/// for streaming. If the underlying converter maintains internal state,
62/// then this won't be preserved across successive calls to g_convert(),
63/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
64/// this is the GNU C converter for CP1255 which does not emit a base
65/// character until it knows that the next character is not a mark that
66/// could combine with the base character.)
67///
68/// Using extensions such as "//TRANSLIT" may not work (or may not work
69/// well) on many platforms. Consider using g_str_to_ascii() instead.
70/// ## `str`
71///
72/// the string to convert.
73/// ## `to_codeset`
74/// name of character set into which to convert @str
75/// ## `from_codeset`
76/// character set of @str.
77///
78/// # Returns
79///
80///
81/// If the conversion was successful, a newly allocated buffer
82/// containing the converted string, which must be freed with g_free().
83/// Otherwise [`None`] and @error will be set.
84///
85/// ## `bytes_read`
86/// location to store the number of bytes in
87/// the input string that were successfully converted, or [`None`].
88/// Even if the conversion was successful, this may be
89/// less than @len if there were partial characters
90/// at the end of the input. If the error
91/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
92/// stored will be the byte offset after the last valid
93/// input sequence.
94#[doc(alias = "g_convert")]
95pub fn convert(
96 str_: &[u8],
97 to_codeset: impl IntoGStr,
98 from_codeset: impl IntoGStr,
99) -> Result<(Slice<u8>, usize), CvtError> {
100 assert!(str_.len() <= isize::MAX as usize);
101 let mut bytes_read = 0;
102 let mut bytes_written = 0;
103 let mut error = ptr::null_mut();
104 let result = to_codeset.run_with_gstr(|to_codeset| {
105 from_codeset.run_with_gstr(|from_codeset| unsafe {
106 ffi::g_convert(
107 str_.as_ptr(),
108 str_.len() as isize,
109 to_codeset.to_glib_none().0,
110 from_codeset.to_glib_none().0,
111 &mut bytes_read,
112 &mut bytes_written,
113 &mut error,
114 )
115 })
116 });
117 if result.is_null() {
118 Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
119 } else {
120 let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
121 Ok((slice, bytes_read))
122 }
123}
124
125/// Converts a string from one character set to another, possibly
126/// including fallback sequences for characters not representable
127/// in the output. Note that it is not guaranteed that the specification
128/// for the fallback sequences in @fallback will be honored. Some
129/// systems may do an approximate conversion from @from_codeset
130/// to @to_codeset in their iconv() functions,
131/// in which case GLib will simply return that approximate conversion.
132///
133/// Note that you should use g_iconv() for streaming conversions.
134/// Despite the fact that @bytes_read can return information about partial
135/// characters, the g_convert_... functions are not generally suitable
136/// for streaming. If the underlying converter maintains internal state,
137/// then this won't be preserved across successive calls to g_convert(),
138/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
139/// this is the GNU C converter for CP1255 which does not emit a base
140/// character until it knows that the next character is not a mark that
141/// could combine with the base character.)
142/// ## `str`
143///
144/// the string to convert.
145/// ## `to_codeset`
146/// name of character set into which to convert @str
147/// ## `from_codeset`
148/// character set of @str.
149/// ## `fallback`
150/// UTF-8 string to use in place of characters not
151/// present in the target encoding. (The string must be
152/// representable in the target encoding).
153/// If [`None`], characters not in the target encoding will
154/// be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
155///
156/// # Returns
157///
158///
159/// If the conversion was successful, a newly allocated buffer
160/// containing the converted string, which must be freed with g_free().
161/// Otherwise [`None`] and @error will be set.
162///
163/// ## `bytes_read`
164/// location to store the number of bytes in
165/// the input string that were successfully converted, or [`None`].
166/// Even if the conversion was successful, this may be
167/// less than @len if there were partial characters
168/// at the end of the input.
169#[doc(alias = "g_convert_with_fallback")]
170pub fn convert_with_fallback(
171 str_: &[u8],
172 to_codeset: impl IntoGStr,
173 from_codeset: impl IntoGStr,
174 fallback: Option<impl IntoGStr>,
175) -> Result<(Slice<u8>, usize), CvtError> {
176 assert!(str_.len() <= isize::MAX as usize);
177 let mut bytes_read = 0;
178 let mut bytes_written = 0;
179 let mut error = ptr::null_mut();
180 let result = to_codeset.run_with_gstr(|to_codeset| {
181 from_codeset.run_with_gstr(|from_codeset| {
182 fallback.run_with_gstr(|fallback| unsafe {
183 ffi::g_convert_with_fallback(
184 str_.as_ptr(),
185 str_.len() as isize,
186 to_codeset.to_glib_none().0,
187 from_codeset.to_glib_none().0,
188 fallback.to_glib_none().0,
189 &mut bytes_read,
190 &mut bytes_written,
191 &mut error,
192 )
193 })
194 })
195 });
196 if result.is_null() {
197 Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
198 } else {
199 let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
200 Ok((slice, bytes_read))
201 }
202}
203
// rustdoc-stripper-ignore-next
/// Error returned by [`IConv::iconv`].
///
/// `Error` carries a plain [`std::io::Error`]. `WithOffset` additionally stores, in `offset`,
/// a position in the input string that belongs to the error.
#[derive(Debug)]
pub enum IConvError {
    Error(io::Error),
    WithOffset { source: io::Error, offset: usize },
}
211
212impl std::error::Error for IConvError {
213 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
214 match self {
215 IConvError::Error(err) => std::error::Error::source(err),
216 IConvError::WithOffset { source, .. } => Some(source),
217 }
218 }
219}
220
221impl fmt::Display for IConvError {
222 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
223 match self {
224 IConvError::Error(err) => fmt::Display::fmt(err, fmt),
225 IConvError::WithOffset { source, offset } => write!(fmt, "{source} at offset {offset}"),
226 }
227 }
228}
229
230impl std::convert::From<io::Error> for IConvError {
231 fn from(err: io::Error) -> Self {
232 IConvError::Error(err)
233 }
234}
235
236/// The GIConv struct wraps an iconv() conversion descriptor. It contains
237/// private data and should only be accessed using the following functions.
238#[derive(Debug)]
239#[repr(transparent)]
240#[doc(alias = "GIConv")]
241pub struct IConv(ffi::GIConv);
242
243unsafe impl Send for IConv {}
244
245impl IConv {
246 /// Same as the standard UNIX routine iconv_open(), but
247 /// may be implemented via libiconv on UNIX flavors that lack
248 /// a native implementation.
249 ///
250 /// GLib provides g_convert() and g_locale_to_utf8() which are likely
251 /// more convenient than the raw iconv wrappers.
252 /// ## `to_codeset`
253 /// destination codeset
254 /// ## `from_codeset`
255 /// source codeset
256 ///
257 /// # Returns
258 ///
259 /// a "conversion descriptor", or (GIConv)-1 if
260 /// opening the converter failed.
261 #[doc(alias = "g_iconv_open")]
262 #[allow(clippy::unnecessary_lazy_evaluations)]
263 pub fn new(to_codeset: impl IntoGStr, from_codeset: impl IntoGStr) -> Option<Self> {
264 let iconv = to_codeset.run_with_gstr(|to_codeset| {
265 from_codeset.run_with_gstr(|from_codeset| unsafe {
266 ffi::g_iconv_open(to_codeset.to_glib_none().0, from_codeset.to_glib_none().0)
267 })
268 });
269 (iconv as isize != -1).then(|| Self(iconv))
270 }
271 /// Converts a string from one character set to another.
272 ///
273 /// Note that you should use g_iconv() for streaming conversions.
274 /// Despite the fact that @bytes_read can return information about partial
275 /// characters, the g_convert_... functions are not generally suitable
276 /// for streaming. If the underlying converter maintains internal state,
277 /// then this won't be preserved across successive calls to g_convert(),
278 /// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
279 /// this is the GNU C converter for CP1255 which does not emit a base
280 /// character until it knows that the next character is not a mark that
281 /// could combine with the base character.)
282 ///
283 /// Characters which are valid in the input character set, but which have no
284 /// representation in the output character set will result in a
285 /// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error. This is in contrast to the iconv()
286 /// specification, which leaves this behaviour implementation defined. Note that
287 /// this is the same error code as is returned for an invalid byte sequence in
288 /// the input character set. To get defined behaviour for conversion of
289 /// unrepresentable characters, use g_convert_with_fallback().
290 /// ## `str`
291 ///
292 /// the string to convert.
293 /// ## `converter`
294 /// conversion descriptor from g_iconv_open()
295 ///
296 /// # Returns
297 ///
298 ///
299 /// If the conversion was successful, a newly allocated buffer
300 /// containing the converted string, which must be freed with
301 /// g_free(). Otherwise [`None`] and @error will be set.
302 ///
303 /// ## `bytes_read`
304 /// location to store the number of bytes in
305 /// the input string that were successfully converted, or [`None`].
306 /// Even if the conversion was successful, this may be
307 /// less than @len if there were partial characters
308 /// at the end of the input. If the error
309 /// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
310 /// stored will be the byte offset after the last valid
311 /// input sequence.
312 #[doc(alias = "g_convert_with_iconv")]
313 pub fn convert(&mut self, str_: &[u8]) -> Result<(Slice<u8>, usize), CvtError> {
314 assert!(str_.len() <= isize::MAX as usize);
315 let mut bytes_read = 0;
316 let mut bytes_written = 0;
317 let mut error = ptr::null_mut();
318 let result = unsafe {
319 ffi::g_convert_with_iconv(
320 str_.as_ptr(),
321 str_.len() as isize,
322 self.0,
323 &mut bytes_read,
324 &mut bytes_written,
325 &mut error,
326 )
327 };
328 if result.is_null() {
329 Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
330 } else {
331 let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
332 Ok((slice, bytes_read))
333 }
334 }
335 /// Same as the standard UNIX routine iconv(), but
336 /// may be implemented via libiconv on UNIX flavors that lack
337 /// a native implementation.
338 ///
339 /// GLib provides g_convert() and g_locale_to_utf8() which are likely
340 /// more convenient than the raw iconv wrappers.
341 ///
342 /// Note that the behaviour of iconv() for characters which are valid in the
343 /// input character set, but which have no representation in the output character
344 /// set, is implementation defined. This function may return success (with a
345 /// positive number of non-reversible conversions as replacement characters were
346 /// used), or it may return -1 and set an error such as `EILSEQ`, in such a
347 /// situation.
348 ///
349 /// See [`iconv(3posix)`](man:iconv(3posix)) and [`iconv(3)`](man:iconv(3)) for more details about behavior when an
350 /// error occurs.
351 /// ## `converter`
352 /// conversion descriptor from g_iconv_open()
353 /// ## `inbuf`
354 /// bytes to convert
355 /// ## `inbytes_left`
356 /// inout parameter, bytes remaining to convert in @inbuf
357 /// ## `outbuf`
358 /// converted output bytes
359 /// ## `outbytes_left`
360 /// inout parameter, bytes available to fill in @outbuf
361 ///
362 /// # Returns
363 ///
364 /// count of non-reversible conversions, or -1 on error
365 #[doc(alias = "g_iconv")]
366 pub fn iconv(
367 &mut self,
368 inbuf: Option<&[u8]>,
369 outbuf: Option<&mut [std::mem::MaybeUninit<u8>]>,
370 ) -> Result<(usize, usize, usize), IConvError> {
371 let input_len = inbuf.as_ref().map(|b| b.len()).unwrap_or_default();
372 let mut inbytes_left = input_len;
373 let mut outbytes_left = outbuf.as_ref().map(|b| b.len()).unwrap_or_default();
374 let mut inbuf = inbuf
375 .map(|b| mut_override(b.as_ptr()) as *mut c_char)
376 .unwrap_or_else(ptr::null_mut);
377 let mut outbuf = outbuf
378 .map(|b| b.as_mut_ptr() as *mut c_char)
379 .unwrap_or_else(ptr::null_mut);
380 let conversions = unsafe {
381 ffi::g_iconv(
382 self.0,
383 &mut inbuf,
384 &mut inbytes_left,
385 &mut outbuf,
386 &mut outbytes_left,
387 )
388 };
389 if conversions as isize == -1 {
390 let err = io::Error::last_os_error();
391 let code = err.raw_os_error().unwrap();
392 if code == libc::EILSEQ || code == libc::EINVAL {
393 Err(IConvError::WithOffset {
394 source: err,
395 offset: input_len - inbytes_left,
396 })
397 } else {
398 Err(err.into())
399 }
400 } else {
401 Ok((conversions, inbytes_left, outbytes_left))
402 }
403 }
404}
405
406impl Drop for IConv {
407 #[inline]
408 fn drop(&mut self) {
409 unsafe {
410 ffi::g_iconv_close(self.0);
411 }
412 }
413}
414
415/// Determines the preferred character sets used for filenames.
416/// The first character set from the @charsets is the filename encoding, the
417/// subsequent character sets are used when trying to generate a displayable
418/// representation of a filename, see g_filename_display_name().
419///
420/// On Unix, the character sets are determined by consulting the
421/// environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
422/// On Windows, the character set used in the GLib API is always UTF-8
423/// and said environment variables have no effect.
424///
425/// `G_FILENAME_ENCODING` may be set to a comma-separated list of
426/// character set names. The special token `@locale` is taken to mean the
427/// character set for the [current locale](running.html#locale).
428/// If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
429/// the character set of the current locale is taken as the filename
430/// encoding. If neither environment variable is set, UTF-8 is taken
431/// as the filename encoding, but the character set of the current locale
432/// is also put in the list of encodings.
433///
434/// The returned @charsets belong to GLib and must not be freed.
435///
436/// Note that on Unix, regardless of the locale character set or
437/// `G_FILENAME_ENCODING` value, the actual file names present
438/// on a system might be in any random encoding or just gibberish.
439///
440/// # Returns
441///
442/// [`true`] if the filename encoding is UTF-8.
443///
444/// ## `filename_charsets`
445///
446/// return location for the [`None`]-terminated list of encoding names
447#[doc(alias = "g_get_filename_charsets")]
448#[doc(alias = "get_filename_charsets")]
449pub fn filename_charsets() -> (bool, Vec<GString>) {
450 let mut filename_charsets = ptr::null_mut();
451 unsafe {
452 let is_utf8 = ffi::g_get_filename_charsets(&mut filename_charsets);
453 (
454 from_glib(is_utf8),
455 FromGlibPtrContainer::from_glib_none(filename_charsets),
456 )
457 }
458}
459
460/// Converts a string from UTF-8 to the encoding GLib uses for
461/// filenames. Note that on Windows GLib uses UTF-8 for filenames;
462/// on other platforms, this function indirectly depends on the
463/// [current locale](running.html#locale).
464///
465/// The input string shall not contain nul characters even if the @len
466/// argument is positive. A nul character found inside the string will result
467/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. If the filename encoding is
468/// not UTF-8 and the conversion output contains a nul character, the error
469/// [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the function returns [`None`].
470/// ## `utf8string`
471/// a UTF-8 encoded string.
472/// ## `len`
473/// the length of the string, or -1 if the string is
474/// nul-terminated.
475///
476/// # Returns
477///
478///
479/// The converted string, or [`None`] on an error.
480///
481/// ## `bytes_read`
482/// location to store the number of bytes in
483/// the input string that were successfully converted, or [`None`].
484/// Even if the conversion was successful, this may be
485/// less than @len if there were partial characters
486/// at the end of the input. If the error
487/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
488/// stored will be the byte offset after the last valid
489/// input sequence.
490///
491/// ## `bytes_written`
492/// the number of bytes stored in
493/// the output buffer (not including the terminating nul).
494#[doc(alias = "g_filename_from_utf8")]
495pub fn filename_from_utf8(utf8string: impl IntoGStr) -> Result<(PathBuf, usize), CvtError> {
496 let mut bytes_read = 0;
497 let mut bytes_written = std::mem::MaybeUninit::uninit();
498 let mut error = ptr::null_mut();
499 let ret = utf8string.run_with_gstr(|utf8string| {
500 assert!(utf8string.len() <= isize::MAX as usize);
501 let len = utf8string.len() as isize;
502 unsafe {
503 ffi::g_filename_from_utf8(
504 utf8string.to_glib_none().0,
505 len,
506 &mut bytes_read,
507 bytes_written.as_mut_ptr(),
508 &mut error,
509 )
510 }
511 });
512 if error.is_null() {
513 Ok(unsafe {
514 (
515 PathBuf::from_glib_full_num(ret, bytes_written.assume_init()),
516 bytes_read,
517 )
518 })
519 } else {
520 Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
521 }
522}
523
524/// Converts a string which is in the encoding used by GLib for
525/// filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
526/// for filenames; on other platforms, this function indirectly depends on
527/// the [current locale](running.html#locale).
528///
529/// The input string shall not contain nul characters even if the @len
530/// argument is positive. A nul character found inside the string will result
531/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence].
532/// If the source encoding is not UTF-8 and the conversion output contains a
533/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
534/// function returns [`None`]. Use g_convert() to produce output that
535/// may contain embedded nul characters.
536/// ## `opsysstring`
537/// a string in the encoding for filenames
538/// ## `len`
539/// the length of the string, or -1 if the string is
540/// nul-terminated (Note that some encodings may allow nul
541/// bytes to occur inside strings. In that case, using -1
542/// for the @len parameter is unsafe)
543///
544/// # Returns
545///
546/// The converted string, or [`None`] on an error.
547///
548/// ## `bytes_read`
549/// location to store the number of bytes in the
550/// input string that were successfully converted, or [`None`].
551/// Even if the conversion was successful, this may be
552/// less than @len if there were partial characters
553/// at the end of the input. If the error
554/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
555/// stored will be the byte offset after the last valid
556/// input sequence.
557///
558/// ## `bytes_written`
559/// the number of bytes stored in the output
560/// buffer (not including the terminating nul).
561#[doc(alias = "g_filename_to_utf8")]
562pub fn filename_to_utf8(
563 opsysstring: impl AsRef<std::path::Path>,
564) -> Result<(crate::GString, usize), CvtError> {
565 let path = opsysstring.as_ref().to_glib_none();
566 let mut bytes_read = 0;
567 let mut bytes_written = std::mem::MaybeUninit::uninit();
568 let mut error = ptr::null_mut();
569 let ret = unsafe {
570 ffi::g_filename_to_utf8(
571 path.0,
572 path.1.as_bytes().len() as isize,
573 &mut bytes_read,
574 bytes_written.as_mut_ptr(),
575 &mut error,
576 )
577 };
578 if error.is_null() {
579 Ok(unsafe {
580 (
581 GString::from_glib_full_num(ret, bytes_written.assume_init()),
582 bytes_read,
583 )
584 })
585 } else {
586 Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
587 }
588}
589
590/// Converts a string from UTF-8 to the encoding used for strings by
591/// the C runtime (usually the same as that used by the operating
592/// system) in the [current locale](running.html#locale).
593/// On Windows this means the system codepage.
594///
595/// The input string shall not contain nul characters even if the @len
596/// argument is positive. A nul character found inside the string will result
597/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. Use g_convert() to convert
598/// input that may contain embedded nul characters.
599/// ## `utf8string`
600/// a UTF-8 encoded string
601/// ## `len`
602/// the length of the string, or -1 if the string is
603/// nul-terminated.
604///
605/// # Returns
606///
607///
608/// A newly-allocated buffer containing the converted string,
609/// or [`None`] on an error, and error will be set.
610///
611/// ## `bytes_read`
612/// location to store the number of bytes in the
613/// input string that were successfully converted, or [`None`].
614/// Even if the conversion was successful, this may be
615/// less than @len if there were partial characters
616/// at the end of the input. If the error
617/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
618/// stored will be the byte offset after the last valid
619/// input sequence.
620#[doc(alias = "g_locale_from_utf8")]
621pub fn locale_from_utf8(utf8string: impl IntoGStr) -> Result<(Slice<u8>, usize), CvtError> {
622 let mut bytes_read = 0;
623 let mut bytes_written = std::mem::MaybeUninit::uninit();
624 let mut error = ptr::null_mut();
625 let ret = utf8string.run_with_gstr(|utf8string| {
626 assert!(utf8string.len() <= isize::MAX as usize);
627 unsafe {
628 ffi::g_locale_from_utf8(
629 utf8string.as_ptr(),
630 utf8string.len() as isize,
631 &mut bytes_read,
632 bytes_written.as_mut_ptr(),
633 &mut error,
634 )
635 }
636 });
637 if error.is_null() {
638 Ok(unsafe {
639 (
640 Slice::from_glib_full_num(ret, bytes_written.assume_init() + 1),
641 bytes_read,
642 )
643 })
644 } else {
645 Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
646 }
647}
648
649/// Converts a string which is in the encoding used for strings by
650/// the C runtime (usually the same as that used by the operating
651/// system) in the [current locale](running.html#locale) into a UTF-8 string.
652///
653/// If the source encoding is not UTF-8 and the conversion output contains a
654/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
655/// function returns [`None`].
656/// If the source encoding is UTF-8, an embedded nul character is treated with
657/// the [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error for backward compatibility with
658/// earlier versions of this library. Use g_convert() to produce output that
659/// may contain embedded nul characters.
660/// ## `opsysstring`
661/// a string in the
662/// encoding of the current locale. On Windows
663/// this means the system codepage.
664///
665/// # Returns
666///
667/// The converted string, or [`None`] on an error.
668///
669/// ## `bytes_read`
670/// location to store the number of bytes in the
671/// input string that were successfully converted, or [`None`].
672/// Even if the conversion was successful, this may be
673/// less than @len if there were partial characters
674/// at the end of the input. If the error
675/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
676/// stored will be the byte offset after the last valid
677/// input sequence.
678///
679/// ## `bytes_written`
680/// the number of bytes stored in the output
681/// buffer (not including the terminating nul).
682#[doc(alias = "g_locale_to_utf8")]
683pub fn locale_to_utf8(opsysstring: &[u8]) -> Result<(crate::GString, usize), CvtError> {
684 let len = opsysstring.len() as isize;
685 let mut bytes_read = 0;
686 let mut bytes_written = std::mem::MaybeUninit::uninit();
687 let mut error = ptr::null_mut();
688 let ret = unsafe {
689 ffi::g_locale_to_utf8(
690 opsysstring.to_glib_none().0,
691 len,
692 &mut bytes_read,
693 bytes_written.as_mut_ptr(),
694 &mut error,
695 )
696 };
697 if error.is_null() {
698 Ok(unsafe {
699 (
700 GString::from_glib_full_num(ret, bytes_written.assume_init()),
701 bytes_read,
702 )
703 })
704 } else {
705 Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
706 }
707}
708
709#[doc(alias = "g_utf8_to_ucs4")]
710#[doc(alias = "g_utf8_to_ucs4_fast")]
711#[doc(alias = "utf8_to_ucs4")]
712pub fn utf8_to_utf32(str: impl AsRef<str>) -> Slice<char> {
713 unsafe {
714 let mut items_written = 0;
715
716 let str_as_utf32 = ffi::g_utf8_to_ucs4_fast(
717 str.as_ref().as_ptr().cast::<c_char>(),
718 str.as_ref().len() as _,
719 &mut items_written,
720 );
721
722 // NOTE: We assume that u32 and char have the same layout and trust that glib won't give us
723 // invalid UTF-32 codepoints
724 Slice::from_glib_full_num(str_as_utf32, items_written as usize)
725 }
726}
727
728#[doc(alias = "g_ucs4_to_utf8")]
729#[doc(alias = "ucs4_to_utf8")]
730pub fn utf32_to_utf8(str: impl AsRef<[char]>) -> GString {
731 let mut items_read = 0;
732 let mut items_written = 0;
733 let mut error = ptr::null_mut();
734
735 unsafe {
736 let str_as_utf8 = ffi::g_ucs4_to_utf8(
737 str.as_ref().as_ptr().cast::<u32>(),
738 str.as_ref().len() as _,
739 &mut items_read,
740 &mut items_written,
741 &mut error,
742 );
743
744 debug_assert!(
745 error.is_null(),
746 "Rust `char` should always be convertible to UTF-8"
747 );
748
749 GString::from_glib_full_num(str_as_utf8, items_written as usize)
750 }
751}
752
753#[doc(alias = "g_utf8_casefold")]
754#[doc(alias = "utf8_casefold")]
755pub fn casefold(str: impl AsRef<str>) -> GString {
756 unsafe {
757 let str = ffi::g_utf8_casefold(str.as_ref().as_ptr().cast(), str.as_ref().len() as isize);
758
759 from_glib_full(str)
760 }
761}
762
763#[doc(alias = "g_utf8_normalize")]
764#[doc(alias = "utf8_normalize")]
765pub fn normalize(str: impl AsRef<str>, mode: NormalizeMode) -> GString {
766 unsafe {
767 let str = ffi::g_utf8_normalize(
768 str.as_ref().as_ptr().cast(),
769 str.as_ref().len() as isize,
770 mode.into_glib(),
771 );
772
773 from_glib_full(str)
774 }
775}
776
#[cfg(test)]
mod tests {
    // Plain ASCII converts cleanly, a non-ASCII byte is rejected, and unrepresentable
    // characters are replaced either by Unicode escapes or by the given fallback string.
    #[test]
    fn convert_ascii() {
        let valid = super::convert(b"Hello", "utf-8", "ascii");
        assert!(valid.is_ok());
        let invalid = super::convert(b"He\xaallo", "utf-8", "ascii");
        assert!(invalid.is_err());

        let (escaped, _) =
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", crate::NONE_STR)
                .unwrap();
        assert_eq!(escaped.as_slice(), b"H\\u00e9llo");

        let (replaced, _) =
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", Some("_")).unwrap();
        assert_eq!(replaced.as_slice(), b"H_llo");
    }

    // A converter can be opened for known charsets and reused; unknown charsets yield `None`.
    #[test]
    fn iconv() {
        let mut converter = super::IConv::new("utf-8", "ascii").unwrap();
        assert!(converter.convert(b"Hello").is_ok());
        assert!(converter.convert(b"He\xaallo").is_err());

        let unknown = super::IConv::new("utf-8", "badcharset123456789");
        assert!(unknown.is_none());
    }

    // Smoke test: the call must simply not crash.
    #[test]
    fn filename_charsets() {
        let _ = super::filename_charsets();
    }

    // Conversions between UTF-8 and UTF-32, including characters outside the BMP.
    #[test]
    fn utf8_and_utf32() {
        let chars = ['A', 'b', '🤔'];
        assert_eq!(super::utf32_to_utf8(chars), "Ab🤔");

        let text = "🤔 ț";
        let decoded = super::utf8_to_utf32(text);
        assert_eq!(decoded.as_slice(), &['🤔', ' ', 'ț']);
    }
}
820}