glib/convert.rs
1// Take a look at the license at the top of the repository in the LICENSE file.
2
3use std::{fmt, io, os::raw::c_char, path::PathBuf, ptr};
4
5use crate::{ffi, translate::*, ConvertError, Error, GString, NormalizeMode, Slice};
6
// rustdoc-stripper-ignore-next
/// A wrapper for [`ConvertError`](crate::ConvertError) that can hold an offset into the input
/// string.
///
/// The conversion functions in this module build it from the error reported by GLib together
/// with the number of input bytes GLib reported as read.
#[derive(Debug)]
pub enum CvtError {
    // rustdoc-stripper-ignore-next
    /// A conversion failure for which no input offset is recorded.
    Convert(Error),
    // rustdoc-stripper-ignore-next
    /// The failure was a [`ConvertError::IllegalSequence`](crate::ConvertError::IllegalSequence).
    ///
    /// `source` is the underlying error and `offset` is the `bytes_read` value reported by the
    /// failing conversion call.
    IllegalSequence { source: Error, offset: usize },
}
15
16impl std::error::Error for CvtError {
17 fn source(&self) -> ::core::option::Option<&(dyn std::error::Error + 'static)> {
18 match self {
19 CvtError::Convert(err) => std::error::Error::source(err),
20 CvtError::IllegalSequence { source, .. } => Some(source),
21 }
22 }
23}
24
25impl fmt::Display for CvtError {
26 fn fmt(&self, fmt: &mut fmt::Formatter) -> ::core::fmt::Result {
27 match self {
28 CvtError::Convert(err) => fmt::Display::fmt(err, fmt),
29 CvtError::IllegalSequence { source, offset } => {
30 write!(fmt, "{source} at offset {offset}")
31 }
32 }
33 }
34}
35
36impl std::convert::From<Error> for CvtError {
37 fn from(err: Error) -> Self {
38 CvtError::Convert(err)
39 }
40}
41
42impl CvtError {
43 #[inline]
44 fn new(err: Error, bytes_read: usize) -> Self {
45 if err.kind::<ConvertError>() == Some(ConvertError::IllegalSequence) {
46 Self::IllegalSequence {
47 source: err,
48 offset: bytes_read,
49 }
50 } else {
51 err.into()
52 }
53 }
54}
55
56/// Converts a string from one character set to another.
57///
58/// Note that you should use g_iconv() for streaming conversions.
59/// Despite the fact that @bytes_read can return information about partial
60/// characters, the g_convert_... functions are not generally suitable
61/// for streaming. If the underlying converter maintains internal state,
62/// then this won't be preserved across successive calls to g_convert(),
63/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
64/// this is the GNU C converter for CP1255 which does not emit a base
65/// character until it knows that the next character is not a mark that
66/// could combine with the base character.)
67///
68/// Using extensions such as "//TRANSLIT" may not work (or may not work
69/// well) on many platforms. Consider using g_str_to_ascii() instead.
70/// ## `str`
71///
72/// the string to convert.
73/// ## `to_codeset`
74/// name of character set into which to convert @str
75/// ## `from_codeset`
76/// character set of @str.
77///
78/// # Returns
79///
80///
81/// If the conversion was successful, a newly allocated buffer
82/// containing the converted string, which must be freed with g_free().
83/// Otherwise [`None`] and @error will be set.
84///
85/// ## `bytes_read`
86/// location to store the number of bytes in
87/// the input string that were successfully converted, or [`None`].
88/// Even if the conversion was successful, this may be
89/// less than @len if there were partial characters
90/// at the end of the input. If the error
91/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
92/// stored will be the byte offset after the last valid
93/// input sequence.
94// rustdoc-stripper-ignore-next-stop
95/// Converts a string from one character set to another.
96///
97/// Note that you should use g_iconv() for streaming conversions.
98/// Despite the fact that @bytes_read can return information about partial
99/// characters, the g_convert_... functions are not generally suitable
100/// for streaming. If the underlying converter maintains internal state,
101/// then this won't be preserved across successive calls to g_convert(),
102/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
103/// this is the GNU C converter for CP1255 which does not emit a base
104/// character until it knows that the next character is not a mark that
105/// could combine with the base character.)
106///
107/// Using extensions such as "//TRANSLIT" may not work (or may not work
108/// well) on many platforms. Consider using g_str_to_ascii() instead.
109/// ## `str`
110///
111/// the string to convert.
112/// ## `to_codeset`
113/// name of character set into which to convert @str
114/// ## `from_codeset`
115/// character set of @str.
116///
117/// # Returns
118///
119///
120/// If the conversion was successful, a newly allocated buffer
121/// containing the converted string, which must be freed with g_free().
122/// Otherwise [`None`] and @error will be set.
123///
124/// ## `bytes_read`
125/// location to store the number of bytes in
126/// the input string that were successfully converted, or [`None`].
127/// Even if the conversion was successful, this may be
128/// less than @len if there were partial characters
129/// at the end of the input. If the error
130/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
131/// stored will be the byte offset after the last valid
132/// input sequence.
133#[doc(alias = "g_convert")]
134pub fn convert(
135 str_: &[u8],
136 to_codeset: impl IntoGStr,
137 from_codeset: impl IntoGStr,
138) -> Result<(Slice<u8>, usize), CvtError> {
139 assert!(str_.len() <= isize::MAX as usize);
140 let mut bytes_read = 0;
141 let mut bytes_written = 0;
142 let mut error = ptr::null_mut();
143 let result = to_codeset.run_with_gstr(|to_codeset| {
144 from_codeset.run_with_gstr(|from_codeset| unsafe {
145 ffi::g_convert(
146 str_.as_ptr(),
147 str_.len() as isize,
148 to_codeset.to_glib_none().0,
149 from_codeset.to_glib_none().0,
150 &mut bytes_read,
151 &mut bytes_written,
152 &mut error,
153 )
154 })
155 });
156 if result.is_null() {
157 Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
158 } else {
159 let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
160 Ok((slice, bytes_read))
161 }
162}
163
164/// Converts a string from one character set to another, possibly
165/// including fallback sequences for characters not representable
166/// in the output. Note that it is not guaranteed that the specification
167/// for the fallback sequences in @fallback will be honored. Some
168/// systems may do an approximate conversion from @from_codeset
169/// to @to_codeset in their iconv() functions,
170/// in which case GLib will simply return that approximate conversion.
171///
172/// Note that you should use g_iconv() for streaming conversions.
173/// Despite the fact that @bytes_read can return information about partial
174/// characters, the g_convert_... functions are not generally suitable
175/// for streaming. If the underlying converter maintains internal state,
176/// then this won't be preserved across successive calls to g_convert(),
177/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
178/// this is the GNU C converter for CP1255 which does not emit a base
179/// character until it knows that the next character is not a mark that
180/// could combine with the base character.)
181/// ## `str`
182///
183/// the string to convert.
184/// ## `to_codeset`
185/// name of character set into which to convert @str
186/// ## `from_codeset`
187/// character set of @str.
188/// ## `fallback`
189/// UTF-8 string to use in place of characters not
190/// present in the target encoding. (The string must be
191/// representable in the target encoding).
192/// If [`None`], characters not in the target encoding will
193/// be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
194///
195/// # Returns
196///
197///
198/// If the conversion was successful, a newly allocated buffer
199/// containing the converted string, which must be freed with g_free().
200/// Otherwise [`None`] and @error will be set.
201///
202/// ## `bytes_read`
203/// location to store the number of bytes in
204/// the input string that were successfully converted, or [`None`].
205/// Even if the conversion was successful, this may be
206/// less than @len if there were partial characters
207/// at the end of the input.
208// rustdoc-stripper-ignore-next-stop
209/// Converts a string from one character set to another, possibly
210/// including fallback sequences for characters not representable
211/// in the output. Note that it is not guaranteed that the specification
212/// for the fallback sequences in @fallback will be honored. Some
213/// systems may do an approximate conversion from @from_codeset
214/// to @to_codeset in their iconv() functions,
215/// in which case GLib will simply return that approximate conversion.
216///
217/// Note that you should use g_iconv() for streaming conversions.
218/// Despite the fact that @bytes_read can return information about partial
219/// characters, the g_convert_... functions are not generally suitable
220/// for streaming. If the underlying converter maintains internal state,
221/// then this won't be preserved across successive calls to g_convert(),
222/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
223/// this is the GNU C converter for CP1255 which does not emit a base
224/// character until it knows that the next character is not a mark that
225/// could combine with the base character.)
226/// ## `str`
227///
228/// the string to convert.
229/// ## `to_codeset`
230/// name of character set into which to convert @str
231/// ## `from_codeset`
232/// character set of @str.
233/// ## `fallback`
234/// UTF-8 string to use in place of characters not
235/// present in the target encoding. (The string must be
236/// representable in the target encoding).
237/// If [`None`], characters not in the target encoding will
238/// be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
239///
240/// # Returns
241///
242///
243/// If the conversion was successful, a newly allocated buffer
244/// containing the converted string, which must be freed with g_free().
245/// Otherwise [`None`] and @error will be set.
246///
247/// ## `bytes_read`
248/// location to store the number of bytes in
249/// the input string that were successfully converted, or [`None`].
250/// Even if the conversion was successful, this may be
251/// less than @len if there were partial characters
252/// at the end of the input.
253#[doc(alias = "g_convert_with_fallback")]
254pub fn convert_with_fallback(
255 str_: &[u8],
256 to_codeset: impl IntoGStr,
257 from_codeset: impl IntoGStr,
258 fallback: Option<impl IntoGStr>,
259) -> Result<(Slice<u8>, usize), CvtError> {
260 assert!(str_.len() <= isize::MAX as usize);
261 let mut bytes_read = 0;
262 let mut bytes_written = 0;
263 let mut error = ptr::null_mut();
264 let result = to_codeset.run_with_gstr(|to_codeset| {
265 from_codeset.run_with_gstr(|from_codeset| {
266 fallback.run_with_gstr(|fallback| unsafe {
267 ffi::g_convert_with_fallback(
268 str_.as_ptr(),
269 str_.len() as isize,
270 to_codeset.to_glib_none().0,
271 from_codeset.to_glib_none().0,
272 fallback.to_glib_none().0,
273 &mut bytes_read,
274 &mut bytes_written,
275 &mut error,
276 )
277 })
278 })
279 });
280 if result.is_null() {
281 Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
282 } else {
283 let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
284 Ok((slice, bytes_read))
285 }
286}
287
// rustdoc-stripper-ignore-next
/// A wrapper for [`std::io::Error`] that can hold an offset into an input string.
///
/// Returned by [`IConv::iconv`], which builds it from the OS error observed after a failed
/// `g_iconv()` call.
#[derive(Debug)]
pub enum IConvError {
    // rustdoc-stripper-ignore-next
    /// An OS error for which no input offset is recorded.
    Error(io::Error),
    // rustdoc-stripper-ignore-next
    /// An OS error together with the number of input bytes that had been consumed when it
    /// occurred (`IConv::iconv` uses this for `EILSEQ` and `EINVAL`).
    WithOffset { source: io::Error, offset: usize },
}
295
296impl std::error::Error for IConvError {
297 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
298 match self {
299 IConvError::Error(err) => std::error::Error::source(err),
300 IConvError::WithOffset { source, .. } => Some(source),
301 }
302 }
303}
304
305impl fmt::Display for IConvError {
306 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
307 match self {
308 IConvError::Error(err) => fmt::Display::fmt(err, fmt),
309 IConvError::WithOffset { source, offset } => write!(fmt, "{source} at offset {offset}"),
310 }
311 }
312}
313
314impl std::convert::From<io::Error> for IConvError {
315 fn from(err: io::Error) -> Self {
316 IConvError::Error(err)
317 }
318}
319
// rustdoc-stripper-ignore-next
/// Owning wrapper around a GLib `GIConv`, which wraps an `iconv()` conversion descriptor.
///
/// The descriptor contains private data and should only be accessed through the methods of
/// this type. It is opened by [`IConv::new`] and closed with `g_iconv_close()` when the value
/// is dropped.
#[derive(Debug)]
#[repr(transparent)]
#[doc(alias = "GIConv")]
pub struct IConv(ffi::GIConv);
326
// Allows an `IConv` to be moved to another thread. The methods that use the descriptor take
// `&mut self`, so a given descriptor is never used from two places at once.
// NOTE(review): presumably sound because the descriptor has no thread affinity — TODO confirm
// against the GLib/iconv documentation.
unsafe impl Send for IConv {}
328
329impl IConv {
330 /// Same as the standard UNIX routine iconv_open(), but
331 /// may be implemented via libiconv on UNIX flavors that lack
332 /// a native implementation.
333 ///
334 /// GLib provides g_convert() and g_locale_to_utf8() which are likely
335 /// more convenient than the raw iconv wrappers.
336 /// ## `to_codeset`
337 /// destination codeset
338 /// ## `from_codeset`
339 /// source codeset
340 ///
341 /// # Returns
342 ///
343 /// a "conversion descriptor", or (GIConv)-1 if
344 /// opening the converter failed.
345 #[doc(alias = "g_iconv_open")]
346 #[allow(clippy::unnecessary_lazy_evaluations)]
347 pub fn new(to_codeset: impl IntoGStr, from_codeset: impl IntoGStr) -> Option<Self> {
348 let iconv = to_codeset.run_with_gstr(|to_codeset| {
349 from_codeset.run_with_gstr(|from_codeset| unsafe {
350 ffi::g_iconv_open(to_codeset.to_glib_none().0, from_codeset.to_glib_none().0)
351 })
352 });
353 (iconv as isize != -1).then(|| Self(iconv))
354 }
355 /// Converts a string from one character set to another.
356 ///
357 /// Note that you should use g_iconv() for streaming conversions.
358 /// Despite the fact that @bytes_read can return information about partial
359 /// characters, the g_convert_... functions are not generally suitable
360 /// for streaming. If the underlying converter maintains internal state,
361 /// then this won't be preserved across successive calls to g_convert(),
362 /// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
363 /// this is the GNU C converter for CP1255 which does not emit a base
364 /// character until it knows that the next character is not a mark that
365 /// could combine with the base character.)
366 ///
367 /// Characters which are valid in the input character set, but which have no
368 /// representation in the output character set will result in a
369 /// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error. This is in contrast to the iconv()
370 /// specification, which leaves this behaviour implementation defined. Note that
371 /// this is the same error code as is returned for an invalid byte sequence in
372 /// the input character set. To get defined behaviour for conversion of
373 /// unrepresentable characters, use g_convert_with_fallback().
374 /// ## `str`
375 ///
376 /// the string to convert.
377 /// ## `converter`
378 /// conversion descriptor from g_iconv_open()
379 ///
380 /// # Returns
381 ///
382 ///
383 /// If the conversion was successful, a newly allocated buffer
384 /// containing the converted string, which must be freed with
385 /// g_free(). Otherwise [`None`] and @error will be set.
386 ///
387 /// ## `bytes_read`
388 /// location to store the number of bytes in
389 /// the input string that were successfully converted, or [`None`].
390 /// Even if the conversion was successful, this may be
391 /// less than @len if there were partial characters
392 /// at the end of the input. If the error
393 /// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
394 /// stored will be the byte offset after the last valid
395 /// input sequence.
396 // rustdoc-stripper-ignore-next-stop
397 /// Converts a string from one character set to another.
398 ///
399 /// Note that you should use g_iconv() for streaming conversions.
400 /// Despite the fact that @bytes_read can return information about partial
401 /// characters, the g_convert_... functions are not generally suitable
402 /// for streaming. If the underlying converter maintains internal state,
403 /// then this won't be preserved across successive calls to g_convert(),
404 /// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
405 /// this is the GNU C converter for CP1255 which does not emit a base
406 /// character until it knows that the next character is not a mark that
407 /// could combine with the base character.)
408 ///
409 /// Characters which are valid in the input character set, but which have no
410 /// representation in the output character set will result in a
411 /// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error. This is in contrast to the iconv()
412 /// specification, which leaves this behaviour implementation defined. Note that
413 /// this is the same error code as is returned for an invalid byte sequence in
414 /// the input character set. To get defined behaviour for conversion of
415 /// unrepresentable characters, use g_convert_with_fallback().
416 /// ## `str`
417 ///
418 /// the string to convert.
419 /// ## `converter`
420 /// conversion descriptor from g_iconv_open()
421 ///
422 /// # Returns
423 ///
424 ///
425 /// If the conversion was successful, a newly allocated buffer
426 /// containing the converted string, which must be freed with
427 /// g_free(). Otherwise [`None`] and @error will be set.
428 ///
429 /// ## `bytes_read`
430 /// location to store the number of bytes in
431 /// the input string that were successfully converted, or [`None`].
432 /// Even if the conversion was successful, this may be
433 /// less than @len if there were partial characters
434 /// at the end of the input. If the error
435 /// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
436 /// stored will be the byte offset after the last valid
437 /// input sequence.
438 #[doc(alias = "g_convert_with_iconv")]
439 pub fn convert(&mut self, str_: &[u8]) -> Result<(Slice<u8>, usize), CvtError> {
440 assert!(str_.len() <= isize::MAX as usize);
441 let mut bytes_read = 0;
442 let mut bytes_written = 0;
443 let mut error = ptr::null_mut();
444 let result = unsafe {
445 ffi::g_convert_with_iconv(
446 str_.as_ptr(),
447 str_.len() as isize,
448 self.0,
449 &mut bytes_read,
450 &mut bytes_written,
451 &mut error,
452 )
453 };
454 if result.is_null() {
455 Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
456 } else {
457 let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
458 Ok((slice, bytes_read))
459 }
460 }
461 /// Same as the standard UNIX routine iconv(), but
462 /// may be implemented via libiconv on UNIX flavors that lack
463 /// a native implementation.
464 ///
465 /// GLib provides g_convert() and g_locale_to_utf8() which are likely
466 /// more convenient than the raw iconv wrappers.
467 ///
468 /// Note that the behaviour of iconv() for characters which are valid in the
469 /// input character set, but which have no representation in the output character
470 /// set, is implementation defined. This function may return success (with a
471 /// positive number of non-reversible conversions as replacement characters were
472 /// used), or it may return -1 and set an error such as `EILSEQ`, in such a
473 /// situation.
474 ///
475 /// See [`iconv(3posix)`](man:iconv(3posix)) and [`iconv(3)`](man:iconv(3)) for more details about behavior when an
476 /// error occurs.
477 /// ## `converter`
478 /// conversion descriptor from g_iconv_open()
479 /// ## `inbuf`
480 /// bytes to convert
481 /// ## `inbytes_left`
482 /// inout parameter, bytes remaining to convert in @inbuf
483 /// ## `outbuf`
484 /// converted output bytes
485 /// ## `outbytes_left`
486 /// inout parameter, bytes available to fill in @outbuf
487 ///
488 /// # Returns
489 ///
490 /// count of non-reversible conversions, or -1 on error
491 // rustdoc-stripper-ignore-next-stop
492 /// Same as the standard UNIX routine iconv(), but
493 /// may be implemented via libiconv on UNIX flavors that lack
494 /// a native implementation.
495 ///
496 /// GLib provides g_convert() and g_locale_to_utf8() which are likely
497 /// more convenient than the raw iconv wrappers.
498 ///
499 /// Note that the behaviour of iconv() for characters which are valid in the
500 /// input character set, but which have no representation in the output character
501 /// set, is implementation defined. This function may return success (with a
502 /// positive number of non-reversible conversions as replacement characters were
503 /// used), or it may return -1 and set an error such as `EILSEQ`, in such a
504 /// situation.
505 ///
506 /// See [`iconv(3posix)`](man:iconv(3posix)) and [`iconv(3)`](man:iconv(3)) for more details about behavior when an
507 /// error occurs.
508 /// ## `converter`
509 /// conversion descriptor from g_iconv_open()
510 /// ## `inbuf`
511 /// bytes to convert
512 /// ## `inbytes_left`
513 /// inout parameter, bytes remaining to convert in @inbuf
514 /// ## `outbuf`
515 /// converted output bytes
516 /// ## `outbytes_left`
517 /// inout parameter, bytes available to fill in @outbuf
518 ///
519 /// # Returns
520 ///
521 /// count of non-reversible conversions, or -1 on error
522 #[doc(alias = "g_iconv")]
523 pub fn iconv(
524 &mut self,
525 inbuf: Option<&[u8]>,
526 outbuf: Option<&mut [std::mem::MaybeUninit<u8>]>,
527 ) -> Result<(usize, usize, usize), IConvError> {
528 let input_len = inbuf.as_ref().map(|b| b.len()).unwrap_or_default();
529 let mut inbytes_left = input_len;
530 let mut outbytes_left = outbuf.as_ref().map(|b| b.len()).unwrap_or_default();
531 let mut inbuf = inbuf
532 .map(|b| mut_override(b.as_ptr()) as *mut c_char)
533 .unwrap_or_else(ptr::null_mut);
534 let mut outbuf = outbuf
535 .map(|b| b.as_mut_ptr() as *mut c_char)
536 .unwrap_or_else(ptr::null_mut);
537 let conversions = unsafe {
538 ffi::g_iconv(
539 self.0,
540 &mut inbuf,
541 &mut inbytes_left,
542 &mut outbuf,
543 &mut outbytes_left,
544 )
545 };
546 if conversions as isize == -1 {
547 let err = io::Error::last_os_error();
548 let code = err.raw_os_error().unwrap();
549 if code == libc::EILSEQ || code == libc::EINVAL {
550 Err(IConvError::WithOffset {
551 source: err,
552 offset: input_len - inbytes_left,
553 })
554 } else {
555 Err(err.into())
556 }
557 } else {
558 Ok((conversions, inbytes_left, outbytes_left))
559 }
560 }
561}
562
563impl Drop for IConv {
564 #[inline]
565 fn drop(&mut self) {
566 unsafe {
567 ffi::g_iconv_close(self.0);
568 }
569 }
570}
571
572/// Determines the preferred character sets used for filenames.
573/// The first character set from the @charsets is the filename encoding, the
574/// subsequent character sets are used when trying to generate a displayable
575/// representation of a filename, see g_filename_display_name().
576///
577/// On Unix, the character sets are determined by consulting the
578/// environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
579/// On Windows, the character set used in the GLib API is always UTF-8
580/// and said environment variables have no effect.
581///
582/// `G_FILENAME_ENCODING` may be set to a comma-separated list of
583/// character set names. The special token `@locale` is taken to mean the
584/// character set for the [current locale](running.html#locale).
585/// If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
586/// the character set of the current locale is taken as the filename
587/// encoding. If neither environment variable is set, UTF-8 is taken
588/// as the filename encoding, but the character set of the current locale
589/// is also put in the list of encodings.
590///
591/// The returned @charsets belong to GLib and must not be freed.
592///
593/// Note that on Unix, regardless of the locale character set or
594/// `G_FILENAME_ENCODING` value, the actual file names present
595/// on a system might be in any random encoding or just gibberish.
596///
597/// # Returns
598///
599/// [`true`] if the filename encoding is UTF-8.
600///
601/// ## `filename_charsets`
602///
603/// return location for the [`None`]-terminated list of encoding names
604// rustdoc-stripper-ignore-next-stop
605/// Determines the preferred character sets used for filenames.
606/// The first character set from the @charsets is the filename encoding, the
607/// subsequent character sets are used when trying to generate a displayable
608/// representation of a filename, see g_filename_display_name().
609///
610/// On Unix, the character sets are determined by consulting the
611/// environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
612/// On Windows, the character set used in the GLib API is always UTF-8
613/// and said environment variables have no effect.
614///
615/// `G_FILENAME_ENCODING` may be set to a comma-separated list of
616/// character set names. The special token `@locale` is taken to mean the
617/// character set for the [current locale](running.html#locale).
618/// If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
619/// the character set of the current locale is taken as the filename
620/// encoding. If neither environment variable is set, UTF-8 is taken
621/// as the filename encoding, but the character set of the current locale
622/// is also put in the list of encodings.
623///
624/// The returned @charsets belong to GLib and must not be freed.
625///
626/// Note that on Unix, regardless of the locale character set or
627/// `G_FILENAME_ENCODING` value, the actual file names present
628/// on a system might be in any random encoding or just gibberish.
629///
630/// # Returns
631///
632/// [`true`] if the filename encoding is UTF-8.
633///
634/// ## `filename_charsets`
635///
636/// return location for the [`None`]-terminated list of encoding names
637#[doc(alias = "g_get_filename_charsets")]
638#[doc(alias = "get_filename_charsets")]
639pub fn filename_charsets() -> (bool, Vec<GString>) {
640 let mut filename_charsets = ptr::null_mut();
641 unsafe {
642 let is_utf8 = ffi::g_get_filename_charsets(&mut filename_charsets);
643 (
644 from_glib(is_utf8),
645 FromGlibPtrContainer::from_glib_none(filename_charsets),
646 )
647 }
648}
649
650/// Converts a string from UTF-8 to the encoding GLib uses for
651/// filenames. Note that on Windows GLib uses UTF-8 for filenames;
652/// on other platforms, this function indirectly depends on the
653/// [current locale](running.html#locale).
654///
655/// The input string shall not contain nul characters even if the @len
656/// argument is positive. A nul character found inside the string will result
657/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. If the filename encoding is
658/// not UTF-8 and the conversion output contains a nul character, the error
659/// [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the function returns [`None`].
660/// ## `utf8string`
661/// a UTF-8 encoded string.
662/// ## `len`
663/// the length of the string, or -1 if the string is
664/// nul-terminated.
665///
666/// # Returns
667///
668///
669/// The converted string, or [`None`] on an error.
670///
671/// ## `bytes_read`
672/// location to store the number of bytes in
673/// the input string that were successfully converted, or [`None`].
674/// Even if the conversion was successful, this may be
675/// less than @len if there were partial characters
676/// at the end of the input. If the error
677/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
678/// stored will be the byte offset after the last valid
679/// input sequence.
680///
681/// ## `bytes_written`
682/// the number of bytes stored in
683/// the output buffer (not including the terminating nul).
684// rustdoc-stripper-ignore-next-stop
685/// Converts a string from UTF-8 to the encoding GLib uses for
686/// filenames. Note that on Windows GLib uses UTF-8 for filenames;
687/// on other platforms, this function indirectly depends on the
688/// [current locale](running.html#locale).
689///
690/// The input string shall not contain nul characters even if the @len
691/// argument is positive. A nul character found inside the string will result
692/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. If the filename encoding is
693/// not UTF-8 and the conversion output contains a nul character, the error
694/// [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the function returns [`None`].
695/// ## `utf8string`
696/// a UTF-8 encoded string.
697/// ## `len`
698/// the length of the string, or -1 if the string is
699/// nul-terminated.
700///
701/// # Returns
702///
703///
704/// The converted string, or [`None`] on an error.
705///
706/// ## `bytes_read`
707/// location to store the number of bytes in
708/// the input string that were successfully converted, or [`None`].
709/// Even if the conversion was successful, this may be
710/// less than @len if there were partial characters
711/// at the end of the input. If the error
712/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
713/// stored will be the byte offset after the last valid
714/// input sequence.
715///
716/// ## `bytes_written`
717/// the number of bytes stored in
718/// the output buffer (not including the terminating nul).
719#[doc(alias = "g_filename_from_utf8")]
720pub fn filename_from_utf8(utf8string: impl IntoGStr) -> Result<(PathBuf, usize), CvtError> {
721 let mut bytes_read = 0;
722 let mut bytes_written = std::mem::MaybeUninit::uninit();
723 let mut error = ptr::null_mut();
724 let ret = utf8string.run_with_gstr(|utf8string| {
725 assert!(utf8string.len() <= isize::MAX as usize);
726 let len = utf8string.len() as isize;
727 unsafe {
728 ffi::g_filename_from_utf8(
729 utf8string.to_glib_none().0,
730 len,
731 &mut bytes_read,
732 bytes_written.as_mut_ptr(),
733 &mut error,
734 )
735 }
736 });
737 if error.is_null() {
738 Ok(unsafe {
739 (
740 PathBuf::from_glib_full_num(ret, bytes_written.assume_init()),
741 bytes_read,
742 )
743 })
744 } else {
745 Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
746 }
747}
748
749/// Converts a string which is in the encoding used by GLib for
750/// filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
751/// for filenames; on other platforms, this function indirectly depends on
752/// the [current locale](running.html#locale).
753///
754/// The input string shall not contain nul characters even if the @len
755/// argument is positive. A nul character found inside the string will result
756/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence].
757/// If the source encoding is not UTF-8 and the conversion output contains a
758/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
759/// function returns [`None`]. Use g_convert() to produce output that
760/// may contain embedded nul characters.
761/// ## `opsysstring`
762/// a string in the encoding for filenames
763/// ## `len`
764/// the length of the string, or -1 if the string is
765/// nul-terminated (Note that some encodings may allow nul
766/// bytes to occur inside strings. In that case, using -1
767/// for the @len parameter is unsafe)
768///
769/// # Returns
770///
771/// The converted string, or [`None`] on an error.
772///
773/// ## `bytes_read`
774/// location to store the number of bytes in the
775/// input string that were successfully converted, or [`None`].
776/// Even if the conversion was successful, this may be
777/// less than @len if there were partial characters
778/// at the end of the input. If the error
779/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
780/// stored will be the byte offset after the last valid
781/// input sequence.
782///
783/// ## `bytes_written`
784/// the number of bytes stored in the output
785/// buffer (not including the terminating nul).
786// rustdoc-stripper-ignore-next-stop
787/// Converts a string which is in the encoding used by GLib for
788/// filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
789/// for filenames; on other platforms, this function indirectly depends on
790/// the [current locale](running.html#locale).
791///
792/// The input string shall not contain nul characters even if the @len
793/// argument is positive. A nul character found inside the string will result
794/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence].
795/// If the source encoding is not UTF-8 and the conversion output contains a
796/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
797/// function returns [`None`]. Use g_convert() to produce output that
798/// may contain embedded nul characters.
799/// ## `opsysstring`
800/// a string in the encoding for filenames
801/// ## `len`
802/// the length of the string, or -1 if the string is
803/// nul-terminated (Note that some encodings may allow nul
804/// bytes to occur inside strings. In that case, using -1
805/// for the @len parameter is unsafe)
806///
807/// # Returns
808///
809/// The converted string, or [`None`] on an error.
810///
811/// ## `bytes_read`
812/// location to store the number of bytes in the
813/// input string that were successfully converted, or [`None`].
814/// Even if the conversion was successful, this may be
815/// less than @len if there were partial characters
816/// at the end of the input. If the error
817/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
818/// stored will be the byte offset after the last valid
819/// input sequence.
820///
821/// ## `bytes_written`
822/// the number of bytes stored in the output
823/// buffer (not including the terminating nul).
824#[doc(alias = "g_filename_to_utf8")]
825pub fn filename_to_utf8(
826 opsysstring: impl AsRef<std::path::Path>,
827) -> Result<(crate::GString, usize), CvtError> {
828 let path = opsysstring.as_ref().to_glib_none();
829 let mut bytes_read = 0;
830 let mut bytes_written = std::mem::MaybeUninit::uninit();
831 let mut error = ptr::null_mut();
832 let ret = unsafe {
833 ffi::g_filename_to_utf8(
834 path.0,
835 path.1.as_bytes().len() as isize,
836 &mut bytes_read,
837 bytes_written.as_mut_ptr(),
838 &mut error,
839 )
840 };
841 if error.is_null() {
842 Ok(unsafe {
843 (
844 GString::from_glib_full_num(ret, bytes_written.assume_init()),
845 bytes_read,
846 )
847 })
848 } else {
849 Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
850 }
851}
852
853/// Converts a string from UTF-8 to the encoding used for strings by
854/// the C runtime (usually the same as that used by the operating
855/// system) in the [current locale](running.html#locale).
856/// On Windows this means the system codepage.
857///
858/// The input string shall not contain nul characters even if the @len
859/// argument is positive. A nul character found inside the string will result
860/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. Use g_convert() to convert
861/// input that may contain embedded nul characters.
862/// ## `utf8string`
863/// a UTF-8 encoded string
864/// ## `len`
865/// the length of the string, or -1 if the string is
866/// nul-terminated.
867///
868/// # Returns
869///
870///
871/// A newly-allocated buffer containing the converted string,
872/// or [`None`] on an error, and error will be set.
873///
874/// ## `bytes_read`
875/// location to store the number of bytes in the
876/// input string that were successfully converted, or [`None`].
877/// Even if the conversion was successful, this may be
878/// less than @len if there were partial characters
879/// at the end of the input. If the error
880/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
881/// stored will be the byte offset after the last valid
882/// input sequence.
883// rustdoc-stripper-ignore-next-stop
884/// Converts a string from UTF-8 to the encoding used for strings by
885/// the C runtime (usually the same as that used by the operating
886/// system) in the [current locale](running.html#locale).
887/// On Windows this means the system codepage.
888///
889/// The input string shall not contain nul characters even if the @len
890/// argument is positive. A nul character found inside the string will result
891/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. Use g_convert() to convert
892/// input that may contain embedded nul characters.
893/// ## `utf8string`
894/// a UTF-8 encoded string
895/// ## `len`
896/// the length of the string, or -1 if the string is
897/// nul-terminated.
898///
899/// # Returns
900///
901///
902/// A newly-allocated buffer containing the converted string,
903/// or [`None`] on an error, and error will be set.
904///
905/// ## `bytes_read`
906/// location to store the number of bytes in the
907/// input string that were successfully converted, or [`None`].
908/// Even if the conversion was successful, this may be
909/// less than @len if there were partial characters
910/// at the end of the input. If the error
911/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
912/// stored will be the byte offset after the last valid
913/// input sequence.
914#[doc(alias = "g_locale_from_utf8")]
915pub fn locale_from_utf8(utf8string: impl IntoGStr) -> Result<(Slice<u8>, usize), CvtError> {
916 let mut bytes_read = 0;
917 let mut bytes_written = std::mem::MaybeUninit::uninit();
918 let mut error = ptr::null_mut();
919 let ret = utf8string.run_with_gstr(|utf8string| {
920 assert!(utf8string.len() <= isize::MAX as usize);
921 unsafe {
922 ffi::g_locale_from_utf8(
923 utf8string.as_ptr(),
924 utf8string.len() as isize,
925 &mut bytes_read,
926 bytes_written.as_mut_ptr(),
927 &mut error,
928 )
929 }
930 });
931 if error.is_null() {
932 Ok(unsafe {
933 (
934 Slice::from_glib_full_num(ret, bytes_written.assume_init() + 1),
935 bytes_read,
936 )
937 })
938 } else {
939 Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
940 }
941}
942
943/// Converts a string which is in the encoding used for strings by
944/// the C runtime (usually the same as that used by the operating
945/// system) in the [current locale](running.html#locale) into a UTF-8 string.
946///
947/// If the source encoding is not UTF-8 and the conversion output contains a
948/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
949/// function returns [`None`].
950/// If the source encoding is UTF-8, an embedded nul character is treated with
951/// the [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error for backward compatibility with
952/// earlier versions of this library. Use g_convert() to produce output that
953/// may contain embedded nul characters.
954/// ## `opsysstring`
955/// a string in the
956/// encoding of the current locale. On Windows
957/// this means the system codepage.
958///
959/// # Returns
960///
961/// The converted string, or [`None`] on an error.
962///
963/// ## `bytes_read`
964/// location to store the number of bytes in the
965/// input string that were successfully converted, or [`None`].
966/// Even if the conversion was successful, this may be
967/// less than @len if there were partial characters
968/// at the end of the input. If the error
969/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
970/// stored will be the byte offset after the last valid
971/// input sequence.
972///
973/// ## `bytes_written`
974/// the number of bytes stored in the output
975/// buffer (not including the terminating nul).
976// rustdoc-stripper-ignore-next-stop
977/// Converts a string which is in the encoding used for strings by
978/// the C runtime (usually the same as that used by the operating
979/// system) in the [current locale](running.html#locale) into a UTF-8 string.
980///
981/// If the source encoding is not UTF-8 and the conversion output contains a
982/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
983/// function returns [`None`].
984/// If the source encoding is UTF-8, an embedded nul character is treated with
985/// the [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error for backward compatibility with
986/// earlier versions of this library. Use g_convert() to produce output that
987/// may contain embedded nul characters.
988/// ## `opsysstring`
989/// a string in the
990/// encoding of the current locale. On Windows
991/// this means the system codepage.
992///
993/// # Returns
994///
995/// The converted string, or [`None`] on an error.
996///
997/// ## `bytes_read`
998/// location to store the number of bytes in the
999/// input string that were successfully converted, or [`None`].
1000/// Even if the conversion was successful, this may be
1001/// less than @len if there were partial characters
1002/// at the end of the input. If the error
1003/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
1004/// stored will be the byte offset after the last valid
1005/// input sequence.
1006///
1007/// ## `bytes_written`
1008/// the number of bytes stored in the output
1009/// buffer (not including the terminating nul).
1010#[doc(alias = "g_locale_to_utf8")]
1011pub fn locale_to_utf8(opsysstring: &[u8]) -> Result<(crate::GString, usize), CvtError> {
1012 let len = opsysstring.len() as isize;
1013 let mut bytes_read = 0;
1014 let mut bytes_written = std::mem::MaybeUninit::uninit();
1015 let mut error = ptr::null_mut();
1016 let ret = unsafe {
1017 ffi::g_locale_to_utf8(
1018 opsysstring.to_glib_none().0,
1019 len,
1020 &mut bytes_read,
1021 bytes_written.as_mut_ptr(),
1022 &mut error,
1023 )
1024 };
1025 if error.is_null() {
1026 Ok(unsafe {
1027 (
1028 GString::from_glib_full_num(ret, bytes_written.assume_init()),
1029 bytes_read,
1030 )
1031 })
1032 } else {
1033 Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
1034 }
1035}
1036
1037#[doc(alias = "g_utf8_to_ucs4")]
1038#[doc(alias = "g_utf8_to_ucs4_fast")]
1039#[doc(alias = "utf8_to_ucs4")]
1040pub fn utf8_to_utf32(str: impl AsRef<str>) -> Slice<char> {
1041 unsafe {
1042 let mut items_written = 0;
1043
1044 let str_as_utf32 = ffi::g_utf8_to_ucs4_fast(
1045 str.as_ref().as_ptr().cast::<c_char>(),
1046 str.as_ref().len() as _,
1047 &mut items_written,
1048 );
1049
1050 // NOTE: We assume that u32 and char have the same layout and trust that glib won't give us
1051 // invalid UTF-32 codepoints
1052 Slice::from_glib_full_num(str_as_utf32, items_written as usize)
1053 }
1054}
1055
1056#[doc(alias = "g_ucs4_to_utf8")]
1057#[doc(alias = "ucs4_to_utf8")]
1058pub fn utf32_to_utf8(str: impl AsRef<[char]>) -> GString {
1059 let mut items_read = 0;
1060 let mut items_written = 0;
1061 let mut error = ptr::null_mut();
1062
1063 unsafe {
1064 let str_as_utf8 = ffi::g_ucs4_to_utf8(
1065 str.as_ref().as_ptr().cast::<u32>(),
1066 str.as_ref().len() as _,
1067 &mut items_read,
1068 &mut items_written,
1069 &mut error,
1070 );
1071
1072 debug_assert!(
1073 error.is_null(),
1074 "Rust `char` should always be convertible to UTF-8"
1075 );
1076
1077 GString::from_glib_full_num(str_as_utf8, items_written as usize)
1078 }
1079}
1080
1081#[doc(alias = "g_utf8_casefold")]
1082#[doc(alias = "utf8_casefold")]
1083pub fn casefold(str: impl AsRef<str>) -> GString {
1084 unsafe {
1085 let str = ffi::g_utf8_casefold(str.as_ref().as_ptr().cast(), str.as_ref().len() as isize);
1086
1087 from_glib_full(str)
1088 }
1089}
1090
1091#[doc(alias = "g_utf8_normalize")]
1092#[doc(alias = "utf8_normalize")]
1093pub fn normalize(str: impl AsRef<str>, mode: NormalizeMode) -> GString {
1094 unsafe {
1095 let str = ffi::g_utf8_normalize(
1096 str.as_ref().as_ptr().cast(),
1097 str.as_ref().len() as isize,
1098 mode.into_glib(),
1099 );
1100
1101 from_glib_full(str)
1102 }
1103}
1104
#[cfg(test)]
mod tests {
    // Plain conversion succeeds for ASCII-only input and fails for a byte that
    // is not valid input; the fallback variant substitutes the unconvertible
    // character instead of failing.
    #[test]
    fn convert_ascii() {
        let plain = super::convert(b"Hello", "utf-8", "ascii");
        assert!(plain.is_ok());

        let invalid = super::convert(b"He\xaallo", "utf-8", "ascii");
        assert!(invalid.is_err());

        // Without an explicit fallback string the character is escaped.
        let escaped =
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", crate::NONE_STR)
                .unwrap()
                .0;
        assert_eq!(escaped.as_slice(), b"H\\u00e9llo");

        // With an explicit fallback string that string is used instead.
        let replaced = super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", Some("_"))
            .unwrap()
            .0;
        assert_eq!(replaced.as_slice(), b"H_llo");
    }

    // A reusable converter accepts valid input, rejects invalid input, and
    // cannot be created for an unknown character set.
    #[test]
    fn iconv() {
        let mut converter = super::IConv::new("utf-8", "ascii").unwrap();

        let accepted = converter.convert(b"Hello");
        assert!(accepted.is_ok());

        let rejected = converter.convert(b"He\xaallo");
        assert!(rejected.is_err());

        let unknown = super::IConv::new("utf-8", "badcharset123456789");
        assert!(unknown.is_none());
    }

    // Only checks that the call completes; the result is not inspected.
    #[test]
    fn filename_charsets() {
        let _ = super::filename_charsets();
    }

    // Checks each direction of the UTF-8 <-> UTF-32 conversion separately.
    #[test]
    fn utf8_and_utf32() {
        let chars = ['A', 'b', '🤔'];
        let encoded = super::utf32_to_utf8(chars);
        assert_eq!(encoded, "Ab🤔");

        let text = "🤔 ț";
        let decoded = super::utf8_to_utf32(text);
        assert_eq!(decoded.as_slice(), &['🤔', ' ', 'ț']);
    }
}