glib/convert.rs
1// Take a look at the license at the top of the repository in the LICENSE file.
2
3use std::{fmt, io, os::raw::c_char, path::PathBuf, ptr};
4
5use crate::{ffi, translate::*, ConvertError, Error, GString, NormalizeMode, Slice};
6
7// rustdoc-stripper-ignore-next
8/// A wrapper for [`ConvertError`](crate::ConvertError) that can hold an offset into the input
9/// string.
10#[derive(Debug)]
11pub enum CvtError {
12 Convert(Error),
13 IllegalSequence { source: Error, offset: usize },
14}
15
16impl std::error::Error for CvtError {
17 fn source(&self) -> ::core::option::Option<&(dyn std::error::Error + 'static)> {
18 match self {
19 CvtError::Convert(err) => std::error::Error::source(err),
20 CvtError::IllegalSequence { source, .. } => Some(source),
21 }
22 }
23}
24
25impl fmt::Display for CvtError {
26 fn fmt(&self, fmt: &mut fmt::Formatter) -> ::core::fmt::Result {
27 match self {
28 CvtError::Convert(err) => fmt::Display::fmt(err, fmt),
29 CvtError::IllegalSequence { source, offset } => {
30 write!(fmt, "{source} at offset {offset}")
31 }
32 }
33 }
34}
35
36impl std::convert::From<Error> for CvtError {
37 fn from(err: Error) -> Self {
38 CvtError::Convert(err)
39 }
40}
41
42impl CvtError {
43 #[inline]
44 fn new(err: Error, bytes_read: usize) -> Self {
45 if err.kind::<ConvertError>() == Some(ConvertError::IllegalSequence) {
46 Self::IllegalSequence {
47 source: err,
48 offset: bytes_read,
49 }
50 } else {
51 err.into()
52 }
53 }
54}
55
56/// Converts a string from one character set to another.
57///
58/// Note that you should use g_iconv() for streaming conversions.
59/// Despite the fact that @bytes_read can return information about partial
60/// characters, the g_convert_... functions are not generally suitable
61/// for streaming. If the underlying converter maintains internal state,
62/// then this won't be preserved across successive calls to g_convert(),
63/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
64/// this is the GNU C converter for CP1255 which does not emit a base
65/// character until it knows that the next character is not a mark that
66/// could combine with the base character.)
67///
68/// Using extensions such as "//TRANSLIT" may not work (or may not work
69/// well) on many platforms. Consider using g_str_to_ascii() instead.
70/// ## `str`
71///
72/// the string to convert.
73/// ## `to_codeset`
74/// name of character set into which to convert @str
75/// ## `from_codeset`
76/// character set of @str.
77///
78/// # Returns
79///
80///
81/// If the conversion was successful, a newly allocated buffer
82/// containing the converted string, which must be freed with g_free().
83/// Otherwise [`None`] and @error will be set.
84///
85/// ## `bytes_read`
86/// location to store the number of bytes in
87/// the input string that were successfully converted, or [`None`].
88/// Even if the conversion was successful, this may be
89/// less than @len if there were partial characters
90/// at the end of the input. If the error
91/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
92/// stored will be the byte offset after the last valid
93/// input sequence.
94// rustdoc-stripper-ignore-next-stop
95/// Converts a string from one character set to another.
96///
97/// Note that you should use g_iconv() for streaming conversions.
98/// Despite the fact that @bytes_read can return information about partial
99/// characters, the g_convert_... functions are not generally suitable
100/// for streaming. If the underlying converter maintains internal state,
101/// then this won't be preserved across successive calls to g_convert(),
102/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
103/// this is the GNU C converter for CP1255 which does not emit a base
104/// character until it knows that the next character is not a mark that
105/// could combine with the base character.)
106///
107/// Using extensions such as "//TRANSLIT" may not work (or may not work
108/// well) on many platforms. Consider using g_str_to_ascii() instead.
109/// ## `str`
110///
111/// the string to convert.
112/// ## `to_codeset`
113/// name of character set into which to convert @str
114/// ## `from_codeset`
115/// character set of @str.
116///
117/// # Returns
118///
119///
120/// If the conversion was successful, a newly allocated buffer
121/// containing the converted string, which must be freed with g_free().
122/// Otherwise [`None`] and @error will be set.
123///
124/// ## `bytes_read`
125/// location to store the number of bytes in
126/// the input string that were successfully converted, or [`None`].
127/// Even if the conversion was successful, this may be
128/// less than @len if there were partial characters
129/// at the end of the input. If the error
130/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
131/// stored will be the byte offset after the last valid
132/// input sequence.
133#[doc(alias = "g_convert")]
134pub fn convert(
135 str_: &[u8],
136 to_codeset: impl IntoGStr,
137 from_codeset: impl IntoGStr,
138) -> Result<(Slice<u8>, usize), CvtError> {
139 assert!(str_.len() <= isize::MAX as usize);
140 let mut bytes_read = 0;
141 let mut bytes_written = 0;
142 let mut error = ptr::null_mut();
143 let result = to_codeset.run_with_gstr(|to_codeset| {
144 from_codeset.run_with_gstr(|from_codeset| unsafe {
145 ffi::g_convert(
146 str_.as_ptr(),
147 str_.len() as isize,
148 to_codeset.to_glib_none().0,
149 from_codeset.to_glib_none().0,
150 &mut bytes_read,
151 &mut bytes_written,
152 &mut error,
153 )
154 })
155 });
156 if result.is_null() {
157 Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
158 } else {
159 let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
160 Ok((slice, bytes_read))
161 }
162}
163
164/// Converts a string from one character set to another, possibly
165/// including fallback sequences for characters not representable
166/// in the output. Note that it is not guaranteed that the specification
167/// for the fallback sequences in @fallback will be honored. Some
168/// systems may do an approximate conversion from @from_codeset
169/// to @to_codeset in their iconv() functions,
170/// in which case GLib will simply return that approximate conversion.
171///
172/// Note that you should use g_iconv() for streaming conversions.
173/// Despite the fact that @bytes_read can return information about partial
174/// characters, the g_convert_... functions are not generally suitable
175/// for streaming. If the underlying converter maintains internal state,
176/// then this won't be preserved across successive calls to g_convert(),
177/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
178/// this is the GNU C converter for CP1255 which does not emit a base
179/// character until it knows that the next character is not a mark that
180/// could combine with the base character.)
181/// ## `str`
182///
183/// the string to convert.
184/// ## `to_codeset`
185/// name of character set into which to convert @str
186/// ## `from_codeset`
187/// character set of @str.
188/// ## `fallback`
189/// UTF-8 string to use in place of characters not
190/// present in the target encoding. (The string must be
191/// representable in the target encoding).
192/// If [`None`], characters not in the target encoding will
193/// be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
194///
195/// # Returns
196///
197///
198/// If the conversion was successful, a newly allocated buffer
199/// containing the converted string, which must be freed with g_free().
200/// Otherwise [`None`] and @error will be set.
201///
202/// ## `bytes_read`
203/// location to store the number of bytes in
204/// the input string that were successfully converted, or [`None`].
205/// Even if the conversion was successful, this may be
206/// less than @len if there were partial characters
207/// at the end of the input.
208// rustdoc-stripper-ignore-next-stop
209/// Converts a string from one character set to another, possibly
210/// including fallback sequences for characters not representable
211/// in the output. Note that it is not guaranteed that the specification
212/// for the fallback sequences in @fallback will be honored. Some
213/// systems may do an approximate conversion from @from_codeset
214/// to @to_codeset in their iconv() functions,
215/// in which case GLib will simply return that approximate conversion.
216///
217/// Note that you should use g_iconv() for streaming conversions.
218/// Despite the fact that @bytes_read can return information about partial
219/// characters, the g_convert_... functions are not generally suitable
220/// for streaming. If the underlying converter maintains internal state,
221/// then this won't be preserved across successive calls to g_convert(),
222/// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
223/// this is the GNU C converter for CP1255 which does not emit a base
224/// character until it knows that the next character is not a mark that
225/// could combine with the base character.)
226/// ## `str`
227///
228/// the string to convert.
229/// ## `to_codeset`
230/// name of character set into which to convert @str
231/// ## `from_codeset`
232/// character set of @str.
233/// ## `fallback`
234/// UTF-8 string to use in place of characters not
235/// present in the target encoding. (The string must be
236/// representable in the target encoding).
237/// If [`None`], characters not in the target encoding will
238/// be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
239///
240/// # Returns
241///
242///
243/// If the conversion was successful, a newly allocated buffer
244/// containing the converted string, which must be freed with g_free().
245/// Otherwise [`None`] and @error will be set.
246///
247/// ## `bytes_read`
248/// location to store the number of bytes in
249/// the input string that were successfully converted, or [`None`].
250/// Even if the conversion was successful, this may be
251/// less than @len if there were partial characters
252/// at the end of the input.
253#[doc(alias = "g_convert_with_fallback")]
254pub fn convert_with_fallback(
255 str_: &[u8],
256 to_codeset: impl IntoGStr,
257 from_codeset: impl IntoGStr,
258 fallback: Option<impl IntoGStr>,
259) -> Result<(Slice<u8>, usize), CvtError> {
260 assert!(str_.len() <= isize::MAX as usize);
261 let mut bytes_read = 0;
262 let mut bytes_written = 0;
263 let mut error = ptr::null_mut();
264 let result = to_codeset.run_with_gstr(|to_codeset| {
265 from_codeset.run_with_gstr(|from_codeset| {
266 fallback.run_with_gstr(|fallback| unsafe {
267 ffi::g_convert_with_fallback(
268 str_.as_ptr(),
269 str_.len() as isize,
270 to_codeset.to_glib_none().0,
271 from_codeset.to_glib_none().0,
272 fallback.to_glib_none().0,
273 &mut bytes_read,
274 &mut bytes_written,
275 &mut error,
276 )
277 })
278 })
279 });
280 if result.is_null() {
281 Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
282 } else {
283 let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
284 Ok((slice, bytes_read))
285 }
286}
287
// rustdoc-stripper-ignore-next
/// Error type of the raw iconv wrapper: a [`std::io::Error`], optionally paired with an offset
/// into the input buffer.
#[derive(Debug)]
pub enum IConvError {
    Error(io::Error),
    WithOffset { source: io::Error, offset: usize },
}

impl From<io::Error> for IConvError {
    // A bare I/O error has no offset to attach, so it always becomes `Error`.
    fn from(err: io::Error) -> Self {
        Self::Error(err)
    }
}

impl fmt::Display for IConvError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::WithOffset { source, offset } => {
                write!(f, "{source} at offset {offset}")
            }
            // Forward the formatter untouched so `Error` prints exactly like the inner error.
            Self::Error(inner) => fmt::Display::fmt(inner, f),
        }
    }
}

impl std::error::Error for IConvError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::WithOffset { source, .. } => Some(source),
            // `Error` is displayed as the inner error itself, so expose that error's own
            // source rather than the inner error.
            Self::Error(inner) => std::error::Error::source(inner),
        }
    }
}
319
320/// The GIConv struct wraps an iconv() conversion descriptor. It contains
321/// private data and should only be accessed using the following functions.
322// rustdoc-stripper-ignore-next-stop
323/// The GIConv struct wraps an iconv() conversion descriptor. It contains
324/// private data and should only be accessed using the following functions.
// Newtype around the raw `ffi::GIConv` descriptor; `#[repr(transparent)]` gives it the same
// layout as the wrapped value.
#[derive(Debug)]
#[repr(transparent)]
#[doc(alias = "GIConv")]
pub struct IConv(ffi::GIConv);

// NOTE(review): manual `Send` opt-in. The methods in this file that use the descriptor
// (`convert`, `iconv`, `drop`) all take `&mut self`, and no `Sync` impl is visible here.
// Presumably the underlying iconv descriptor has no thread affinity -- confirm.
unsafe impl Send for IConv {}
331
332impl IConv {
333 /// Same as the standard UNIX routine iconv_open(), but
334 /// may be implemented via libiconv on UNIX flavors that lack
335 /// a native implementation.
336 ///
337 /// GLib provides g_convert() and g_locale_to_utf8() which are likely
338 /// more convenient than the raw iconv wrappers.
339 /// ## `to_codeset`
340 /// destination codeset
341 /// ## `from_codeset`
342 /// source codeset
343 ///
344 /// # Returns
345 ///
346 /// a "conversion descriptor", or (GIConv)-1 if
347 /// opening the converter failed.
348 // rustdoc-stripper-ignore-next-stop
349 /// Same as the standard UNIX routine iconv_open(), but
350 /// may be implemented via libiconv on UNIX flavors that lack
351 /// a native implementation.
352 ///
353 /// GLib provides g_convert() and g_locale_to_utf8() which are likely
354 /// more convenient than the raw iconv wrappers.
355 /// ## `to_codeset`
356 /// destination codeset
357 /// ## `from_codeset`
358 /// source codeset
359 ///
360 /// # Returns
361 ///
362 /// a "conversion descriptor", or (GIConv)-1 if
363 /// opening the converter failed.
364 #[doc(alias = "g_iconv_open")]
365 #[allow(clippy::unnecessary_lazy_evaluations)]
366 pub fn new(to_codeset: impl IntoGStr, from_codeset: impl IntoGStr) -> Option<Self> {
367 let iconv = to_codeset.run_with_gstr(|to_codeset| {
368 from_codeset.run_with_gstr(|from_codeset| unsafe {
369 ffi::g_iconv_open(to_codeset.to_glib_none().0, from_codeset.to_glib_none().0)
370 })
371 });
372 (iconv as isize != -1).then(|| Self(iconv))
373 }
374 /// Converts a string from one character set to another.
375 ///
376 /// Note that you should use g_iconv() for streaming conversions.
377 /// Despite the fact that @bytes_read can return information about partial
378 /// characters, the g_convert_... functions are not generally suitable
379 /// for streaming. If the underlying converter maintains internal state,
380 /// then this won't be preserved across successive calls to g_convert(),
381 /// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
382 /// this is the GNU C converter for CP1255 which does not emit a base
383 /// character until it knows that the next character is not a mark that
384 /// could combine with the base character.)
385 ///
386 /// Characters which are valid in the input character set, but which have no
387 /// representation in the output character set will result in a
388 /// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error. This is in contrast to the iconv()
389 /// specification, which leaves this behaviour implementation defined. Note that
390 /// this is the same error code as is returned for an invalid byte sequence in
391 /// the input character set. To get defined behaviour for conversion of
392 /// unrepresentable characters, use g_convert_with_fallback().
393 /// ## `str`
394 ///
395 /// the string to convert.
396 /// ## `converter`
397 /// conversion descriptor from g_iconv_open()
398 ///
399 /// # Returns
400 ///
401 ///
402 /// If the conversion was successful, a newly allocated buffer
403 /// containing the converted string, which must be freed with
404 /// g_free(). Otherwise [`None`] and @error will be set.
405 ///
406 /// ## `bytes_read`
407 /// location to store the number of bytes in
408 /// the input string that were successfully converted, or [`None`].
409 /// Even if the conversion was successful, this may be
410 /// less than @len if there were partial characters
411 /// at the end of the input. If the error
412 /// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
413 /// stored will be the byte offset after the last valid
414 /// input sequence.
415 // rustdoc-stripper-ignore-next-stop
416 /// Converts a string from one character set to another.
417 ///
418 /// Note that you should use g_iconv() for streaming conversions.
419 /// Despite the fact that @bytes_read can return information about partial
420 /// characters, the g_convert_... functions are not generally suitable
421 /// for streaming. If the underlying converter maintains internal state,
422 /// then this won't be preserved across successive calls to g_convert(),
423 /// g_convert_with_iconv() or g_convert_with_fallback(). (An example of
424 /// this is the GNU C converter for CP1255 which does not emit a base
425 /// character until it knows that the next character is not a mark that
426 /// could combine with the base character.)
427 ///
428 /// Characters which are valid in the input character set, but which have no
429 /// representation in the output character set will result in a
430 /// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error. This is in contrast to the iconv()
431 /// specification, which leaves this behaviour implementation defined. Note that
432 /// this is the same error code as is returned for an invalid byte sequence in
433 /// the input character set. To get defined behaviour for conversion of
434 /// unrepresentable characters, use g_convert_with_fallback().
435 /// ## `str`
436 ///
437 /// the string to convert.
438 /// ## `converter`
439 /// conversion descriptor from g_iconv_open()
440 ///
441 /// # Returns
442 ///
443 ///
444 /// If the conversion was successful, a newly allocated buffer
445 /// containing the converted string, which must be freed with
446 /// g_free(). Otherwise [`None`] and @error will be set.
447 ///
448 /// ## `bytes_read`
449 /// location to store the number of bytes in
450 /// the input string that were successfully converted, or [`None`].
451 /// Even if the conversion was successful, this may be
452 /// less than @len if there were partial characters
453 /// at the end of the input. If the error
454 /// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
455 /// stored will be the byte offset after the last valid
456 /// input sequence.
457 #[doc(alias = "g_convert_with_iconv")]
458 pub fn convert(&mut self, str_: &[u8]) -> Result<(Slice<u8>, usize), CvtError> {
459 assert!(str_.len() <= isize::MAX as usize);
460 let mut bytes_read = 0;
461 let mut bytes_written = 0;
462 let mut error = ptr::null_mut();
463 let result = unsafe {
464 ffi::g_convert_with_iconv(
465 str_.as_ptr(),
466 str_.len() as isize,
467 self.0,
468 &mut bytes_read,
469 &mut bytes_written,
470 &mut error,
471 )
472 };
473 if result.is_null() {
474 Err(CvtError::new(unsafe { from_glib_full(error) }, bytes_read))
475 } else {
476 let slice = unsafe { Slice::from_glib_full_num(result, bytes_written as _) };
477 Ok((slice, bytes_read))
478 }
479 }
480 /// Same as the standard UNIX routine iconv(), but
481 /// may be implemented via libiconv on UNIX flavors that lack
482 /// a native implementation.
483 ///
484 /// GLib provides g_convert() and g_locale_to_utf8() which are likely
485 /// more convenient than the raw iconv wrappers.
486 ///
487 /// Note that the behaviour of iconv() for characters which are valid in the
488 /// input character set, but which have no representation in the output character
489 /// set, is implementation defined. This function may return success (with a
490 /// positive number of non-reversible conversions as replacement characters were
491 /// used), or it may return -1 and set an error such as `EILSEQ`, in such a
492 /// situation.
493 ///
494 /// See [`iconv(3posix)`](man:iconv(3posix)) and [`iconv(3)`](man:iconv(3)) for more details about behavior when an
495 /// error occurs.
496 /// ## `converter`
497 /// conversion descriptor from g_iconv_open()
498 /// ## `inbuf`
499 /// bytes to convert
500 /// ## `inbytes_left`
501 /// inout parameter, bytes remaining to convert in @inbuf
502 /// ## `outbuf`
503 /// converted output bytes
504 /// ## `outbytes_left`
505 /// inout parameter, bytes available to fill in @outbuf
506 ///
507 /// # Returns
508 ///
509 /// count of non-reversible conversions, or -1 on error
510 // rustdoc-stripper-ignore-next-stop
511 /// Same as the standard UNIX routine iconv(), but
512 /// may be implemented via libiconv on UNIX flavors that lack
513 /// a native implementation.
514 ///
515 /// GLib provides g_convert() and g_locale_to_utf8() which are likely
516 /// more convenient than the raw iconv wrappers.
517 ///
518 /// Note that the behaviour of iconv() for characters which are valid in the
519 /// input character set, but which have no representation in the output character
520 /// set, is implementation defined. This function may return success (with a
521 /// positive number of non-reversible conversions as replacement characters were
522 /// used), or it may return -1 and set an error such as `EILSEQ`, in such a
523 /// situation.
524 ///
525 /// See [`iconv(3posix)`](man:iconv(3posix)) and [`iconv(3)`](man:iconv(3)) for more details about behavior when an
526 /// error occurs.
527 /// ## `converter`
528 /// conversion descriptor from g_iconv_open()
529 /// ## `inbuf`
530 /// bytes to convert
531 /// ## `inbytes_left`
532 /// inout parameter, bytes remaining to convert in @inbuf
533 /// ## `outbuf`
534 /// converted output bytes
535 /// ## `outbytes_left`
536 /// inout parameter, bytes available to fill in @outbuf
537 ///
538 /// # Returns
539 ///
540 /// count of non-reversible conversions, or -1 on error
541 #[doc(alias = "g_iconv")]
542 pub fn iconv(
543 &mut self,
544 inbuf: Option<&[u8]>,
545 outbuf: Option<&mut [std::mem::MaybeUninit<u8>]>,
546 ) -> Result<(usize, usize, usize), IConvError> {
547 let input_len = inbuf.as_ref().map(|b| b.len()).unwrap_or_default();
548 let mut inbytes_left = input_len;
549 let mut outbytes_left = outbuf.as_ref().map(|b| b.len()).unwrap_or_default();
550 let mut inbuf = inbuf
551 .map(|b| mut_override(b.as_ptr()) as *mut c_char)
552 .unwrap_or_else(ptr::null_mut);
553 let mut outbuf = outbuf
554 .map(|b| b.as_mut_ptr() as *mut c_char)
555 .unwrap_or_else(ptr::null_mut);
556 let conversions = unsafe {
557 ffi::g_iconv(
558 self.0,
559 &mut inbuf,
560 &mut inbytes_left,
561 &mut outbuf,
562 &mut outbytes_left,
563 )
564 };
565 if conversions as isize == -1 {
566 let err = io::Error::last_os_error();
567 let code = err.raw_os_error().unwrap();
568 if code == libc::EILSEQ || code == libc::EINVAL {
569 Err(IConvError::WithOffset {
570 source: err,
571 offset: input_len - inbytes_left,
572 })
573 } else {
574 Err(err.into())
575 }
576 } else {
577 Ok((conversions, inbytes_left, outbytes_left))
578 }
579 }
580}
581
582impl Drop for IConv {
583 #[inline]
584 fn drop(&mut self) {
585 unsafe {
586 ffi::g_iconv_close(self.0);
587 }
588 }
589}
590
591/// Determines the preferred character sets used for filenames.
592/// The first character set from the @charsets is the filename encoding, the
593/// subsequent character sets are used when trying to generate a displayable
594/// representation of a filename, see g_filename_display_name().
595///
596/// On Unix, the character sets are determined by consulting the
597/// environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
598/// On Windows, the character set used in the GLib API is always UTF-8
599/// and said environment variables have no effect.
600///
601/// `G_FILENAME_ENCODING` may be set to a comma-separated list of
602/// character set names. The special token `@locale` is taken to mean the
603/// character set for the [current locale](running.html#locale).
604/// If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
605/// the character set of the current locale is taken as the filename
606/// encoding. If neither environment variable is set, UTF-8 is taken
607/// as the filename encoding, but the character set of the current locale
608/// is also put in the list of encodings.
609///
610/// The returned @charsets belong to GLib and must not be freed.
611///
612/// Note that on Unix, regardless of the locale character set or
613/// `G_FILENAME_ENCODING` value, the actual file names present
614/// on a system might be in any random encoding or just gibberish.
615///
616/// # Returns
617///
618/// [`true`] if the filename encoding is UTF-8.
619///
620/// ## `filename_charsets`
621///
622/// return location for the [`None`]-terminated list of encoding names
623// rustdoc-stripper-ignore-next-stop
624/// Determines the preferred character sets used for filenames.
625/// The first character set from the @charsets is the filename encoding, the
626/// subsequent character sets are used when trying to generate a displayable
627/// representation of a filename, see g_filename_display_name().
628///
629/// On Unix, the character sets are determined by consulting the
630/// environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
631/// On Windows, the character set used in the GLib API is always UTF-8
632/// and said environment variables have no effect.
633///
634/// `G_FILENAME_ENCODING` may be set to a comma-separated list of
635/// character set names. The special token `@locale` is taken to mean the
636/// character set for the [current locale](running.html#locale).
637/// If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
638/// the character set of the current locale is taken as the filename
639/// encoding. If neither environment variable is set, UTF-8 is taken
640/// as the filename encoding, but the character set of the current locale
641/// is also put in the list of encodings.
642///
643/// The returned @charsets belong to GLib and must not be freed.
644///
645/// Note that on Unix, regardless of the locale character set or
646/// `G_FILENAME_ENCODING` value, the actual file names present
647/// on a system might be in any random encoding or just gibberish.
648///
649/// # Returns
650///
651/// [`true`] if the filename encoding is UTF-8.
652///
653/// ## `filename_charsets`
654///
655/// return location for the [`None`]-terminated list of encoding names
656#[doc(alias = "g_get_filename_charsets")]
657#[doc(alias = "get_filename_charsets")]
658pub fn filename_charsets() -> (bool, Vec<GString>) {
659 let mut filename_charsets = ptr::null_mut();
660 unsafe {
661 let is_utf8 = ffi::g_get_filename_charsets(&mut filename_charsets);
662 (
663 from_glib(is_utf8),
664 FromGlibPtrContainer::from_glib_none(filename_charsets),
665 )
666 }
667}
668
669/// Converts a string from UTF-8 to the encoding GLib uses for
670/// filenames. Note that on Windows GLib uses UTF-8 for filenames;
671/// on other platforms, this function indirectly depends on the
672/// [current locale](running.html#locale).
673///
674/// The input string shall not contain nul characters even if the @len
675/// argument is positive. A nul character found inside the string will result
676/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. If the filename encoding is
677/// not UTF-8 and the conversion output contains a nul character, the error
678/// [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the function returns [`None`].
679/// ## `utf8string`
680/// a UTF-8 encoded string.
681/// ## `len`
682/// the length of the string, or -1 if the string is
683/// nul-terminated.
684///
685/// # Returns
686///
687///
688/// The converted string, or [`None`] on an error.
689///
690/// ## `bytes_read`
691/// location to store the number of bytes in
692/// the input string that were successfully converted, or [`None`].
693/// Even if the conversion was successful, this may be
694/// less than @len if there were partial characters
695/// at the end of the input. If the error
696/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
697/// stored will be the byte offset after the last valid
698/// input sequence.
699///
700/// ## `bytes_written`
701/// the number of bytes stored in
702/// the output buffer (not including the terminating nul).
703// rustdoc-stripper-ignore-next-stop
704/// Converts a string from UTF-8 to the encoding GLib uses for
705/// filenames. Note that on Windows GLib uses UTF-8 for filenames;
706/// on other platforms, this function indirectly depends on the
707/// [current locale](running.html#locale).
708///
709/// The input string shall not contain nul characters even if the @len
710/// argument is positive. A nul character found inside the string will result
711/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. If the filename encoding is
712/// not UTF-8 and the conversion output contains a nul character, the error
713/// [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the function returns [`None`].
714/// ## `utf8string`
715/// a UTF-8 encoded string.
716/// ## `len`
717/// the length of the string, or -1 if the string is
718/// nul-terminated.
719///
720/// # Returns
721///
722///
723/// The converted string, or [`None`] on an error.
724///
725/// ## `bytes_read`
726/// location to store the number of bytes in
727/// the input string that were successfully converted, or [`None`].
728/// Even if the conversion was successful, this may be
729/// less than @len if there were partial characters
730/// at the end of the input. If the error
731/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
732/// stored will be the byte offset after the last valid
733/// input sequence.
734///
735/// ## `bytes_written`
736/// the number of bytes stored in
737/// the output buffer (not including the terminating nul).
738#[doc(alias = "g_filename_from_utf8")]
739pub fn filename_from_utf8(utf8string: impl IntoGStr) -> Result<(PathBuf, usize), CvtError> {
740 let mut bytes_read = 0;
741 let mut bytes_written = std::mem::MaybeUninit::uninit();
742 let mut error = ptr::null_mut();
743 let ret = utf8string.run_with_gstr(|utf8string| {
744 assert!(utf8string.len() <= isize::MAX as usize);
745 let len = utf8string.len() as isize;
746 unsafe {
747 ffi::g_filename_from_utf8(
748 utf8string.to_glib_none().0,
749 len,
750 &mut bytes_read,
751 bytes_written.as_mut_ptr(),
752 &mut error,
753 )
754 }
755 });
756 if error.is_null() {
757 Ok(unsafe {
758 (
759 PathBuf::from_glib_full_num(ret, bytes_written.assume_init()),
760 bytes_read,
761 )
762 })
763 } else {
764 Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
765 }
766}
767
768/// Converts a string which is in the encoding used by GLib for
769/// filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
770/// for filenames; on other platforms, this function indirectly depends on
771/// the [current locale](running.html#locale).
772///
773/// The input string shall not contain nul characters even if the @len
774/// argument is positive. A nul character found inside the string will result
775/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence].
776/// If the source encoding is not UTF-8 and the conversion output contains a
777/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
778/// function returns [`None`]. Use g_convert() to produce output that
779/// may contain embedded nul characters.
780/// ## `opsysstring`
781/// a string in the encoding for filenames
782/// ## `len`
783/// the length of the string, or -1 if the string is
784/// nul-terminated (Note that some encodings may allow nul
785/// bytes to occur inside strings. In that case, using -1
786/// for the @len parameter is unsafe)
787///
788/// # Returns
789///
790/// The converted string, or [`None`] on an error.
791///
792/// ## `bytes_read`
793/// location to store the number of bytes in the
794/// input string that were successfully converted, or [`None`].
795/// Even if the conversion was successful, this may be
796/// less than @len if there were partial characters
797/// at the end of the input. If the error
798/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
799/// stored will be the byte offset after the last valid
800/// input sequence.
801///
802/// ## `bytes_written`
803/// the number of bytes stored in the output
804/// buffer (not including the terminating nul).
805// rustdoc-stripper-ignore-next-stop
806/// Converts a string which is in the encoding used by GLib for
807/// filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
808/// for filenames; on other platforms, this function indirectly depends on
809/// the [current locale](running.html#locale).
810///
811/// The input string shall not contain nul characters even if the @len
812/// argument is positive. A nul character found inside the string will result
813/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence].
814/// If the source encoding is not UTF-8 and the conversion output contains a
815/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
816/// function returns [`None`]. Use g_convert() to produce output that
817/// may contain embedded nul characters.
818/// ## `opsysstring`
819/// a string in the encoding for filenames
820/// ## `len`
821/// the length of the string, or -1 if the string is
822/// nul-terminated (Note that some encodings may allow nul
823/// bytes to occur inside strings. In that case, using -1
824/// for the @len parameter is unsafe)
825///
826/// # Returns
827///
828/// The converted string, or [`None`] on an error.
829///
830/// ## `bytes_read`
831/// location to store the number of bytes in the
832/// input string that were successfully converted, or [`None`].
833/// Even if the conversion was successful, this may be
834/// less than @len if there were partial characters
835/// at the end of the input. If the error
836/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
837/// stored will be the byte offset after the last valid
838/// input sequence.
839///
840/// ## `bytes_written`
841/// the number of bytes stored in the output
842/// buffer (not including the terminating nul).
843#[doc(alias = "g_filename_to_utf8")]
844pub fn filename_to_utf8(
845 opsysstring: impl AsRef<std::path::Path>,
846) -> Result<(crate::GString, usize), CvtError> {
847 let path = opsysstring.as_ref().to_glib_none();
848 let mut bytes_read = 0;
849 let mut bytes_written = std::mem::MaybeUninit::uninit();
850 let mut error = ptr::null_mut();
851 let ret = unsafe {
852 ffi::g_filename_to_utf8(
853 path.0,
854 path.1.as_bytes().len() as isize,
855 &mut bytes_read,
856 bytes_written.as_mut_ptr(),
857 &mut error,
858 )
859 };
860 if error.is_null() {
861 Ok(unsafe {
862 (
863 GString::from_glib_full_num(ret, bytes_written.assume_init()),
864 bytes_read,
865 )
866 })
867 } else {
868 Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
869 }
870}
871
872/// Converts a string from UTF-8 to the encoding used for strings by
873/// the C runtime (usually the same as that used by the operating
874/// system) in the [current locale](running.html#locale).
875/// On Windows this means the system codepage.
876///
877/// The input string shall not contain nul characters even if the @len
878/// argument is positive. A nul character found inside the string will result
879/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. Use g_convert() to convert
880/// input that may contain embedded nul characters.
881/// ## `utf8string`
882/// a UTF-8 encoded string
883/// ## `len`
884/// the length of the string, or -1 if the string is
885/// nul-terminated.
886///
887/// # Returns
888///
889///
890/// A newly-allocated buffer containing the converted string,
891/// or [`None`] on an error, and error will be set.
892///
893/// ## `bytes_read`
894/// location to store the number of bytes in the
895/// input string that were successfully converted, or [`None`].
896/// Even if the conversion was successful, this may be
897/// less than @len if there were partial characters
898/// at the end of the input. If the error
899/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
900/// stored will be the byte offset after the last valid
901/// input sequence.
902// rustdoc-stripper-ignore-next-stop
903/// Converts a string from UTF-8 to the encoding used for strings by
904/// the C runtime (usually the same as that used by the operating
905/// system) in the [current locale](running.html#locale).
906/// On Windows this means the system codepage.
907///
908/// The input string shall not contain nul characters even if the @len
909/// argument is positive. A nul character found inside the string will result
910/// in error [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence]. Use g_convert() to convert
911/// input that may contain embedded nul characters.
912/// ## `utf8string`
913/// a UTF-8 encoded string
914/// ## `len`
915/// the length of the string, or -1 if the string is
916/// nul-terminated.
917///
918/// # Returns
919///
920///
921/// A newly-allocated buffer containing the converted string,
922/// or [`None`] on an error, and error will be set.
923///
924/// ## `bytes_read`
925/// location to store the number of bytes in the
926/// input string that were successfully converted, or [`None`].
927/// Even if the conversion was successful, this may be
928/// less than @len if there were partial characters
929/// at the end of the input. If the error
930/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
931/// stored will be the byte offset after the last valid
932/// input sequence.
933#[doc(alias = "g_locale_from_utf8")]
934pub fn locale_from_utf8(utf8string: impl IntoGStr) -> Result<(Slice<u8>, usize), CvtError> {
935 let mut bytes_read = 0;
936 let mut bytes_written = std::mem::MaybeUninit::uninit();
937 let mut error = ptr::null_mut();
938 let ret = utf8string.run_with_gstr(|utf8string| {
939 assert!(utf8string.len() <= isize::MAX as usize);
940 unsafe {
941 ffi::g_locale_from_utf8(
942 utf8string.as_ptr(),
943 utf8string.len() as isize,
944 &mut bytes_read,
945 bytes_written.as_mut_ptr(),
946 &mut error,
947 )
948 }
949 });
950 if error.is_null() {
951 Ok(unsafe {
952 (
953 Slice::from_glib_full_num(ret, bytes_written.assume_init() + 1),
954 bytes_read,
955 )
956 })
957 } else {
958 Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
959 }
960}
961
962/// Converts a string which is in the encoding used for strings by
963/// the C runtime (usually the same as that used by the operating
964/// system) in the [current locale](running.html#locale) into a UTF-8 string.
965///
966/// If the source encoding is not UTF-8 and the conversion output contains a
967/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
968/// function returns [`None`].
969/// If the source encoding is UTF-8, an embedded nul character is treated with
970/// the [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error for backward compatibility with
971/// earlier versions of this library. Use g_convert() to produce output that
972/// may contain embedded nul characters.
973/// ## `opsysstring`
974/// a string in the
975/// encoding of the current locale. On Windows
976/// this means the system codepage.
977///
978/// # Returns
979///
980/// The converted string, or [`None`] on an error.
981///
982/// ## `bytes_read`
983/// location to store the number of bytes in the
984/// input string that were successfully converted, or [`None`].
985/// Even if the conversion was successful, this may be
986/// less than @len if there were partial characters
987/// at the end of the input. If the error
988/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
989/// stored will be the byte offset after the last valid
990/// input sequence.
991///
992/// ## `bytes_written`
993/// the number of bytes stored in the output
994/// buffer (not including the terminating nul).
995// rustdoc-stripper-ignore-next-stop
996/// Converts a string which is in the encoding used for strings by
997/// the C runtime (usually the same as that used by the operating
998/// system) in the [current locale](running.html#locale) into a UTF-8 string.
999///
1000/// If the source encoding is not UTF-8 and the conversion output contains a
1001/// nul character, the error [`ConvertError::EmbeddedNul`][crate::ConvertError::EmbeddedNul] is set and the
1002/// function returns [`None`].
1003/// If the source encoding is UTF-8, an embedded nul character is treated with
1004/// the [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] error for backward compatibility with
1005/// earlier versions of this library. Use g_convert() to produce output that
1006/// may contain embedded nul characters.
1007/// ## `opsysstring`
1008/// a string in the
1009/// encoding of the current locale. On Windows
1010/// this means the system codepage.
1011///
1012/// # Returns
1013///
1014/// The converted string, or [`None`] on an error.
1015///
1016/// ## `bytes_read`
1017/// location to store the number of bytes in the
1018/// input string that were successfully converted, or [`None`].
1019/// Even if the conversion was successful, this may be
1020/// less than @len if there were partial characters
1021/// at the end of the input. If the error
1022/// [`ConvertError::IllegalSequence`][crate::ConvertError::IllegalSequence] occurs, the value
1023/// stored will be the byte offset after the last valid
1024/// input sequence.
1025///
1026/// ## `bytes_written`
1027/// the number of bytes stored in the output
1028/// buffer (not including the terminating nul).
1029#[doc(alias = "g_locale_to_utf8")]
1030pub fn locale_to_utf8(opsysstring: &[u8]) -> Result<(crate::GString, usize), CvtError> {
1031 let len = opsysstring.len() as isize;
1032 let mut bytes_read = 0;
1033 let mut bytes_written = std::mem::MaybeUninit::uninit();
1034 let mut error = ptr::null_mut();
1035 let ret = unsafe {
1036 ffi::g_locale_to_utf8(
1037 opsysstring.to_glib_none().0,
1038 len,
1039 &mut bytes_read,
1040 bytes_written.as_mut_ptr(),
1041 &mut error,
1042 )
1043 };
1044 if error.is_null() {
1045 Ok(unsafe {
1046 (
1047 GString::from_glib_full_num(ret, bytes_written.assume_init()),
1048 bytes_read,
1049 )
1050 })
1051 } else {
1052 Err(unsafe { CvtError::new(from_glib_full(error), bytes_read) })
1053 }
1054}
1055
1056#[doc(alias = "g_utf8_to_ucs4")]
1057#[doc(alias = "g_utf8_to_ucs4_fast")]
1058#[doc(alias = "utf8_to_ucs4")]
1059pub fn utf8_to_utf32(str: impl AsRef<str>) -> Slice<char> {
1060 unsafe {
1061 let mut items_written = 0;
1062
1063 let str_as_utf32 = ffi::g_utf8_to_ucs4_fast(
1064 str.as_ref().as_ptr().cast::<c_char>(),
1065 str.as_ref().len() as _,
1066 &mut items_written,
1067 );
1068
1069 // NOTE: We assume that u32 and char have the same layout and trust that glib won't give us
1070 // invalid UTF-32 codepoints
1071 Slice::from_glib_full_num(str_as_utf32, items_written as usize)
1072 }
1073}
1074
1075#[doc(alias = "g_ucs4_to_utf8")]
1076#[doc(alias = "ucs4_to_utf8")]
1077pub fn utf32_to_utf8(str: impl AsRef<[char]>) -> GString {
1078 let mut items_read = 0;
1079 let mut items_written = 0;
1080 let mut error = ptr::null_mut();
1081
1082 unsafe {
1083 let str_as_utf8 = ffi::g_ucs4_to_utf8(
1084 str.as_ref().as_ptr().cast::<u32>(),
1085 str.as_ref().len() as _,
1086 &mut items_read,
1087 &mut items_written,
1088 &mut error,
1089 );
1090
1091 debug_assert!(
1092 error.is_null(),
1093 "Rust `char` should always be convertible to UTF-8"
1094 );
1095
1096 GString::from_glib_full_num(str_as_utf8, items_written as usize)
1097 }
1098}
1099
1100#[doc(alias = "g_utf8_casefold")]
1101#[doc(alias = "utf8_casefold")]
1102pub fn casefold(str: impl AsRef<str>) -> GString {
1103 unsafe {
1104 let str = ffi::g_utf8_casefold(str.as_ref().as_ptr().cast(), str.as_ref().len() as isize);
1105
1106 from_glib_full(str)
1107 }
1108}
1109
1110#[doc(alias = "g_utf8_normalize")]
1111#[doc(alias = "utf8_normalize")]
1112pub fn normalize(str: impl AsRef<str>, mode: NormalizeMode) -> GString {
1113 unsafe {
1114 let str = ffi::g_utf8_normalize(
1115 str.as_ref().as_ptr().cast(),
1116 str.as_ref().len() as isize,
1117 mode.into_glib(),
1118 );
1119
1120 from_glib_full(str)
1121 }
1122}
1123
#[cfg(test)]
mod tests {
    // One-shot conversions: a clean input, an input containing the byte 0xAA, and two
    // fallback scenarios for a character the target cannot represent.
    #[test]
    fn convert_ascii() {
        let plain = super::convert(b"Hello", "utf-8", "ascii");
        assert!(plain.is_ok());

        let stray_byte = super::convert(b"He\xaallo", "utf-8", "ascii");
        assert!(stray_byte.is_err());

        // No fallback string supplied: the character shows up as a `\u00e9` escape.
        let escaped =
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", crate::NONE_STR)
                .unwrap()
                .0;
        assert_eq!(escaped.as_slice(), b"H\\u00e9llo");

        // Explicit fallback string: the character is replaced by it.
        let replaced =
            super::convert_with_fallback(b"H\xc3\xa9llo", "ascii", "utf-8", Some("_"))
                .unwrap()
                .0;
        assert_eq!(replaced.as_slice(), b"H_llo");
    }

    // The same inputs through a reusable converter, plus construction with an unknown
    // character set name.
    #[test]
    fn iconv() {
        let mut converter = super::IConv::new("utf-8", "ascii").unwrap();

        let plain = converter.convert(b"Hello");
        assert!(plain.is_ok());

        let stray_byte = converter.convert(b"He\xaallo");
        assert!(stray_byte.is_err());

        let unknown = super::IConv::new("utf-8", "badcharset123456789");
        assert!(unknown.is_none());
    }

    // Smoke test: only checks that the call completes.
    #[test]
    fn filename_charsets() {
        let _ = super::filename_charsets();
    }

    // Conversions between UTF-8 and UTF-32 in both directions, including characters
    // outside the ASCII range.
    #[test]
    fn utf8_and_utf32() {
        let code_points = ['A', 'b', '🤔'];
        let encoded = super::utf32_to_utf8(code_points);
        assert_eq!(encoded, "Ab🤔");

        let text = "🤔 ț";
        let decoded = super::utf8_to_utf32(text);
        assert_eq!(decoded.as_slice(), &['🤔', ' ', 'ț']);
    }
}