glib/
unichar.rs

// Take a look at the license at the top of the repository in the LICENSE file.
2
3use std::mem::MaybeUninit;
4
5use crate::{
6    UnicodeBreakType, UnicodeScript, UnicodeType, ffi,
7    translate::{IntoGlib, UnsafeFrom, from_glib},
8};
9
// Private module holding the `Sealed` marker trait. Because the module itself is not
// public, code outside this file cannot name `sealed::Sealed` and therefore cannot
// implement any trait that lists it as a supertrait.
mod sealed {
    // Marker trait with no items; it only exists to be used as a supertrait bound.
    pub trait Sealed {}
    impl Sealed for char {}
}
14
15impl UnsafeFrom<u32> for char {
16    #[inline]
17    unsafe fn unsafe_from(t: u32) -> Self {
18        debug_assert!(
19            char::try_from(t).is_ok(),
20            "glib returned an invalid Unicode codepoint"
21        );
22        unsafe { char::from_u32_unchecked(t) }
23    }
24}
25
// rustdoc-stripper-ignore-next
/// The kind of decomposition to perform.
///
/// Passed to [`Unichar::fully_decompose`], where it selects the value of the `compat`
/// argument handed to `g_unichar_fully_decompose`.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum DecompositionKind {
    // rustdoc-stripper-ignore-next
    /// Compatibility decomposition
    Compatibility,

    // rustdoc-stripper-ignore-next
    /// Canonical decomposition
    Canonical,
}
38
// rustdoc-stripper-ignore-next
/// The result of a single step of the Unicode canonical decomposition algorithm
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum CharacterDecomposition {
    // rustdoc-stripper-ignore-next
    /// The character could not be decomposed further
    NoDecomposition,
    // rustdoc-stripper-ignore-next
    /// A 'singleton' decomposition, which means the character was replaced by another
    Singleton(char),
    // rustdoc-stripper-ignore-next
    /// The first character may decompose further, but the second cannot
    Pair(char, char),
}
53
54// rustdoc-stripper-ignore-next
55/// This trait provides access to Unicode character classification and manipulations functions
56/// provided by GLib that do not exist in the standard library
57#[doc(alias = "g_unichar")]
58pub trait Unichar: sealed::Sealed + Copy + Into<u32> + UnsafeFrom<u32> {
59    #[doc(alias = "g_unichar_type")]
60    #[doc(alias = "unichar_type")]
61    #[inline]
62    fn unicode_type(self) -> UnicodeType {
63        unsafe { from_glib(ffi::g_unichar_type(self.into())) }
64    }
65
66    #[doc(alias = "g_unichar_break_type")]
67    #[doc(alias = "unichar_break_type")]
68    #[inline]
69    fn break_type(self) -> UnicodeBreakType {
70        unsafe { from_glib(ffi::g_unichar_break_type(self.into())) }
71    }
72
73    #[doc(alias = "g_unichar_get_script")]
74    #[doc(alias = "unichar_get_script")]
75    #[inline]
76    fn script(self) -> UnicodeScript {
77        unsafe { from_glib(ffi::g_unichar_get_script(self.into())) }
78    }
79
80    #[doc(alias = "g_unichar_combining_class")]
81    #[doc(alias = "unichar_combining_class")]
82    #[inline]
83    fn combining_class(self) -> u8 {
84        // UAX #44 § 5.7.4: The character property invariants regarding Canonical_Combining_Class
85        //                  guarantee that [...] all values used will be in the range 0..254.
86        // So this cast is fine
87        unsafe { ffi::g_unichar_combining_class(self.into()) as u8 }
88    }
89
90    #[doc(alias = "g_unichar_ismark")]
91    #[doc(alias = "unichar_ismark")]
92    #[inline]
93    fn is_mark(self) -> bool {
94        unsafe { from_glib(ffi::g_unichar_ismark(self.into())) }
95    }
96
97    #[doc(alias = "g_unichar_isgraph")]
98    #[doc(alias = "unichar_isgraph")]
99    #[inline]
100    fn is_graphical(self) -> bool {
101        unsafe { from_glib(ffi::g_unichar_isgraph(self.into())) }
102    }
103
104    #[doc(alias = "g_unichar_ispunct")]
105    #[doc(alias = "unichar_ispunct")]
106    #[inline]
107    fn is_punctuation(self) -> bool {
108        unsafe { from_glib(ffi::g_unichar_ispunct(self.into())) }
109    }
110
111    #[doc(alias = "g_unichar_istitle")]
112    #[doc(alias = "unichar_istitle")]
113    #[inline]
114    fn is_titlecase(self) -> bool {
115        unsafe { from_glib(ffi::g_unichar_istitle(self.into())) }
116    }
117
118    #[doc(alias = "g_unichar_isdefined")]
119    #[doc(alias = "unichar_isdefined")]
120    #[inline]
121    fn is_defined(self) -> bool {
122        unsafe { from_glib(ffi::g_unichar_isdefined(self.into())) }
123    }
124
125    #[doc(alias = "g_unichar_iswide")]
126    #[doc(alias = "unichar_iswide")]
127    #[inline]
128    fn is_wide(self) -> bool {
129        unsafe { from_glib(ffi::g_unichar_iswide(self.into())) }
130    }
131
132    #[doc(alias = "g_unichar_iswide_cjk")]
133    #[doc(alias = "unichar_iswide_cjk")]
134    #[inline]
135    fn is_wide_cjk(self) -> bool {
136        unsafe { from_glib(ffi::g_unichar_iswide_cjk(self.into())) }
137    }
138
139    #[doc(alias = "g_unichar_iszerowidth")]
140    #[doc(alias = "unichar_iszerowidth")]
141    #[inline]
142    fn is_zero_width(self) -> bool {
143        unsafe { from_glib(ffi::g_unichar_iszerowidth(self.into())) }
144    }
145
146    #[doc(alias = "g_unichar_totitle")]
147    #[doc(alias = "unichar_totitle")]
148    #[inline]
149    fn to_titlecase(self) -> Self {
150        unsafe { Self::unsafe_from(ffi::g_unichar_totitle(self.into())) }
151    }
152
153    #[doc(alias = "g_unichar_get_mirror_char")]
154    #[doc(alias = "unichar_get_mirror_char")]
155    #[inline]
156    fn mirror_char(self) -> Option<Self> {
157        // SAFETY: If g_unichar_get_mirror_char returns true, it will initialize `mirrored`
158        unsafe {
159            let mut mirrored = MaybeUninit::uninit();
160            let res = from_glib(ffi::g_unichar_get_mirror_char(
161                self.into(),
162                mirrored.as_mut_ptr(),
163            ));
164            if res {
165                Some(Self::unsafe_from(mirrored.assume_init()))
166            } else {
167                None
168            }
169        }
170    }
171
172    #[doc(alias = "g_unichar_fully_decompose")]
173    #[doc(alias = "unichar_fully_decompose")]
174    #[inline]
175    fn fully_decompose(self, decomposition_kind: DecompositionKind) -> Vec<Self> {
176        let compat = match decomposition_kind {
177            DecompositionKind::Compatibility => true,
178            DecompositionKind::Canonical => false,
179        };
180        let buffer_len = ffi::G_UNICHAR_MAX_DECOMPOSITION_LENGTH as usize;
181
182        // SAFETY: We assume glib only ever writes valid Unicode codepoints in the provided buffer
183        //         and that it does not lie about the
184        unsafe {
185            let mut buffer = Vec::<Self>::with_capacity(buffer_len);
186            let decomposition_length = ffi::g_unichar_fully_decompose(
187                self.into(),
188                compat.into_glib(),
189                buffer.as_mut_ptr().cast(),
190                buffer_len,
191            );
192            debug_assert!(decomposition_length <= buffer_len);
193            buffer.set_len(decomposition_length);
194            buffer
195        }
196    }
197
198    #[doc(alias = "g_unichar_decompose")]
199    #[doc(alias = "unichar_decompose")]
200    #[inline]
201    fn decompose(self) -> CharacterDecomposition {
202        // SAFETY: `a` and `b` will always be init after the g_unichar_decompose call returns
203        unsafe {
204            let mut a = MaybeUninit::uninit();
205            let mut b = MaybeUninit::uninit();
206            let res = from_glib(ffi::g_unichar_decompose(
207                self.into(),
208                a.as_mut_ptr(),
209                b.as_mut_ptr(),
210            ));
211
212            if res {
213                let (a, b) = (a.assume_init(), b.assume_init());
214                if b == 0 {
215                    CharacterDecomposition::Singleton(char::unsafe_from(a))
216                } else {
217                    CharacterDecomposition::Pair(char::unsafe_from(a), char::unsafe_from(b))
218                }
219            } else {
220                CharacterDecomposition::NoDecomposition
221            }
222        }
223    }
224
225    #[doc(alias = "g_unichar_compose")]
226    #[doc(alias = "unichar_compose")]
227    #[inline]
228    fn compose(a: char, b: char) -> Option<Self> {
229        // SAFETY: If g_unichar_compose returns true, it will initialize `out`
230        unsafe {
231            let mut out = MaybeUninit::uninit();
232            let res = from_glib(ffi::g_unichar_compose(a.into(), b.into(), out.as_mut_ptr()));
233
234            if res {
235                Some(Self::unsafe_from(out.assume_init()))
236            } else {
237                None
238            }
239        }
240    }
241}
242
243impl Unichar for char {}