glib/
unichar.rs

1// Take a look at the license at the top of the repository in the LICENSE file.
2
3use std::mem::MaybeUninit;
4
5use crate::{
6    ffi,
7    translate::{from_glib, IntoGlib, UnsafeFrom},
8    UnicodeBreakType, UnicodeScript, UnicodeType,
9};
10
// Private module implementing the "sealed trait" pattern: `Sealed` is a supertrait of
// `Unichar`, and because this module is not `pub`, code outside this module cannot name
// `Sealed` and therefore cannot add further `Unichar` implementations.
mod sealed {
    // Marker trait with no items; it exists only to be used as a supertrait bound.
    pub trait Sealed {}
    // `char` is the only type that receives a `Sealed` implementation here.
    impl Sealed for char {}
}
15
16impl UnsafeFrom<u32> for char {
17    #[inline]
18    unsafe fn unsafe_from(t: u32) -> Self {
19        debug_assert!(
20            char::try_from(t).is_ok(),
21            "glib returned an invalid Unicode codepoint"
22        );
23        unsafe { char::from_u32_unchecked(t) }
24    }
25}
26
// rustdoc-stripper-ignore-next
/// The kind of decomposition to perform
///
/// Passed to [`Unichar::fully_decompose`], which translates it into the boolean `compat`
/// argument of `g_unichar_fully_decompose`.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum DecompositionKind {
    // rustdoc-stripper-ignore-next
    /// Compatibility decomposition
    ///
    /// `fully_decompose` passes `compat = true` for this variant.
    Compatibility,

    // rustdoc-stripper-ignore-next
    /// Canonical decomposition
    ///
    /// `fully_decompose` passes `compat = false` for this variant.
    Canonical,
}
39
// rustdoc-stripper-ignore-next
/// The result of a single step of the Unicode canonical decomposition algorithm
///
/// Returned by [`Unichar::decompose`].
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum CharacterDecomposition {
    // rustdoc-stripper-ignore-next
    /// The character could not be decomposed further
    NoDecomposition,
    // rustdoc-stripper-ignore-next
    /// A 'singleton' decomposition, which means the character was replaced by another
    Singleton(char),
    // rustdoc-stripper-ignore-next
    /// The first character may decompose further, but the second cannot
    Pair(char, char),
}
54
55// rustdoc-stripper-ignore-next
56/// This trait provides access to Unicode character classification and manipulations functions
57/// provided by GLib that do not exist in the standard library
58#[doc(alias = "g_unichar")]
59pub trait Unichar: sealed::Sealed + Copy + Into<u32> + UnsafeFrom<u32> {
60    #[doc(alias = "g_unichar_type")]
61    #[doc(alias = "unichar_type")]
62    #[inline]
63    fn unicode_type(self) -> UnicodeType {
64        unsafe { from_glib(ffi::g_unichar_type(self.into())) }
65    }
66
67    #[doc(alias = "g_unichar_break_type")]
68    #[doc(alias = "unichar_break_type")]
69    #[inline]
70    fn break_type(self) -> UnicodeBreakType {
71        unsafe { from_glib(ffi::g_unichar_break_type(self.into())) }
72    }
73
74    #[doc(alias = "g_unichar_get_script")]
75    #[doc(alias = "unichar_get_script")]
76    #[inline]
77    fn script(self) -> UnicodeScript {
78        unsafe { from_glib(ffi::g_unichar_get_script(self.into())) }
79    }
80
81    #[doc(alias = "g_unichar_combining_class")]
82    #[doc(alias = "unichar_combining_class")]
83    #[inline]
84    fn combining_class(self) -> u8 {
85        // UAX #44 § 5.7.4: The character property invariants regarding Canonical_Combining_Class
86        //                  guarantee that [...] all values used will be in the range 0..254.
87        // So this cast is fine
88        unsafe { ffi::g_unichar_combining_class(self.into()) as u8 }
89    }
90
91    #[doc(alias = "g_unichar_ismark")]
92    #[doc(alias = "unichar_ismark")]
93    #[inline]
94    fn is_mark(self) -> bool {
95        unsafe { from_glib(ffi::g_unichar_ismark(self.into())) }
96    }
97
98    #[doc(alias = "g_unichar_isgraph")]
99    #[doc(alias = "unichar_isgraph")]
100    #[inline]
101    fn is_graphical(self) -> bool {
102        unsafe { from_glib(ffi::g_unichar_isgraph(self.into())) }
103    }
104
105    #[doc(alias = "g_unichar_ispunct")]
106    #[doc(alias = "unichar_ispunct")]
107    #[inline]
108    fn is_punctuation(self) -> bool {
109        unsafe { from_glib(ffi::g_unichar_ispunct(self.into())) }
110    }
111
112    #[doc(alias = "g_unichar_istitle")]
113    #[doc(alias = "unichar_istitle")]
114    #[inline]
115    fn is_titlecase(self) -> bool {
116        unsafe { from_glib(ffi::g_unichar_istitle(self.into())) }
117    }
118
119    #[doc(alias = "g_unichar_isdefined")]
120    #[doc(alias = "unichar_isdefined")]
121    #[inline]
122    fn is_defined(self) -> bool {
123        unsafe { from_glib(ffi::g_unichar_isdefined(self.into())) }
124    }
125
126    #[doc(alias = "g_unichar_iswide")]
127    #[doc(alias = "unichar_iswide")]
128    #[inline]
129    fn is_wide(self) -> bool {
130        unsafe { from_glib(ffi::g_unichar_iswide(self.into())) }
131    }
132
133    #[doc(alias = "g_unichar_iswide_cjk")]
134    #[doc(alias = "unichar_iswide_cjk")]
135    #[inline]
136    fn is_wide_cjk(self) -> bool {
137        unsafe { from_glib(ffi::g_unichar_iswide_cjk(self.into())) }
138    }
139
140    #[doc(alias = "g_unichar_iszerowidth")]
141    #[doc(alias = "unichar_iszerowidth")]
142    #[inline]
143    fn is_zero_width(self) -> bool {
144        unsafe { from_glib(ffi::g_unichar_iszerowidth(self.into())) }
145    }
146
147    #[doc(alias = "g_unichar_totitle")]
148    #[doc(alias = "unichar_totitle")]
149    #[inline]
150    fn to_titlecase(self) -> Self {
151        unsafe { Self::unsafe_from(ffi::g_unichar_totitle(self.into())) }
152    }
153
154    #[doc(alias = "g_unichar_get_mirror_char")]
155    #[doc(alias = "unichar_get_mirror_char")]
156    #[inline]
157    fn mirror_char(self) -> Option<Self> {
158        // SAFETY: If g_unichar_get_mirror_char returns true, it will initialize `mirrored`
159        unsafe {
160            let mut mirrored = MaybeUninit::uninit();
161            let res = from_glib(ffi::g_unichar_get_mirror_char(
162                self.into(),
163                mirrored.as_mut_ptr(),
164            ));
165            if res {
166                Some(Self::unsafe_from(mirrored.assume_init()))
167            } else {
168                None
169            }
170        }
171    }
172
173    #[doc(alias = "g_unichar_fully_decompose")]
174    #[doc(alias = "unichar_fully_decompose")]
175    #[inline]
176    fn fully_decompose(self, decomposition_kind: DecompositionKind) -> Vec<Self> {
177        let compat = match decomposition_kind {
178            DecompositionKind::Compatibility => true,
179            DecompositionKind::Canonical => false,
180        };
181        let buffer_len = ffi::G_UNICHAR_MAX_DECOMPOSITION_LENGTH as usize;
182
183        // SAFETY: We assume glib only ever writes valid Unicode codepoints in the provided buffer
184        //         and that it does not lie about the
185        unsafe {
186            let mut buffer = Vec::<Self>::with_capacity(buffer_len);
187            let decomposition_length = ffi::g_unichar_fully_decompose(
188                self.into(),
189                compat.into_glib(),
190                buffer.as_mut_ptr().cast(),
191                buffer_len,
192            );
193            debug_assert!(decomposition_length <= buffer_len);
194            buffer.set_len(decomposition_length);
195            buffer
196        }
197    }
198
199    #[doc(alias = "g_unichar_decompose")]
200    #[doc(alias = "unichar_decompose")]
201    #[inline]
202    fn decompose(self) -> CharacterDecomposition {
203        // SAFETY: `a` and `b` will always be init after the g_unichar_decompose call returns
204        unsafe {
205            let mut a = MaybeUninit::uninit();
206            let mut b = MaybeUninit::uninit();
207            let res = from_glib(ffi::g_unichar_decompose(
208                self.into(),
209                a.as_mut_ptr(),
210                b.as_mut_ptr(),
211            ));
212
213            if res {
214                let (a, b) = (a.assume_init(), b.assume_init());
215                if b == 0 {
216                    CharacterDecomposition::Singleton(char::unsafe_from(a))
217                } else {
218                    CharacterDecomposition::Pair(char::unsafe_from(a), char::unsafe_from(b))
219                }
220            } else {
221                CharacterDecomposition::NoDecomposition
222            }
223        }
224    }
225
226    #[doc(alias = "g_unichar_compose")]
227    #[doc(alias = "unichar_compose")]
228    #[inline]
229    fn compose(a: char, b: char) -> Option<Self> {
230        // SAFETY: If g_unichar_compose returns true, it will initialize `out`
231        unsafe {
232            let mut out = MaybeUninit::uninit();
233            let res = from_glib(ffi::g_unichar_compose(a.into(), b.into(), out.as_mut_ptr()));
234
235            if res {
236                Some(Self::unsafe_from(out.assume_init()))
237            } else {
238                None
239            }
240        }
241    }
242}
243
// `char` uses the default implementation of every `Unichar` method; the empty body only opts
// the type into the trait.
impl Unichar for char {}