glib/
regex.rs

1// Take a look at the license at the top of the repository in the LICENSE file.
2
3// rustdoc-stripper-ignore-next
4//! This module is inefficient and should not be used by Rust programs except for
5//! compatibility with GLib.Regex based APIs.
6
7use crate::{
8    ffi, translate::*, GStr, GStringPtr, MatchInfo, PtrSlice, Regex, RegexCompileFlags,
9    RegexMatchFlags,
10};
11use std::{mem, ptr};
12
13impl Regex {
14    /// Retrieves the number of the subexpression named @name.
15    /// ## `name`
16    /// name of the subexpression
17    ///
18    /// # Returns
19    ///
20    /// The number of the subexpression or -1 if @name
21    ///   does not exists
22    // rustdoc-stripper-ignore-next-stop
23    /// Retrieves the number of the subexpression named @name.
24    /// ## `name`
25    /// name of the subexpression
26    ///
27    /// # Returns
28    ///
29    /// The number of the subexpression or -1 if @name
30    ///   does not exists
31    #[doc(alias = "g_regex_get_string_number")]
32    #[doc(alias = "get_string_number")]
33    pub fn string_number(&self, name: impl IntoGStr) -> i32 {
34        name.run_with_gstr(|name| unsafe {
35            ffi::g_regex_get_string_number(self.to_glib_none().0, name.to_glib_none().0)
36        })
37    }
38
39    /// Escapes the nul characters in @string to "\x00".  It can be used
40    /// to compile a regex with embedded nul characters.
41    ///
42    /// For completeness, @length can be -1 for a nul-terminated string.
43    /// In this case the output string will be of course equal to @string.
44    /// ## `string`
45    /// the string to escape
46    /// ## `length`
47    /// the length of @string
48    ///
49    /// # Returns
50    ///
51    /// a newly-allocated escaped string
52    // rustdoc-stripper-ignore-next-stop
53    /// Escapes the nul characters in @string to "\x00".  It can be used
54    /// to compile a regex with embedded nul characters.
55    ///
56    /// For completeness, @length can be -1 for a nul-terminated string.
57    /// In this case the output string will be of course equal to @string.
58    /// ## `string`
59    /// the string to escape
60    /// ## `length`
61    /// the length of @string
62    ///
63    /// # Returns
64    ///
65    /// a newly-allocated escaped string
66    #[doc(alias = "g_regex_escape_nul")]
67    pub fn escape_nul(string: impl IntoGStr) -> crate::GString {
68        unsafe {
69            string.run_with_gstr(|string| {
70                from_glib_full(ffi::g_regex_escape_nul(
71                    string.to_glib_none().0,
72                    string.len() as _,
73                ))
74            })
75        }
76    }
77
78    /// Escapes the special characters used for regular expressions
79    /// in @string, for instance "a.b*c" becomes "a\.b\*c". This
80    /// function is useful to dynamically generate regular expressions.
81    ///
82    /// @string can contain nul characters that are replaced with "\0",
83    /// in this case remember to specify the correct length of @string
84    /// in @length.
85    /// ## `string`
86    /// the string to escape
87    /// ## `length`
88    /// the length of @string, in bytes, or -1 if @string is nul-terminated
89    ///
90    /// # Returns
91    ///
92    /// a newly-allocated escaped string
93    // rustdoc-stripper-ignore-next-stop
94    /// Escapes the special characters used for regular expressions
95    /// in @string, for instance "a.b*c" becomes "a\.b\*c". This
96    /// function is useful to dynamically generate regular expressions.
97    ///
98    /// @string can contain nul characters that are replaced with "\0",
99    /// in this case remember to specify the correct length of @string
100    /// in @length.
101    /// ## `string`
102    /// the string to escape
103    /// ## `length`
104    /// the length of @string, in bytes, or -1 if @string is nul-terminated
105    ///
106    /// # Returns
107    ///
108    /// a newly-allocated escaped string
109    #[doc(alias = "g_regex_escape_string")]
110    pub fn escape_string(string: impl IntoGStr) -> crate::GString {
111        unsafe {
112            string.run_with_gstr(|string| {
113                from_glib_full(ffi::g_regex_escape_string(
114                    string.to_glib_none().0,
115                    string.len() as _,
116                ))
117            })
118        }
119    }
120
121    /// Checks whether @replacement is a valid replacement string
122    /// (see g_regex_replace()), i.e. that all escape sequences in
123    /// it are valid.
124    ///
125    /// If @has_references is not [`None`] then @replacement is checked
126    /// for pattern references. For instance, replacement text 'foo\n'
127    /// does not contain references and may be evaluated without information
128    /// about actual match, but '\0\1' (whole match followed by first
129    /// subpattern) requires valid #GMatchInfo object.
130    /// ## `replacement`
131    /// the replacement string
132    ///
133    /// # Returns
134    ///
135    /// whether @replacement is a valid replacement string
136    ///
137    /// ## `has_references`
138    /// location to store information about
139    ///   references in @replacement or [`None`]
140    // rustdoc-stripper-ignore-next-stop
141    /// Checks whether @replacement is a valid replacement string
142    /// (see g_regex_replace()), i.e. that all escape sequences in
143    /// it are valid.
144    ///
145    /// If @has_references is not [`None`] then @replacement is checked
146    /// for pattern references. For instance, replacement text 'foo\n'
147    /// does not contain references and may be evaluated without information
148    /// about actual match, but '\0\1' (whole match followed by first
149    /// subpattern) requires valid #GMatchInfo object.
150    /// ## `replacement`
151    /// the replacement string
152    ///
153    /// # Returns
154    ///
155    /// whether @replacement is a valid replacement string
156    ///
157    /// ## `has_references`
158    /// location to store information about
159    ///   references in @replacement or [`None`]
160    #[doc(alias = "g_regex_check_replacement")]
161    pub fn check_replacement(replacement: impl IntoGStr) -> Result<bool, crate::Error> {
162        replacement.run_with_gstr(|replacement| unsafe {
163            let mut has_references = mem::MaybeUninit::uninit();
164            let mut error = ptr::null_mut();
165            let is_ok = ffi::g_regex_check_replacement(
166                replacement.to_glib_none().0,
167                has_references.as_mut_ptr(),
168                &mut error,
169            );
170            debug_assert_eq!(is_ok == crate::ffi::GFALSE, !error.is_null());
171            if error.is_null() {
172                Ok(from_glib(has_references.assume_init()))
173            } else {
174                Err(from_glib_full(error))
175            }
176        })
177    }
178
179    /// Scans for a match in @string for @pattern.
180    ///
181    /// This function is equivalent to g_regex_match() but it does not
182    /// require to compile the pattern with g_regex_new(), avoiding some
183    /// lines of code when you need just to do a match without extracting
184    /// substrings, capture counts, and so on.
185    ///
186    /// If this function is to be called on the same @pattern more than
187    /// once, it's more efficient to compile the pattern once with
188    /// g_regex_new() and then use g_regex_match().
189    /// ## `pattern`
190    /// the regular expression
191    /// ## `string`
192    /// the string to scan for matches
193    /// ## `compile_options`
194    /// compile options for the regular expression, or 0
195    /// ## `match_options`
196    /// match options, or 0
197    ///
198    /// # Returns
199    ///
200    /// [`true`] if the string matched, [`false`] otherwise
201    // rustdoc-stripper-ignore-next-stop
202    /// Scans for a match in @string for @pattern.
203    ///
204    /// This function is equivalent to g_regex_match() but it does not
205    /// require to compile the pattern with g_regex_new(), avoiding some
206    /// lines of code when you need just to do a match without extracting
207    /// substrings, capture counts, and so on.
208    ///
209    /// If this function is to be called on the same @pattern more than
210    /// once, it's more efficient to compile the pattern once with
211    /// g_regex_new() and then use g_regex_match().
212    /// ## `pattern`
213    /// the regular expression
214    /// ## `string`
215    /// the string to scan for matches
216    /// ## `compile_options`
217    /// compile options for the regular expression, or 0
218    /// ## `match_options`
219    /// match options, or 0
220    ///
221    /// # Returns
222    ///
223    /// [`true`] if the string matched, [`false`] otherwise
224    #[doc(alias = "g_regex_match_simple")]
225    pub fn match_simple(
226        pattern: impl IntoGStr,
227        string: impl IntoGStr,
228        compile_options: RegexCompileFlags,
229        match_options: RegexMatchFlags,
230    ) -> bool {
231        pattern.run_with_gstr(|pattern| {
232            string.run_with_gstr(|string| unsafe {
233                from_glib(ffi::g_regex_match_simple(
234                    pattern.to_glib_none().0,
235                    string.to_glib_none().0,
236                    compile_options.into_glib(),
237                    match_options.into_glib(),
238                ))
239            })
240        })
241    }
242
243    /// Replaces all occurrences of the pattern in @self with the
244    /// replacement text. Backreferences of the form `\number` or
245    /// `\g<number>` in the replacement text are interpolated by the
246    /// number-th captured subexpression of the match, `\g<name>` refers
247    /// to the captured subexpression with the given name. `\0` refers
248    /// to the complete match, but `\0` followed by a number is the octal
249    /// representation of a character. To include a literal `\` in the
250    /// replacement, write `\\\\`.
251    ///
252    /// There are also escapes that changes the case of the following text:
253    ///
254    /// - \l: Convert to lower case the next character
255    /// - \u: Convert to upper case the next character
256    /// - \L: Convert to lower case till \E
257    /// - \U: Convert to upper case till \E
258    /// - \E: End case modification
259    ///
260    /// If you do not need to use backreferences use g_regex_replace_literal().
261    ///
262    /// The @replacement string must be UTF-8 encoded even if [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] was
263    /// passed to g_regex_new(). If you want to use not UTF-8 encoded strings
264    /// you can use g_regex_replace_literal().
265    ///
266    /// Setting @start_position differs from just passing over a shortened
267    /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern that
268    /// begins with any kind of lookbehind assertion, such as "\b".
269    /// ## `string`
270    /// the string to perform matches against
271    /// ## `start_position`
272    /// starting index of the string to match, in bytes
273    /// ## `replacement`
274    /// text to replace each match with
275    /// ## `match_options`
276    /// options for the match
277    ///
278    /// # Returns
279    ///
280    /// a newly allocated string containing the replacements
281    // rustdoc-stripper-ignore-next-stop
282    /// Replaces all occurrences of the pattern in @self with the
283    /// replacement text. Backreferences of the form `\number` or
284    /// `\g<number>` in the replacement text are interpolated by the
285    /// number-th captured subexpression of the match, `\g<name>` refers
286    /// to the captured subexpression with the given name. `\0` refers
287    /// to the complete match, but `\0` followed by a number is the octal
288    /// representation of a character. To include a literal `\` in the
289    /// replacement, write `\\\\`.
290    ///
291    /// There are also escapes that changes the case of the following text:
292    ///
293    /// - \l: Convert to lower case the next character
294    /// - \u: Convert to upper case the next character
295    /// - \L: Convert to lower case till \E
296    /// - \U: Convert to upper case till \E
297    /// - \E: End case modification
298    ///
299    /// If you do not need to use backreferences use g_regex_replace_literal().
300    ///
301    /// The @replacement string must be UTF-8 encoded even if [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] was
302    /// passed to g_regex_new(). If you want to use not UTF-8 encoded strings
303    /// you can use g_regex_replace_literal().
304    ///
305    /// Setting @start_position differs from just passing over a shortened
306    /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern that
307    /// begins with any kind of lookbehind assertion, such as "\b".
308    /// ## `string`
309    /// the string to perform matches against
310    /// ## `start_position`
311    /// starting index of the string to match, in bytes
312    /// ## `replacement`
313    /// text to replace each match with
314    /// ## `match_options`
315    /// options for the match
316    ///
317    /// # Returns
318    ///
319    /// a newly allocated string containing the replacements
320    #[doc(alias = "g_regex_replace")]
321    pub fn replace(
322        &self,
323        string: impl IntoGStr,
324        start_position: i32,
325        replacement: impl IntoGStr,
326        match_options: RegexMatchFlags,
327    ) -> Result<crate::GString, crate::Error> {
328        unsafe {
329            string.run_with_gstr(|string| {
330                replacement.run_with_gstr(|replacement| {
331                    let mut error = ptr::null_mut();
332                    let ret = ffi::g_regex_replace(
333                        self.to_glib_none().0,
334                        string.as_ptr() as *const _,
335                        string.len() as _,
336                        start_position,
337                        replacement.to_glib_none().0,
338                        match_options.into_glib(),
339                        &mut error,
340                    );
341                    debug_assert_eq!(ret.is_null(), !error.is_null());
342                    if error.is_null() {
343                        Ok(from_glib_full(ret))
344                    } else {
345                        Err(from_glib_full(error))
346                    }
347                })
348            })
349        }
350    }
351
352    /// Using the standard algorithm for regular expression matching only
353    /// the longest match in the string is retrieved. This function uses
354    /// a different algorithm so it can retrieve all the possible matches.
355    /// For more documentation see g_regex_match_all_full().
356    ///
357    /// A #GMatchInfo structure, used to get information on the match, is
358    /// stored in @match_info if not [`None`]. Note that if @match_info is
359    /// not [`None`] then it is created even if the function returns [`false`],
360    /// i.e. you must free it regardless if regular expression actually
361    /// matched.
362    ///
363    /// @string is not copied and is used in #GMatchInfo internally. If
364    /// you use any #GMatchInfo method (except g_match_info_free()) after
365    /// freeing or modifying @string then the behaviour is undefined.
366    /// ## `string`
367    /// the string to scan for matches
368    /// ## `match_options`
369    /// match options
370    ///
371    /// # Returns
372    ///
373    /// [`true`] is the string matched, [`false`] otherwise
374    ///
375    /// ## `match_info`
376    /// pointer to location where to store
377    ///     the #GMatchInfo, or [`None`] if you do not need it
378    // rustdoc-stripper-ignore-next-stop
379    /// Using the standard algorithm for regular expression matching only
380    /// the longest match in the string is retrieved. This function uses
381    /// a different algorithm so it can retrieve all the possible matches.
382    /// For more documentation see g_regex_match_all_full().
383    ///
384    /// A #GMatchInfo structure, used to get information on the match, is
385    /// stored in @match_info if not [`None`]. Note that if @match_info is
386    /// not [`None`] then it is created even if the function returns [`false`],
387    /// i.e. you must free it regardless if regular expression actually
388    /// matched.
389    ///
390    /// @string is not copied and is used in #GMatchInfo internally. If
391    /// you use any #GMatchInfo method (except g_match_info_free()) after
392    /// freeing or modifying @string then the behaviour is undefined.
393    /// ## `string`
394    /// the string to scan for matches
395    /// ## `match_options`
396    /// match options
397    ///
398    /// # Returns
399    ///
400    /// [`true`] is the string matched, [`false`] otherwise
401    ///
402    /// ## `match_info`
403    /// pointer to location where to store
404    ///     the #GMatchInfo, or [`None`] if you do not need it
405    #[doc(alias = "g_regex_match_all")]
406    pub fn match_all<'input>(
407        &self,
408        string: &'input GStr,
409        match_options: RegexMatchFlags,
410    ) -> Result<MatchInfo<'input>, crate::Error> {
411        self.match_all_full(string, 0, match_options)
412    }
413
414    /// Using the standard algorithm for regular expression matching only
415    /// the longest match in the @string is retrieved, it is not possible
416    /// to obtain all the available matches. For instance matching
417    /// `"<a> <b> <c>"` against the pattern `"<.*>"`
418    /// you get `"<a> <b> <c>"`.
419    ///
420    /// This function uses a different algorithm (called DFA, i.e. deterministic
421    /// finite automaton), so it can retrieve all the possible matches, all
422    /// starting at the same point in the string. For instance matching
423    /// `"<a> <b> <c>"` against the pattern `"<.*>"`
424    /// you would obtain three matches: `"<a> <b> <c>"`,
425    /// `"<a> <b>"` and `"<a>"`.
426    ///
427    /// The number of matched strings is retrieved using
428    /// g_match_info_get_match_count(). To obtain the matched strings and
429    /// their position you can use, respectively, g_match_info_fetch() and
430    /// g_match_info_fetch_pos(). Note that the strings are returned in
431    /// reverse order of length; that is, the longest matching string is
432    /// given first.
433    ///
434    /// Note that the DFA algorithm is slower than the standard one and it
435    /// is not able to capture substrings, so backreferences do not work.
436    ///
437    /// Setting @start_position differs from just passing over a shortened
438    /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
439    /// that begins with any kind of lookbehind assertion, such as "\b".
440    ///
441    /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
442    ///
443    /// A #GMatchInfo structure, used to get information on the match, is
444    /// stored in @match_info if not [`None`]. Note that if @match_info is
445    /// not [`None`] then it is created even if the function returns [`false`],
446    /// i.e. you must free it regardless if regular expression actually
447    /// matched.
448    ///
449    /// @string is not copied and is used in #GMatchInfo internally. If
450    /// you use any #GMatchInfo method (except g_match_info_free()) after
451    /// freeing or modifying @string then the behaviour is undefined.
452    /// ## `string`
453    /// the string to scan for matches
454    /// ## `start_position`
455    /// starting index of the string to match, in bytes
456    /// ## `match_options`
457    /// match options
458    ///
459    /// # Returns
460    ///
461    /// [`true`] is the string matched, [`false`] otherwise
462    ///
463    /// ## `match_info`
464    /// pointer to location where to store
465    ///     the #GMatchInfo, or [`None`] if you do not need it
466    // rustdoc-stripper-ignore-next-stop
467    /// Using the standard algorithm for regular expression matching only
468    /// the longest match in the @string is retrieved, it is not possible
469    /// to obtain all the available matches. For instance matching
470    /// `"<a> <b> <c>"` against the pattern `"<.*>"`
471    /// you get `"<a> <b> <c>"`.
472    ///
473    /// This function uses a different algorithm (called DFA, i.e. deterministic
474    /// finite automaton), so it can retrieve all the possible matches, all
475    /// starting at the same point in the string. For instance matching
476    /// `"<a> <b> <c>"` against the pattern `"<.*>"`
477    /// you would obtain three matches: `"<a> <b> <c>"`,
478    /// `"<a> <b>"` and `"<a>"`.
479    ///
480    /// The number of matched strings is retrieved using
481    /// g_match_info_get_match_count(). To obtain the matched strings and
482    /// their position you can use, respectively, g_match_info_fetch() and
483    /// g_match_info_fetch_pos(). Note that the strings are returned in
484    /// reverse order of length; that is, the longest matching string is
485    /// given first.
486    ///
487    /// Note that the DFA algorithm is slower than the standard one and it
488    /// is not able to capture substrings, so backreferences do not work.
489    ///
490    /// Setting @start_position differs from just passing over a shortened
491    /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
492    /// that begins with any kind of lookbehind assertion, such as "\b".
493    ///
494    /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
495    ///
496    /// A #GMatchInfo structure, used to get information on the match, is
497    /// stored in @match_info if not [`None`]. Note that if @match_info is
498    /// not [`None`] then it is created even if the function returns [`false`],
499    /// i.e. you must free it regardless if regular expression actually
500    /// matched.
501    ///
502    /// @string is not copied and is used in #GMatchInfo internally. If
503    /// you use any #GMatchInfo method (except g_match_info_free()) after
504    /// freeing or modifying @string then the behaviour is undefined.
505    /// ## `string`
506    /// the string to scan for matches
507    /// ## `start_position`
508    /// starting index of the string to match, in bytes
509    /// ## `match_options`
510    /// match options
511    ///
512    /// # Returns
513    ///
514    /// [`true`] is the string matched, [`false`] otherwise
515    ///
516    /// ## `match_info`
517    /// pointer to location where to store
518    ///     the #GMatchInfo, or [`None`] if you do not need it
519    #[doc(alias = "g_regex_match_all_full")]
520    pub fn match_all_full<'input>(
521        &self,
522        string: &'input GStr,
523        start_position: i32,
524        match_options: RegexMatchFlags,
525    ) -> Result<MatchInfo<'input>, crate::Error> {
526        unsafe {
527            let mut match_info = ptr::null_mut();
528            let mut error = ptr::null_mut();
529            let res = ffi::g_regex_match_all_full(
530                self.to_glib_none().0,
531                string.to_glib_none().0,
532                string.len() as _,
533                start_position,
534                match_options.into_glib(),
535                &mut match_info,
536                &mut error,
537            );
538            if error.is_null() {
539                let match_info = MatchInfo::from_glib_full(match_info);
540                debug_assert_eq!(match_info.matches(), from_glib(res));
541                Ok(match_info)
542            } else {
543                debug_assert!(match_info.is_null());
544                Err(from_glib_full(error))
545            }
546        }
547    }
548
549    /// Scans for a match in @string for the pattern in @self.
550    /// The @match_options are combined with the match options specified
551    /// when the @self structure was created, letting you have more
552    /// flexibility in reusing #GRegex structures.
553    ///
554    /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
555    ///
556    /// A #GMatchInfo structure, used to get information on the match,
557    /// is stored in @match_info if not [`None`]. Note that if @match_info
558    /// is not [`None`] then it is created even if the function returns [`false`],
559    /// i.e. you must free it regardless if regular expression actually matched.
560    ///
561    /// To retrieve all the non-overlapping matches of the pattern in
562    /// string you can use g_match_info_next().
563    ///
564    ///
565    ///
566    /// **⚠️ The following code is in C ⚠️**
567    ///
568    /// ```C
569    /// static void
570    /// print_uppercase_words (const gchar *string)
571    /// {
572    ///   // Print all uppercase-only words.
573    ///   GRegex *regex;
574    ///   GMatchInfo *match_info;
575    ///
576    ///   regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
577    ///   g_regex_match (regex, string, 0, &match_info);
578    ///   while (g_match_info_matches (match_info))
579    ///     {
580    ///       gchar *word = g_match_info_fetch (match_info, 0);
581    ///       g_print ("Found: %s\n", word);
582    ///       g_free (word);
583    ///       g_match_info_next (match_info, NULL);
584    ///     }
585    ///   g_match_info_free (match_info);
586    ///   g_regex_unref (regex);
587    /// }
588    /// ```
589    ///
590    /// @string is not copied and is used in #GMatchInfo internally. If
591    /// you use any #GMatchInfo method (except g_match_info_free()) after
592    /// freeing or modifying @string then the behaviour is undefined.
593    /// ## `string`
594    /// the string to scan for matches
595    /// ## `match_options`
596    /// match options
597    ///
598    /// # Returns
599    ///
600    /// [`true`] is the string matched, [`false`] otherwise
601    ///
602    /// ## `match_info`
603    /// pointer to location where to store
604    ///     the #GMatchInfo, or [`None`] if you do not need it
605    // rustdoc-stripper-ignore-next-stop
606    /// Scans for a match in @string for the pattern in @self.
607    /// The @match_options are combined with the match options specified
608    /// when the @self structure was created, letting you have more
609    /// flexibility in reusing #GRegex structures.
610    ///
611    /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
612    ///
613    /// A #GMatchInfo structure, used to get information on the match,
614    /// is stored in @match_info if not [`None`]. Note that if @match_info
615    /// is not [`None`] then it is created even if the function returns [`false`],
616    /// i.e. you must free it regardless if regular expression actually matched.
617    ///
618    /// To retrieve all the non-overlapping matches of the pattern in
619    /// string you can use g_match_info_next().
620    ///
621    ///
622    ///
623    /// **⚠️ The following code is in C ⚠️**
624    ///
625    /// ```C
626    /// static void
627    /// print_uppercase_words (const gchar *string)
628    /// {
629    ///   // Print all uppercase-only words.
630    ///   GRegex *regex;
631    ///   GMatchInfo *match_info;
632    ///
633    ///   regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
634    ///   g_regex_match (regex, string, 0, &match_info);
635    ///   while (g_match_info_matches (match_info))
636    ///     {
637    ///       gchar *word = g_match_info_fetch (match_info, 0);
638    ///       g_print ("Found: %s\n", word);
639    ///       g_free (word);
640    ///       g_match_info_next (match_info, NULL);
641    ///     }
642    ///   g_match_info_free (match_info);
643    ///   g_regex_unref (regex);
644    /// }
645    /// ```
646    ///
647    /// @string is not copied and is used in #GMatchInfo internally. If
648    /// you use any #GMatchInfo method (except g_match_info_free()) after
649    /// freeing or modifying @string then the behaviour is undefined.
650    /// ## `string`
651    /// the string to scan for matches
652    /// ## `match_options`
653    /// match options
654    ///
655    /// # Returns
656    ///
657    /// [`true`] is the string matched, [`false`] otherwise
658    ///
659    /// ## `match_info`
660    /// pointer to location where to store
661    ///     the #GMatchInfo, or [`None`] if you do not need it
662    #[doc(alias = "g_regex_match")]
663    pub fn match_<'input>(
664        &self,
665        string: &'input GStr,
666        match_options: RegexMatchFlags,
667    ) -> Result<MatchInfo<'input>, crate::Error> {
668        self.match_full(string, 0, match_options)
669    }
670
671    /// Scans for a match in @string for the pattern in @self.
672    /// The @match_options are combined with the match options specified
673    /// when the @self structure was created, letting you have more
674    /// flexibility in reusing #GRegex structures.
675    ///
676    /// Setting @start_position differs from just passing over a shortened
677    /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
678    /// that begins with any kind of lookbehind assertion, such as "\b".
679    ///
680    /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
681    ///
682    /// A #GMatchInfo structure, used to get information on the match, is
683    /// stored in @match_info if not [`None`]. Note that if @match_info is
684    /// not [`None`] then it is created even if the function returns [`false`],
685    /// i.e. you must free it regardless if regular expression actually
686    /// matched.
687    ///
688    /// @string is not copied and is used in #GMatchInfo internally. If
689    /// you use any #GMatchInfo method (except g_match_info_free()) after
690    /// freeing or modifying @string then the behaviour is undefined.
691    ///
692    /// To retrieve all the non-overlapping matches of the pattern in
693    /// string you can use g_match_info_next().
694    ///
695    ///
696    ///
697    /// **⚠️ The following code is in C ⚠️**
698    ///
699    /// ```C
700    /// static void
701    /// print_uppercase_words (const gchar *string)
702    /// {
703    ///   // Print all uppercase-only words.
704    ///   GRegex *regex;
705    ///   GMatchInfo *match_info;
706    ///   GError *error = NULL;
707    ///
708    ///   regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
709    ///   g_regex_match_full (regex, string, -1, 0, 0, &match_info, &error);
710    ///   while (g_match_info_matches (match_info))
711    ///     {
712    ///       gchar *word = g_match_info_fetch (match_info, 0);
713    ///       g_print ("Found: %s\n", word);
714    ///       g_free (word);
715    ///       g_match_info_next (match_info, &error);
716    ///     }
717    ///   g_match_info_free (match_info);
718    ///   g_regex_unref (regex);
719    ///   if (error != NULL)
720    ///     {
721    ///       g_printerr ("Error while matching: %s\n", error->message);
722    ///       g_error_free (error);
723    ///     }
724    /// }
725    /// ```
726    /// ## `string`
727    /// the string to scan for matches
728    /// ## `start_position`
729    /// starting index of the string to match, in bytes
730    /// ## `match_options`
731    /// match options
732    ///
733    /// # Returns
734    ///
735    /// [`true`] is the string matched, [`false`] otherwise
736    ///
737    /// ## `match_info`
738    /// pointer to location where to store
739    ///     the #GMatchInfo, or [`None`] if you do not need it
740    // rustdoc-stripper-ignore-next-stop
741    /// Scans for a match in @string for the pattern in @self.
742    /// The @match_options are combined with the match options specified
743    /// when the @self structure was created, letting you have more
744    /// flexibility in reusing #GRegex structures.
745    ///
746    /// Setting @start_position differs from just passing over a shortened
747    /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
748    /// that begins with any kind of lookbehind assertion, such as "\b".
749    ///
750    /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
751    ///
752    /// A #GMatchInfo structure, used to get information on the match, is
753    /// stored in @match_info if not [`None`]. Note that if @match_info is
754    /// not [`None`] then it is created even if the function returns [`false`],
755    /// i.e. you must free it regardless if regular expression actually
756    /// matched.
757    ///
758    /// @string is not copied and is used in #GMatchInfo internally. If
759    /// you use any #GMatchInfo method (except g_match_info_free()) after
760    /// freeing or modifying @string then the behaviour is undefined.
761    ///
762    /// To retrieve all the non-overlapping matches of the pattern in
763    /// string you can use g_match_info_next().
764    ///
765    ///
766    ///
767    /// **⚠️ The following code is in C ⚠️**
768    ///
769    /// ```C
770    /// static void
771    /// print_uppercase_words (const gchar *string)
772    /// {
773    ///   // Print all uppercase-only words.
774    ///   GRegex *regex;
775    ///   GMatchInfo *match_info;
776    ///   GError *error = NULL;
777    ///
778    ///   regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
779    ///   g_regex_match_full (regex, string, -1, 0, 0, &match_info, &error);
780    ///   while (g_match_info_matches (match_info))
781    ///     {
782    ///       gchar *word = g_match_info_fetch (match_info, 0);
783    ///       g_print ("Found: %s\n", word);
784    ///       g_free (word);
785    ///       g_match_info_next (match_info, &error);
786    ///     }
787    ///   g_match_info_free (match_info);
788    ///   g_regex_unref (regex);
789    ///   if (error != NULL)
790    ///     {
791    ///       g_printerr ("Error while matching: %s\n", error->message);
792    ///       g_error_free (error);
793    ///     }
794    /// }
795    /// ```
796    /// ## `string`
797    /// the string to scan for matches
798    /// ## `start_position`
799    /// starting index of the string to match, in bytes
800    /// ## `match_options`
801    /// match options
802    ///
803    /// # Returns
804    ///
805    /// [`true`] is the string matched, [`false`] otherwise
806    ///
807    /// ## `match_info`
808    /// pointer to location where to store
809    ///     the #GMatchInfo, or [`None`] if you do not need it
810    #[doc(alias = "g_regex_match_full")]
811    pub fn match_full<'input>(
812        &self,
813        string: &'input GStr,
814        start_position: i32,
815        match_options: RegexMatchFlags,
816    ) -> Result<MatchInfo<'input>, crate::Error> {
817        unsafe {
818            let mut match_info = ptr::null_mut();
819            let mut error = ptr::null_mut();
820            let res = ffi::g_regex_match_full(
821                self.to_glib_none().0,
822                string.to_glib_none().0,
823                string.len() as _,
824                start_position,
825                match_options.into_glib(),
826                &mut match_info,
827                &mut error,
828            );
829            if error.is_null() {
830                let match_info = MatchInfo::from_glib_full(match_info);
831                debug_assert_eq!(match_info.matches(), from_glib(res));
832                Ok(match_info)
833            } else {
834                debug_assert!(match_info.is_null());
835                Err(from_glib_full(error))
836            }
837        }
838    }
839
840    /// Replaces all occurrences of the pattern in @self with the
841    /// replacement text. @replacement is replaced literally, to
842    /// include backreferences use g_regex_replace().
843    ///
844    /// Setting @start_position differs from just passing over a
845    /// shortened string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the
846    /// case of a pattern that begins with any kind of lookbehind
847    /// assertion, such as "\b".
848    /// ## `string`
849    /// the string to perform matches against
850    /// ## `start_position`
851    /// starting index of the string to match, in bytes
852    /// ## `replacement`
853    /// text to replace each match with
854    /// ## `match_options`
855    /// options for the match
856    ///
857    /// # Returns
858    ///
859    /// a newly allocated string containing the replacements
860    // rustdoc-stripper-ignore-next-stop
861    /// Replaces all occurrences of the pattern in @self with the
862    /// replacement text. @replacement is replaced literally, to
863    /// include backreferences use g_regex_replace().
864    ///
865    /// Setting @start_position differs from just passing over a
866    /// shortened string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the
867    /// case of a pattern that begins with any kind of lookbehind
868    /// assertion, such as "\b".
869    /// ## `string`
870    /// the string to perform matches against
871    /// ## `start_position`
872    /// starting index of the string to match, in bytes
873    /// ## `replacement`
874    /// text to replace each match with
875    /// ## `match_options`
876    /// options for the match
877    ///
878    /// # Returns
879    ///
880    /// a newly allocated string containing the replacements
881    #[doc(alias = "g_regex_replace_literal")]
882    pub fn replace_literal(
883        &self,
884        string: impl IntoGStr,
885        start_position: i32,
886        replacement: impl IntoGStr,
887        match_options: RegexMatchFlags,
888    ) -> Result<crate::GString, crate::Error> {
889        unsafe {
890            string.run_with_gstr(|string| {
891                replacement.run_with_gstr(|replacement| {
892                    let mut error = ptr::null_mut();
893                    let ret = ffi::g_regex_replace_literal(
894                        self.to_glib_none().0,
895                        string.to_glib_none().0,
896                        string.len() as _,
897                        start_position,
898                        replacement.to_glib_none().0,
899                        match_options.into_glib(),
900                        &mut error,
901                    );
902                    debug_assert_eq!(ret.is_null(), !error.is_null());
903                    if error.is_null() {
904                        Ok(from_glib_full(ret))
905                    } else {
906                        Err(from_glib_full(error))
907                    }
908                })
909            })
910        }
911    }
912
913    /// Breaks the string on the pattern, and returns an array of the tokens.
914    /// If the pattern contains capturing parentheses, then the text for each
915    /// of the substrings will also be returned. If the pattern does not match
916    /// anywhere in the string, then the whole string is returned as the first
917    /// token.
918    ///
919    /// As a special case, the result of splitting the empty string "" is an
920    /// empty vector, not a vector containing a single string. The reason for
921    /// this special case is that being able to represent an empty vector is
922    /// typically more useful than consistent handling of empty elements. If
923    /// you do need to represent empty elements, you'll need to check for the
924    /// empty string before calling this function.
925    ///
926    /// A pattern that can match empty strings splits @string into separate
927    /// characters wherever it matches the empty string between characters.
928    /// For example splitting "ab c" using as a separator "\s*", you will get
929    /// "a", "b" and "c".
930    /// ## `string`
931    /// the string to split with the pattern
932    /// ## `match_options`
933    /// match time option flags
934    ///
935    /// # Returns
936    ///
937    /// a [`None`]-terminated gchar ** array. Free
938    /// it using g_strfreev()
939    // rustdoc-stripper-ignore-next-stop
940    /// Breaks the string on the pattern, and returns an array of the tokens.
941    /// If the pattern contains capturing parentheses, then the text for each
942    /// of the substrings will also be returned. If the pattern does not match
943    /// anywhere in the string, then the whole string is returned as the first
944    /// token.
945    ///
946    /// As a special case, the result of splitting the empty string "" is an
947    /// empty vector, not a vector containing a single string. The reason for
948    /// this special case is that being able to represent an empty vector is
949    /// typically more useful than consistent handling of empty elements. If
950    /// you do need to represent empty elements, you'll need to check for the
951    /// empty string before calling this function.
952    ///
953    /// A pattern that can match empty strings splits @string into separate
954    /// characters wherever it matches the empty string between characters.
955    /// For example splitting "ab c" using as a separator "\s*", you will get
956    /// "a", "b" and "c".
957    /// ## `string`
958    /// the string to split with the pattern
959    /// ## `match_options`
960    /// match time option flags
961    ///
962    /// # Returns
963    ///
964    /// a [`None`]-terminated gchar ** array. Free
965    /// it using g_strfreev()
966    #[doc(alias = "g_regex_split")]
967    pub fn split(
968        &self,
969        string: impl IntoGStr,
970        match_options: RegexMatchFlags,
971    ) -> PtrSlice<GStringPtr> {
972        self.split_full(string, 0, match_options, 0)
973            .unwrap_or_default()
974    }
975
976    /// Breaks the string on the pattern, and returns an array of the tokens.
977    /// If the pattern contains capturing parentheses, then the text for each
978    /// of the substrings will also be returned. If the pattern does not match
979    /// anywhere in the string, then the whole string is returned as the first
980    /// token.
981    ///
982    /// As a special case, the result of splitting the empty string "" is an
983    /// empty vector, not a vector containing a single string. The reason for
984    /// this special case is that being able to represent an empty vector is
985    /// typically more useful than consistent handling of empty elements. If
986    /// you do need to represent empty elements, you'll need to check for the
987    /// empty string before calling this function.
988    ///
989    /// A pattern that can match empty strings splits @string into separate
990    /// characters wherever it matches the empty string between characters.
991    /// For example splitting "ab c" using as a separator "\s*", you will get
992    /// "a", "b" and "c".
993    ///
994    /// Setting @start_position differs from just passing over a shortened
995    /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
996    /// that begins with any kind of lookbehind assertion, such as "\b".
997    /// ## `string`
998    /// the string to split with the pattern
999    /// ## `start_position`
1000    /// starting index of the string to match, in bytes
1001    /// ## `match_options`
1002    /// match time option flags
1003    /// ## `max_tokens`
1004    /// the maximum number of tokens to split @string into.
1005    ///   If this is less than 1, the string is split completely
1006    ///
1007    /// # Returns
1008    ///
1009    /// a [`None`]-terminated gchar ** array. Free
1010    /// it using g_strfreev()
1011    // rustdoc-stripper-ignore-next-stop
1012    /// Breaks the string on the pattern, and returns an array of the tokens.
1013    /// If the pattern contains capturing parentheses, then the text for each
1014    /// of the substrings will also be returned. If the pattern does not match
1015    /// anywhere in the string, then the whole string is returned as the first
1016    /// token.
1017    ///
1018    /// As a special case, the result of splitting the empty string "" is an
1019    /// empty vector, not a vector containing a single string. The reason for
1020    /// this special case is that being able to represent an empty vector is
1021    /// typically more useful than consistent handling of empty elements. If
1022    /// you do need to represent empty elements, you'll need to check for the
1023    /// empty string before calling this function.
1024    ///
1025    /// A pattern that can match empty strings splits @string into separate
1026    /// characters wherever it matches the empty string between characters.
1027    /// For example splitting "ab c" using as a separator "\s*", you will get
1028    /// "a", "b" and "c".
1029    ///
1030    /// Setting @start_position differs from just passing over a shortened
1031    /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
1032    /// that begins with any kind of lookbehind assertion, such as "\b".
1033    /// ## `string`
1034    /// the string to split with the pattern
1035    /// ## `start_position`
1036    /// starting index of the string to match, in bytes
1037    /// ## `match_options`
1038    /// match time option flags
1039    /// ## `max_tokens`
1040    /// the maximum number of tokens to split @string into.
1041    ///   If this is less than 1, the string is split completely
1042    ///
1043    /// # Returns
1044    ///
1045    /// a [`None`]-terminated gchar ** array. Free
1046    /// it using g_strfreev()
1047    #[doc(alias = "g_regex_split_full")]
1048    pub fn split_full(
1049        &self,
1050        string: impl IntoGStr,
1051        start_position: i32,
1052        match_options: RegexMatchFlags,
1053        max_tokens: i32,
1054    ) -> Result<PtrSlice<GStringPtr>, crate::Error> {
1055        unsafe {
1056            let mut error = ptr::null_mut();
1057            string.run_with_gstr(|string| {
1058                let ret = ffi::g_regex_split_full(
1059                    self.to_glib_none().0,
1060                    string.to_glib_none().0,
1061                    string.len() as _,
1062                    start_position,
1063                    match_options.into_glib(),
1064                    max_tokens,
1065                    &mut error,
1066                );
1067                debug_assert_eq!(ret.is_null(), !error.is_null());
1068                if error.is_null() {
1069                    Ok(FromGlibPtrContainer::from_glib_full(ret))
1070                } else {
1071                    Err(from_glib_full(error))
1072                }
1073            })
1074        }
1075    }
1076
1077    /// Breaks the string on the pattern, and returns an array of
1078    /// the tokens. If the pattern contains capturing parentheses,
1079    /// then the text for each of the substrings will also be returned.
1080    /// If the pattern does not match anywhere in the string, then the
1081    /// whole string is returned as the first token.
1082    ///
1083    /// This function is equivalent to g_regex_split() but it does
1084    /// not require to compile the pattern with g_regex_new(), avoiding
1085    /// some lines of code when you need just to do a split without
1086    /// extracting substrings, capture counts, and so on.
1087    ///
1088    /// If this function is to be called on the same @pattern more than
1089    /// once, it's more efficient to compile the pattern once with
1090    /// g_regex_new() and then use g_regex_split().
1091    ///
1092    /// As a special case, the result of splitting the empty string ""
1093    /// is an empty vector, not a vector containing a single string.
1094    /// The reason for this special case is that being able to represent
1095    /// an empty vector is typically more useful than consistent handling
1096    /// of empty elements. If you do need to represent empty elements,
1097    /// you'll need to check for the empty string before calling this
1098    /// function.
1099    ///
1100    /// A pattern that can match empty strings splits @string into
1101    /// separate characters wherever it matches the empty string between
1102    /// characters. For example splitting "ab c" using as a separator
1103    /// "\s*", you will get "a", "b" and "c".
1104    /// ## `pattern`
1105    /// the regular expression
1106    /// ## `string`
1107    /// the string to scan for matches
1108    /// ## `compile_options`
1109    /// compile options for the regular expression, or 0
1110    /// ## `match_options`
1111    /// match options, or 0
1112    ///
1113    /// # Returns
1114    ///
1115    /// a [`None`]-terminated array of strings. Free
1116    /// it using g_strfreev()
1117    // rustdoc-stripper-ignore-next-stop
1118    /// Breaks the string on the pattern, and returns an array of
1119    /// the tokens. If the pattern contains capturing parentheses,
1120    /// then the text for each of the substrings will also be returned.
1121    /// If the pattern does not match anywhere in the string, then the
1122    /// whole string is returned as the first token.
1123    ///
1124    /// This function is equivalent to g_regex_split() but it does
1125    /// not require to compile the pattern with g_regex_new(), avoiding
1126    /// some lines of code when you need just to do a split without
1127    /// extracting substrings, capture counts, and so on.
1128    ///
1129    /// If this function is to be called on the same @pattern more than
1130    /// once, it's more efficient to compile the pattern once with
1131    /// g_regex_new() and then use g_regex_split().
1132    ///
1133    /// As a special case, the result of splitting the empty string ""
1134    /// is an empty vector, not a vector containing a single string.
1135    /// The reason for this special case is that being able to represent
1136    /// an empty vector is typically more useful than consistent handling
1137    /// of empty elements. If you do need to represent empty elements,
1138    /// you'll need to check for the empty string before calling this
1139    /// function.
1140    ///
1141    /// A pattern that can match empty strings splits @string into
1142    /// separate characters wherever it matches the empty string between
1143    /// characters. For example splitting "ab c" using as a separator
1144    /// "\s*", you will get "a", "b" and "c".
1145    /// ## `pattern`
1146    /// the regular expression
1147    /// ## `string`
1148    /// the string to scan for matches
1149    /// ## `compile_options`
1150    /// compile options for the regular expression, or 0
1151    /// ## `match_options`
1152    /// match options, or 0
1153    ///
1154    /// # Returns
1155    ///
1156    /// a [`None`]-terminated array of strings. Free
1157    /// it using g_strfreev()
1158    #[doc(alias = "g_regex_split_simple")]
1159    pub fn split_simple(
1160        pattern: impl IntoGStr,
1161        string: impl IntoGStr,
1162        compile_options: RegexCompileFlags,
1163        match_options: RegexMatchFlags,
1164    ) -> PtrSlice<GStringPtr> {
1165        pattern.run_with_gstr(|pattern| {
1166            string.run_with_gstr(|string| unsafe {
1167                FromGlibPtrContainer::from_glib_full(ffi::g_regex_split_simple(
1168                    pattern.to_glib_none().0,
1169                    string.to_glib_none().0,
1170                    compile_options.into_glib(),
1171                    match_options.into_glib(),
1172                ))
1173            })
1174        })
1175    }
1176}
1177
#[cfg(test)]
mod tests {
    use super::*;

    // Compiles `pattern` with the given compile flags and default match flags,
    // panicking if compilation fails or yields no regex.
    fn compile(pattern: &str, compile_flags: RegexCompileFlags) -> Regex {
        Regex::new(pattern, compile_flags, RegexMatchFlags::DEFAULT)
            .expect("Regex new")
            .expect("Null regex")
    }

    // Every occurrence of the pattern is replaced by the literal text.
    #[test]
    fn test_replace_literal() {
        let re = compile("s[ai]mple", RegexCompileFlags::OPTIMIZE);

        let replaced = re
            .replace_literal(
                "This is a simple sample.",
                0,
                "XXX",
                RegexMatchFlags::DEFAULT,
            )
            .expect("regex replace");

        assert_eq!(replaced, "This is a XXX XXX.");
    }

    // Splitting on the pattern yields the text around each occurrence.
    #[test]
    fn test_split() {
        let re = compile("s[ai]mple", RegexCompileFlags::OPTIMIZE);

        let tokens = re.split("This is a simple sample.", RegexMatchFlags::DEFAULT);

        assert_eq!(tokens.len(), 3);
        assert_eq!(tokens[0], "This is a ");
        assert_eq!(tokens[1], " ");
        assert_eq!(tokens[2], ".");
    }

    // Walks successive matches, then checks an input without any match.
    #[test]
    fn test_match() {
        let re = compile(r"\d", RegexCompileFlags::DEFAULT);

        // Two digits: two successive single-character matches, then exhaustion.
        let digits = crate::GString::from("87");
        let info = re
            .match_(digits.as_gstr(), RegexMatchFlags::DEFAULT)
            .unwrap();
        assert!(info.matches());
        assert_eq!(info.match_count(), 1);
        assert_eq!(info.fetch(0).as_deref(), Some("8"));
        assert!(info.next().unwrap());
        assert_eq!(info.fetch(0).as_deref(), Some("7"));
        assert!(!info.next().unwrap());
        assert!(info.fetch(0).is_none());

        // No digits: a match info is still produced, but it reports no match.
        let letters = crate::GString::from("a");
        let info = re
            .match_(letters.as_gstr(), RegexMatchFlags::DEFAULT)
            .unwrap();
        assert!(!info.matches());
        assert_eq!(info.match_count(), 0);
        assert!(info.fetch(0).is_none());
    }
}
1243}