glib/
regex.rs

1// Take a look at the license at the top of the repository in the LICENSE file.
2
3// rustdoc-stripper-ignore-next
4//! This module is inefficient and should not be used by Rust programs except for
5//! compatibility with GLib.Regex based APIs.
6
7use crate::{
8    ffi, translate::*, GStr, GStringPtr, MatchInfo, PtrSlice, Regex, RegexCompileFlags,
9    RegexMatchFlags,
10};
11use std::{mem, ptr};
12
13impl Regex {
14    /// Retrieves the number of the subexpression named @name.
15    /// ## `name`
16    /// name of the subexpression
17    ///
18    /// # Returns
19    ///
20    /// The number of the subexpression or -1 if @name
21    ///   does not exists
22    // rustdoc-stripper-ignore-next-stop
23    /// Retrieves the number of the subexpression named @name.
24    /// ## `name`
25    /// name of the subexpression
26    ///
27    /// # Returns
28    ///
29    /// The number of the subexpression or -1 if @name
30    ///   does not exists
31    #[doc(alias = "g_regex_get_string_number")]
32    #[doc(alias = "get_string_number")]
33    pub fn string_number(&self, name: impl IntoGStr) -> i32 {
34        name.run_with_gstr(|name| unsafe {
35            ffi::g_regex_get_string_number(self.to_glib_none().0, name.to_glib_none().0)
36        })
37    }
38
39    /// Escapes the nul characters in @string to "\x00".  It can be used
40    /// to compile a regex with embedded nul characters.
41    ///
42    /// For completeness, @length can be -1 for a nul-terminated string.
43    /// In this case the output string will be of course equal to @string.
44    /// ## `string`
45    /// the string to escape
46    /// ## `length`
47    /// the length of @string
48    ///
49    /// # Returns
50    ///
51    /// a newly-allocated escaped string
52    // rustdoc-stripper-ignore-next-stop
53    /// Escapes the nul characters in @string to "\x00".  It can be used
54    /// to compile a regex with embedded nul characters.
55    ///
56    /// For completeness, @length can be -1 for a nul-terminated string.
57    /// In this case the output string will be of course equal to @string.
58    /// ## `string`
59    /// the string to escape
60    /// ## `length`
61    /// the length of @string
62    ///
63    /// # Returns
64    ///
65    /// a newly-allocated escaped string
66    #[doc(alias = "g_regex_escape_nul")]
67    pub fn escape_nul(string: impl IntoGStr) -> crate::GString {
68        unsafe {
69            string.run_with_gstr(|string| {
70                from_glib_full(ffi::g_regex_escape_nul(
71                    string.to_glib_none().0,
72                    string.len() as _,
73                ))
74            })
75        }
76    }
77
78    /// Escapes the special characters used for regular expressions
79    /// in @string, for instance "a.b*c" becomes "a\.b\*c". This
80    /// function is useful to dynamically generate regular expressions.
81    ///
82    /// @string can contain nul characters that are replaced with "\0",
83    /// in this case remember to specify the correct length of @string
84    /// in @length.
85    /// ## `string`
86    /// the string to escape
87    /// ## `length`
88    /// the length of @string, in bytes, or -1 if @string is nul-terminated
89    ///
90    /// # Returns
91    ///
92    /// a newly-allocated escaped string
93    // rustdoc-stripper-ignore-next-stop
94    /// Escapes the special characters used for regular expressions
95    /// in @string, for instance "a.b*c" becomes "a\.b\*c". This
96    /// function is useful to dynamically generate regular expressions.
97    ///
98    /// @string can contain nul characters that are replaced with "\0",
99    /// in this case remember to specify the correct length of @string
100    /// in @length.
101    /// ## `string`
102    /// the string to escape
103    /// ## `length`
104    /// the length of @string, in bytes, or -1 if @string is nul-terminated
105    ///
106    /// # Returns
107    ///
108    /// a newly-allocated escaped string
109    #[doc(alias = "g_regex_escape_string")]
110    pub fn escape_string(string: impl IntoGStr) -> crate::GString {
111        unsafe {
112            string.run_with_gstr(|string| {
113                from_glib_full(ffi::g_regex_escape_string(
114                    string.to_glib_none().0,
115                    string.len() as _,
116                ))
117            })
118        }
119    }
120
121    /// Checks whether @replacement is a valid replacement string
122    /// (see g_regex_replace()), i.e. that all escape sequences in
123    /// it are valid.
124    ///
125    /// If @has_references is not [`None`] then @replacement is checked
126    /// for pattern references. For instance, replacement text 'foo\n'
127    /// does not contain references and may be evaluated without information
128    /// about actual match, but '\0\1' (whole match followed by first
129    /// subpattern) requires valid #GMatchInfo object.
130    /// ## `replacement`
131    /// the replacement string
132    ///
133    /// # Returns
134    ///
135    /// whether @replacement is a valid replacement string
136    ///
137    /// ## `has_references`
138    /// location to store information about
139    ///   references in @replacement or [`None`]
140    // rustdoc-stripper-ignore-next-stop
141    /// Checks whether @replacement is a valid replacement string
142    /// (see g_regex_replace()), i.e. that all escape sequences in
143    /// it are valid.
144    ///
145    /// If @has_references is not [`None`] then @replacement is checked
146    /// for pattern references. For instance, replacement text 'foo\n'
147    /// does not contain references and may be evaluated without information
148    /// about actual match, but '\0\1' (whole match followed by first
149    /// subpattern) requires valid #GMatchInfo object.
150    /// ## `replacement`
151    /// the replacement string
152    ///
153    /// # Returns
154    ///
155    /// whether @replacement is a valid replacement string
156    ///
157    /// ## `has_references`
158    /// location to store information about
159    ///   references in @replacement or [`None`]
160    #[doc(alias = "g_regex_check_replacement")]
161    pub fn check_replacement(replacement: impl IntoGStr) -> Result<bool, crate::Error> {
162        replacement.run_with_gstr(|replacement| unsafe {
163            let mut has_references = mem::MaybeUninit::uninit();
164            let mut error = ptr::null_mut();
165            let is_ok = ffi::g_regex_check_replacement(
166                replacement.to_glib_none().0,
167                has_references.as_mut_ptr(),
168                &mut error,
169            );
170            debug_assert_eq!(is_ok == crate::ffi::GFALSE, !error.is_null());
171            if error.is_null() {
172                Ok(from_glib(has_references.assume_init()))
173            } else {
174                Err(from_glib_full(error))
175            }
176        })
177    }
178
179    /// Scans for a match in @string for @pattern.
180    ///
181    /// This function is equivalent to g_regex_match() but it does not
182    /// require to compile the pattern with g_regex_new(), avoiding some
183    /// lines of code when you need just to do a match without extracting
184    /// substrings, capture counts, and so on.
185    ///
186    /// If this function is to be called on the same @pattern more than
187    /// once, it's more efficient to compile the pattern once with
188    /// g_regex_new() and then use g_regex_match().
189    /// ## `pattern`
190    /// the regular expression
191    /// ## `string`
192    /// the string to scan for matches
193    /// ## `compile_options`
194    /// compile options for the regular expression, or 0
195    /// ## `match_options`
196    /// match options, or 0
197    ///
198    /// # Returns
199    ///
200    /// [`true`] if the string matched, [`false`] otherwise
201    // rustdoc-stripper-ignore-next-stop
202    /// Scans for a match in @string for @pattern.
203    ///
204    /// This function is equivalent to g_regex_match() but it does not
205    /// require to compile the pattern with g_regex_new(), avoiding some
206    /// lines of code when you need just to do a match without extracting
207    /// substrings, capture counts, and so on.
208    ///
209    /// If this function is to be called on the same @pattern more than
210    /// once, it's more efficient to compile the pattern once with
211    /// g_regex_new() and then use g_regex_match().
212    /// ## `pattern`
213    /// the regular expression
214    /// ## `string`
215    /// the string to scan for matches
216    /// ## `compile_options`
217    /// compile options for the regular expression, or 0
218    /// ## `match_options`
219    /// match options, or 0
220    ///
221    /// # Returns
222    ///
223    /// [`true`] if the string matched, [`false`] otherwise
224    #[doc(alias = "g_regex_match_simple")]
225    pub fn match_simple(
226        pattern: impl IntoGStr,
227        string: impl IntoGStr,
228        compile_options: RegexCompileFlags,
229        match_options: RegexMatchFlags,
230    ) -> bool {
231        pattern.run_with_gstr(|pattern| {
232            string.run_with_gstr(|string| unsafe {
233                from_glib(ffi::g_regex_match_simple(
234                    pattern.to_glib_none().0,
235                    string.to_glib_none().0,
236                    compile_options.into_glib(),
237                    match_options.into_glib(),
238                ))
239            })
240        })
241    }
242
243    /// Replaces all occurrences of the pattern in @self with the
244    /// replacement text. Backreferences of the form `\number` or
245    /// `\g<number>` in the replacement text are interpolated by the
246    /// number-th captured subexpression of the match, `\g<name>` refers
247    /// to the captured subexpression with the given name. `\0` refers
248    /// to the complete match, but `\0` followed by a number is the octal
249    /// representation of a character. To include a literal `\` in the
250    /// replacement, write `\\\\`.
251    ///
252    /// There are also escapes that changes the case of the following text:
253    ///
254    /// - \l: Convert to lower case the next character
255    /// - \u: Convert to upper case the next character
256    /// - \L: Convert to lower case till \E
257    /// - \U: Convert to upper case till \E
258    /// - \E: End case modification
259    ///
260    /// If you do not need to use backreferences use g_regex_replace_literal().
261    ///
262    /// The @replacement string must be UTF-8 encoded even if [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] was
263    /// passed to g_regex_new(). If you want to use not UTF-8 encoded strings
264    /// you can use g_regex_replace_literal().
265    ///
266    /// Setting @start_position differs from just passing over a shortened
267    /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern that
268    /// begins with any kind of lookbehind assertion, such as "\b".
269    /// ## `string`
270    /// the string to perform matches against
271    /// ## `start_position`
272    /// starting index of the string to match, in bytes
273    /// ## `replacement`
274    /// text to replace each match with
275    /// ## `match_options`
276    /// options for the match
277    ///
278    /// # Returns
279    ///
280    /// a newly allocated string containing the replacements
281    // rustdoc-stripper-ignore-next-stop
282    /// Replaces all occurrences of the pattern in @self with the
283    /// replacement text. Backreferences of the form `\number` or
284    /// `\g<number>` in the replacement text are interpolated by the
285    /// number-th captured subexpression of the match, `\g<name>` refers
286    /// to the captured subexpression with the given name. `\0` refers
287    /// to the complete match, but `\0` followed by a number is the octal
288    /// representation of a character. To include a literal `\` in the
289    /// replacement, write `\\\\`.
290    ///
291    /// There are also escapes that changes the case of the following text:
292    ///
293    /// - \l: Convert to lower case the next character
294    /// - \u: Convert to upper case the next character
295    /// - \L: Convert to lower case till \E
296    /// - \U: Convert to upper case till \E
297    /// - \E: End case modification
298    ///
299    /// If you do not need to use backreferences use g_regex_replace_literal().
300    ///
301    /// The @replacement string must be UTF-8 encoded even if [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] was
302    /// passed to g_regex_new(). If you want to use not UTF-8 encoded strings
303    /// you can use g_regex_replace_literal().
304    ///
305    /// Setting @start_position differs from just passing over a shortened
306    /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern that
307    /// begins with any kind of lookbehind assertion, such as "\b".
308    /// ## `string`
309    /// the string to perform matches against
310    /// ## `start_position`
311    /// starting index of the string to match, in bytes
312    /// ## `replacement`
313    /// text to replace each match with
314    /// ## `match_options`
315    /// options for the match
316    ///
317    /// # Returns
318    ///
319    /// a newly allocated string containing the replacements
320    #[doc(alias = "g_regex_replace")]
321    pub fn replace(
322        &self,
323        string: impl IntoGStr,
324        start_position: i32,
325        replacement: impl IntoGStr,
326        match_options: RegexMatchFlags,
327    ) -> Result<crate::GString, crate::Error> {
328        unsafe {
329            string.run_with_gstr(|string| {
330                replacement.run_with_gstr(|replacement| {
331                    let mut error = ptr::null_mut();
332                    let ret = ffi::g_regex_replace(
333                        self.to_glib_none().0,
334                        string.as_ptr() as *const _,
335                        string.len() as _,
336                        start_position,
337                        replacement.to_glib_none().0,
338                        match_options.into_glib(),
339                        &mut error,
340                    );
341                    if error.is_null() {
342                        Ok(from_glib_full(ret))
343                    } else {
344                        Err(from_glib_full(error))
345                    }
346                })
347            })
348        }
349    }
350
351    /// Using the standard algorithm for regular expression matching only
352    /// the longest match in the string is retrieved. This function uses
353    /// a different algorithm so it can retrieve all the possible matches.
354    /// For more documentation see g_regex_match_all_full().
355    ///
356    /// A #GMatchInfo structure, used to get information on the match, is
357    /// stored in @match_info if not [`None`]. Note that if @match_info is
358    /// not [`None`] then it is created even if the function returns [`false`],
359    /// i.e. you must free it regardless if regular expression actually
360    /// matched.
361    ///
362    /// @string is not copied and is used in #GMatchInfo internally. If
363    /// you use any #GMatchInfo method (except g_match_info_free()) after
364    /// freeing or modifying @string then the behaviour is undefined.
365    /// ## `string`
366    /// the string to scan for matches
367    /// ## `match_options`
368    /// match options
369    ///
370    /// # Returns
371    ///
372    /// [`true`] is the string matched, [`false`] otherwise
373    ///
374    /// ## `match_info`
375    /// pointer to location where to store
376    ///     the #GMatchInfo, or [`None`] if you do not need it
377    // rustdoc-stripper-ignore-next-stop
378    /// Using the standard algorithm for regular expression matching only
379    /// the longest match in the string is retrieved. This function uses
380    /// a different algorithm so it can retrieve all the possible matches.
381    /// For more documentation see g_regex_match_all_full().
382    ///
383    /// A #GMatchInfo structure, used to get information on the match, is
384    /// stored in @match_info if not [`None`]. Note that if @match_info is
385    /// not [`None`] then it is created even if the function returns [`false`],
386    /// i.e. you must free it regardless if regular expression actually
387    /// matched.
388    ///
389    /// @string is not copied and is used in #GMatchInfo internally. If
390    /// you use any #GMatchInfo method (except g_match_info_free()) after
391    /// freeing or modifying @string then the behaviour is undefined.
392    /// ## `string`
393    /// the string to scan for matches
394    /// ## `match_options`
395    /// match options
396    ///
397    /// # Returns
398    ///
399    /// [`true`] is the string matched, [`false`] otherwise
400    ///
401    /// ## `match_info`
402    /// pointer to location where to store
403    ///     the #GMatchInfo, or [`None`] if you do not need it
404    #[doc(alias = "g_regex_match_all")]
405    pub fn match_all<'input>(
406        &self,
407        string: &'input GStr,
408        match_options: RegexMatchFlags,
409    ) -> Option<MatchInfo<'input>> {
410        self.match_all_full(string, 0, match_options).ok()
411    }
412
413    /// Using the standard algorithm for regular expression matching only
414    /// the longest match in the @string is retrieved, it is not possible
415    /// to obtain all the available matches. For instance matching
416    /// `"<a> <b> <c>"` against the pattern `"<.*>"`
417    /// you get `"<a> <b> <c>"`.
418    ///
419    /// This function uses a different algorithm (called DFA, i.e. deterministic
420    /// finite automaton), so it can retrieve all the possible matches, all
421    /// starting at the same point in the string. For instance matching
422    /// `"<a> <b> <c>"` against the pattern `"<.*>"`
423    /// you would obtain three matches: `"<a> <b> <c>"`,
424    /// `"<a> <b>"` and `"<a>"`.
425    ///
426    /// The number of matched strings is retrieved using
427    /// g_match_info_get_match_count(). To obtain the matched strings and
428    /// their position you can use, respectively, g_match_info_fetch() and
429    /// g_match_info_fetch_pos(). Note that the strings are returned in
430    /// reverse order of length; that is, the longest matching string is
431    /// given first.
432    ///
433    /// Note that the DFA algorithm is slower than the standard one and it
434    /// is not able to capture substrings, so backreferences do not work.
435    ///
436    /// Setting @start_position differs from just passing over a shortened
437    /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
438    /// that begins with any kind of lookbehind assertion, such as "\b".
439    ///
440    /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
441    ///
442    /// A #GMatchInfo structure, used to get information on the match, is
443    /// stored in @match_info if not [`None`]. Note that if @match_info is
444    /// not [`None`] then it is created even if the function returns [`false`],
445    /// i.e. you must free it regardless if regular expression actually
446    /// matched.
447    ///
448    /// @string is not copied and is used in #GMatchInfo internally. If
449    /// you use any #GMatchInfo method (except g_match_info_free()) after
450    /// freeing or modifying @string then the behaviour is undefined.
451    /// ## `string`
452    /// the string to scan for matches
453    /// ## `start_position`
454    /// starting index of the string to match, in bytes
455    /// ## `match_options`
456    /// match options
457    ///
458    /// # Returns
459    ///
460    /// [`true`] is the string matched, [`false`] otherwise
461    ///
462    /// ## `match_info`
463    /// pointer to location where to store
464    ///     the #GMatchInfo, or [`None`] if you do not need it
465    // rustdoc-stripper-ignore-next-stop
466    /// Using the standard algorithm for regular expression matching only
467    /// the longest match in the @string is retrieved, it is not possible
468    /// to obtain all the available matches. For instance matching
469    /// `"<a> <b> <c>"` against the pattern `"<.*>"`
470    /// you get `"<a> <b> <c>"`.
471    ///
472    /// This function uses a different algorithm (called DFA, i.e. deterministic
473    /// finite automaton), so it can retrieve all the possible matches, all
474    /// starting at the same point in the string. For instance matching
475    /// `"<a> <b> <c>"` against the pattern `"<.*>"`
476    /// you would obtain three matches: `"<a> <b> <c>"`,
477    /// `"<a> <b>"` and `"<a>"`.
478    ///
479    /// The number of matched strings is retrieved using
480    /// g_match_info_get_match_count(). To obtain the matched strings and
481    /// their position you can use, respectively, g_match_info_fetch() and
482    /// g_match_info_fetch_pos(). Note that the strings are returned in
483    /// reverse order of length; that is, the longest matching string is
484    /// given first.
485    ///
486    /// Note that the DFA algorithm is slower than the standard one and it
487    /// is not able to capture substrings, so backreferences do not work.
488    ///
489    /// Setting @start_position differs from just passing over a shortened
490    /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
491    /// that begins with any kind of lookbehind assertion, such as "\b".
492    ///
493    /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
494    ///
495    /// A #GMatchInfo structure, used to get information on the match, is
496    /// stored in @match_info if not [`None`]. Note that if @match_info is
497    /// not [`None`] then it is created even if the function returns [`false`],
498    /// i.e. you must free it regardless if regular expression actually
499    /// matched.
500    ///
501    /// @string is not copied and is used in #GMatchInfo internally. If
502    /// you use any #GMatchInfo method (except g_match_info_free()) after
503    /// freeing or modifying @string then the behaviour is undefined.
504    /// ## `string`
505    /// the string to scan for matches
506    /// ## `start_position`
507    /// starting index of the string to match, in bytes
508    /// ## `match_options`
509    /// match options
510    ///
511    /// # Returns
512    ///
513    /// [`true`] is the string matched, [`false`] otherwise
514    ///
515    /// ## `match_info`
516    /// pointer to location where to store
517    ///     the #GMatchInfo, or [`None`] if you do not need it
518    #[doc(alias = "g_regex_match_all_full")]
519    pub fn match_all_full<'input>(
520        &self,
521        string: &'input GStr,
522        start_position: i32,
523        match_options: RegexMatchFlags,
524    ) -> Result<MatchInfo<'input>, crate::Error> {
525        unsafe {
526            let mut match_info = ptr::null_mut();
527            let mut error = ptr::null_mut();
528            let is_ok = ffi::g_regex_match_all_full(
529                self.to_glib_none().0,
530                string.to_glib_none().0,
531                string.len() as _,
532                start_position,
533                match_options.into_glib(),
534                &mut match_info,
535                &mut error,
536            );
537            debug_assert_eq!(is_ok == crate::ffi::GFALSE, !error.is_null());
538            if error.is_null() {
539                Ok(from_glib_full(match_info))
540            } else {
541                Err(from_glib_full(error))
542            }
543        }
544    }
545
546    /// Scans for a match in @string for the pattern in @self.
547    /// The @match_options are combined with the match options specified
548    /// when the @self structure was created, letting you have more
549    /// flexibility in reusing #GRegex structures.
550    ///
551    /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
552    ///
553    /// A #GMatchInfo structure, used to get information on the match,
554    /// is stored in @match_info if not [`None`]. Note that if @match_info
555    /// is not [`None`] then it is created even if the function returns [`false`],
556    /// i.e. you must free it regardless if regular expression actually matched.
557    ///
558    /// To retrieve all the non-overlapping matches of the pattern in
559    /// string you can use g_match_info_next().
560    ///
561    ///
562    ///
563    /// **⚠️ The following code is in C ⚠️**
564    ///
565    /// ```C
566    /// static void
567    /// print_uppercase_words (const gchar *string)
568    /// {
569    ///   // Print all uppercase-only words.
570    ///   GRegex *regex;
571    ///   GMatchInfo *match_info;
572    ///
573    ///   regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
574    ///   g_regex_match (regex, string, 0, &match_info);
575    ///   while (g_match_info_matches (match_info))
576    ///     {
577    ///       gchar *word = g_match_info_fetch (match_info, 0);
578    ///       g_print ("Found: %s\n", word);
579    ///       g_free (word);
580    ///       g_match_info_next (match_info, NULL);
581    ///     }
582    ///   g_match_info_free (match_info);
583    ///   g_regex_unref (regex);
584    /// }
585    /// ```
586    ///
587    /// @string is not copied and is used in #GMatchInfo internally. If
588    /// you use any #GMatchInfo method (except g_match_info_free()) after
589    /// freeing or modifying @string then the behaviour is undefined.
590    /// ## `string`
591    /// the string to scan for matches
592    /// ## `match_options`
593    /// match options
594    ///
595    /// # Returns
596    ///
597    /// [`true`] is the string matched, [`false`] otherwise
598    ///
599    /// ## `match_info`
600    /// pointer to location where to store
601    ///     the #GMatchInfo, or [`None`] if you do not need it
602    // rustdoc-stripper-ignore-next-stop
603    /// Scans for a match in @string for the pattern in @self.
604    /// The @match_options are combined with the match options specified
605    /// when the @self structure was created, letting you have more
606    /// flexibility in reusing #GRegex structures.
607    ///
608    /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
609    ///
610    /// A #GMatchInfo structure, used to get information on the match,
611    /// is stored in @match_info if not [`None`]. Note that if @match_info
612    /// is not [`None`] then it is created even if the function returns [`false`],
613    /// i.e. you must free it regardless if regular expression actually matched.
614    ///
615    /// To retrieve all the non-overlapping matches of the pattern in
616    /// string you can use g_match_info_next().
617    ///
618    ///
619    ///
620    /// **⚠️ The following code is in C ⚠️**
621    ///
622    /// ```C
623    /// static void
624    /// print_uppercase_words (const gchar *string)
625    /// {
626    ///   // Print all uppercase-only words.
627    ///   GRegex *regex;
628    ///   GMatchInfo *match_info;
629    ///
630    ///   regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
631    ///   g_regex_match (regex, string, 0, &match_info);
632    ///   while (g_match_info_matches (match_info))
633    ///     {
634    ///       gchar *word = g_match_info_fetch (match_info, 0);
635    ///       g_print ("Found: %s\n", word);
636    ///       g_free (word);
637    ///       g_match_info_next (match_info, NULL);
638    ///     }
639    ///   g_match_info_free (match_info);
640    ///   g_regex_unref (regex);
641    /// }
642    /// ```
643    ///
644    /// @string is not copied and is used in #GMatchInfo internally. If
645    /// you use any #GMatchInfo method (except g_match_info_free()) after
646    /// freeing or modifying @string then the behaviour is undefined.
647    /// ## `string`
648    /// the string to scan for matches
649    /// ## `match_options`
650    /// match options
651    ///
652    /// # Returns
653    ///
654    /// [`true`] is the string matched, [`false`] otherwise
655    ///
656    /// ## `match_info`
657    /// pointer to location where to store
658    ///     the #GMatchInfo, or [`None`] if you do not need it
659    #[doc(alias = "g_regex_match")]
660    pub fn match_<'input>(
661        &self,
662        string: &'input GStr,
663        match_options: RegexMatchFlags,
664    ) -> Option<MatchInfo<'input>> {
665        self.match_full(string, 0, match_options).ok()
666    }
667
668    /// Scans for a match in @string for the pattern in @self.
669    /// The @match_options are combined with the match options specified
670    /// when the @self structure was created, letting you have more
671    /// flexibility in reusing #GRegex structures.
672    ///
673    /// Setting @start_position differs from just passing over a shortened
674    /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
675    /// that begins with any kind of lookbehind assertion, such as "\b".
676    ///
677    /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
678    ///
679    /// A #GMatchInfo structure, used to get information on the match, is
680    /// stored in @match_info if not [`None`]. Note that if @match_info is
681    /// not [`None`] then it is created even if the function returns [`false`],
682    /// i.e. you must free it regardless if regular expression actually
683    /// matched.
684    ///
685    /// @string is not copied and is used in #GMatchInfo internally. If
686    /// you use any #GMatchInfo method (except g_match_info_free()) after
687    /// freeing or modifying @string then the behaviour is undefined.
688    ///
689    /// To retrieve all the non-overlapping matches of the pattern in
690    /// string you can use g_match_info_next().
691    ///
692    ///
693    ///
694    /// **⚠️ The following code is in C ⚠️**
695    ///
696    /// ```C
697    /// static void
698    /// print_uppercase_words (const gchar *string)
699    /// {
700    ///   // Print all uppercase-only words.
701    ///   GRegex *regex;
702    ///   GMatchInfo *match_info;
703    ///   GError *error = NULL;
704    ///
705    ///   regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
706    ///   g_regex_match_full (regex, string, -1, 0, 0, &match_info, &error);
707    ///   while (g_match_info_matches (match_info))
708    ///     {
709    ///       gchar *word = g_match_info_fetch (match_info, 0);
710    ///       g_print ("Found: %s\n", word);
711    ///       g_free (word);
712    ///       g_match_info_next (match_info, &error);
713    ///     }
714    ///   g_match_info_free (match_info);
715    ///   g_regex_unref (regex);
716    ///   if (error != NULL)
717    ///     {
718    ///       g_printerr ("Error while matching: %s\n", error->message);
719    ///       g_error_free (error);
720    ///     }
721    /// }
722    /// ```
723    /// ## `string`
724    /// the string to scan for matches
725    /// ## `start_position`
726    /// starting index of the string to match, in bytes
727    /// ## `match_options`
728    /// match options
729    ///
730    /// # Returns
731    ///
732    /// [`true`] is the string matched, [`false`] otherwise
733    ///
734    /// ## `match_info`
735    /// pointer to location where to store
736    ///     the #GMatchInfo, or [`None`] if you do not need it
737    // rustdoc-stripper-ignore-next-stop
738    /// Scans for a match in @string for the pattern in @self.
739    /// The @match_options are combined with the match options specified
740    /// when the @self structure was created, letting you have more
741    /// flexibility in reusing #GRegex structures.
742    ///
743    /// Setting @start_position differs from just passing over a shortened
744    /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
745    /// that begins with any kind of lookbehind assertion, such as "\b".
746    ///
747    /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
748    ///
749    /// A #GMatchInfo structure, used to get information on the match, is
750    /// stored in @match_info if not [`None`]. Note that if @match_info is
751    /// not [`None`] then it is created even if the function returns [`false`],
752    /// i.e. you must free it regardless if regular expression actually
753    /// matched.
754    ///
755    /// @string is not copied and is used in #GMatchInfo internally. If
756    /// you use any #GMatchInfo method (except g_match_info_free()) after
757    /// freeing or modifying @string then the behaviour is undefined.
758    ///
759    /// To retrieve all the non-overlapping matches of the pattern in
760    /// string you can use g_match_info_next().
761    ///
762    ///
763    ///
764    /// **⚠️ The following code is in C ⚠️**
765    ///
766    /// ```C
767    /// static void
768    /// print_uppercase_words (const gchar *string)
769    /// {
770    ///   // Print all uppercase-only words.
771    ///   GRegex *regex;
772    ///   GMatchInfo *match_info;
773    ///   GError *error = NULL;
774    ///
775    ///   regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
776    ///   g_regex_match_full (regex, string, -1, 0, 0, &match_info, &error);
777    ///   while (g_match_info_matches (match_info))
778    ///     {
779    ///       gchar *word = g_match_info_fetch (match_info, 0);
780    ///       g_print ("Found: %s\n", word);
781    ///       g_free (word);
782    ///       g_match_info_next (match_info, &error);
783    ///     }
784    ///   g_match_info_free (match_info);
785    ///   g_regex_unref (regex);
786    ///   if (error != NULL)
787    ///     {
788    ///       g_printerr ("Error while matching: %s\n", error->message);
789    ///       g_error_free (error);
790    ///     }
791    /// }
792    /// ```
793    /// ## `string`
794    /// the string to scan for matches
795    /// ## `start_position`
796    /// starting index of the string to match, in bytes
797    /// ## `match_options`
798    /// match options
799    ///
800    /// # Returns
801    ///
    /// [`true`] if the string matched, [`false`] otherwise
803    ///
804    /// ## `match_info`
805    /// pointer to location where to store
806    ///     the #GMatchInfo, or [`None`] if you do not need it
807    #[doc(alias = "g_regex_match_full")]
808    pub fn match_full<'input>(
809        &self,
810        string: &'input GStr,
811        start_position: i32,
812        match_options: RegexMatchFlags,
813    ) -> Result<MatchInfo<'input>, crate::Error> {
814        unsafe {
815            let mut match_info = ptr::null_mut();
816            let mut error = ptr::null_mut();
817            let is_ok = ffi::g_regex_match_full(
818                self.to_glib_none().0,
819                string.to_glib_none().0,
820                string.len() as _,
821                start_position,
822                match_options.into_glib(),
823                &mut match_info,
824                &mut error,
825            );
826            debug_assert_eq!(is_ok == crate::ffi::GFALSE, !error.is_null());
827            if error.is_null() {
828                Ok(from_glib_full(match_info))
829            } else {
830                Err(from_glib_full(error))
831            }
832        }
833    }
834
835    /// Replaces all occurrences of the pattern in @self with the
836    /// replacement text. @replacement is replaced literally, to
837    /// include backreferences use g_regex_replace().
838    ///
839    /// Setting @start_position differs from just passing over a
840    /// shortened string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the
841    /// case of a pattern that begins with any kind of lookbehind
842    /// assertion, such as "\b".
843    /// ## `string`
844    /// the string to perform matches against
845    /// ## `start_position`
846    /// starting index of the string to match, in bytes
847    /// ## `replacement`
848    /// text to replace each match with
849    /// ## `match_options`
850    /// options for the match
851    ///
852    /// # Returns
853    ///
854    /// a newly allocated string containing the replacements
855    // rustdoc-stripper-ignore-next-stop
856    /// Replaces all occurrences of the pattern in @self with the
857    /// replacement text. @replacement is replaced literally, to
858    /// include backreferences use g_regex_replace().
859    ///
860    /// Setting @start_position differs from just passing over a
861    /// shortened string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the
862    /// case of a pattern that begins with any kind of lookbehind
863    /// assertion, such as "\b".
864    /// ## `string`
865    /// the string to perform matches against
866    /// ## `start_position`
867    /// starting index of the string to match, in bytes
868    /// ## `replacement`
869    /// text to replace each match with
870    /// ## `match_options`
871    /// options for the match
872    ///
873    /// # Returns
874    ///
875    /// a newly allocated string containing the replacements
876    #[doc(alias = "g_regex_replace_literal")]
877    pub fn replace_literal(
878        &self,
879        string: impl IntoGStr,
880        start_position: i32,
881        replacement: impl IntoGStr,
882        match_options: RegexMatchFlags,
883    ) -> Result<crate::GString, crate::Error> {
884        unsafe {
885            string.run_with_gstr(|string| {
886                replacement.run_with_gstr(|replacement| {
887                    let mut error = ptr::null_mut();
888                    let ret = ffi::g_regex_replace_literal(
889                        self.to_glib_none().0,
890                        string.to_glib_none().0,
891                        string.len() as _,
892                        start_position,
893                        replacement.to_glib_none().0,
894                        match_options.into_glib(),
895                        &mut error,
896                    );
897                    if error.is_null() {
898                        Ok(from_glib_full(ret))
899                    } else {
900                        Err(from_glib_full(error))
901                    }
902                })
903            })
904        }
905    }
906
907    /// Breaks the string on the pattern, and returns an array of the tokens.
908    /// If the pattern contains capturing parentheses, then the text for each
909    /// of the substrings will also be returned. If the pattern does not match
910    /// anywhere in the string, then the whole string is returned as the first
911    /// token.
912    ///
913    /// As a special case, the result of splitting the empty string "" is an
914    /// empty vector, not a vector containing a single string. The reason for
915    /// this special case is that being able to represent an empty vector is
916    /// typically more useful than consistent handling of empty elements. If
917    /// you do need to represent empty elements, you'll need to check for the
918    /// empty string before calling this function.
919    ///
920    /// A pattern that can match empty strings splits @string into separate
921    /// characters wherever it matches the empty string between characters.
922    /// For example splitting "ab c" using as a separator "\s*", you will get
923    /// "a", "b" and "c".
924    /// ## `string`
925    /// the string to split with the pattern
926    /// ## `match_options`
927    /// match time option flags
928    ///
929    /// # Returns
930    ///
931    /// a [`None`]-terminated gchar ** array. Free
932    /// it using g_strfreev()
933    // rustdoc-stripper-ignore-next-stop
934    /// Breaks the string on the pattern, and returns an array of the tokens.
935    /// If the pattern contains capturing parentheses, then the text for each
936    /// of the substrings will also be returned. If the pattern does not match
937    /// anywhere in the string, then the whole string is returned as the first
938    /// token.
939    ///
940    /// As a special case, the result of splitting the empty string "" is an
941    /// empty vector, not a vector containing a single string. The reason for
942    /// this special case is that being able to represent an empty vector is
943    /// typically more useful than consistent handling of empty elements. If
944    /// you do need to represent empty elements, you'll need to check for the
945    /// empty string before calling this function.
946    ///
947    /// A pattern that can match empty strings splits @string into separate
948    /// characters wherever it matches the empty string between characters.
949    /// For example splitting "ab c" using as a separator "\s*", you will get
950    /// "a", "b" and "c".
951    /// ## `string`
952    /// the string to split with the pattern
953    /// ## `match_options`
954    /// match time option flags
955    ///
956    /// # Returns
957    ///
958    /// a [`None`]-terminated gchar ** array. Free
959    /// it using g_strfreev()
960    #[doc(alias = "g_regex_split")]
961    pub fn split(
962        &self,
963        string: impl IntoGStr,
964        match_options: RegexMatchFlags,
965    ) -> PtrSlice<GStringPtr> {
966        self.split_full(string, 0, match_options, 0)
967            .unwrap_or_default()
968    }
969
970    /// Breaks the string on the pattern, and returns an array of the tokens.
971    /// If the pattern contains capturing parentheses, then the text for each
972    /// of the substrings will also be returned. If the pattern does not match
973    /// anywhere in the string, then the whole string is returned as the first
974    /// token.
975    ///
976    /// As a special case, the result of splitting the empty string "" is an
977    /// empty vector, not a vector containing a single string. The reason for
978    /// this special case is that being able to represent an empty vector is
979    /// typically more useful than consistent handling of empty elements. If
980    /// you do need to represent empty elements, you'll need to check for the
981    /// empty string before calling this function.
982    ///
983    /// A pattern that can match empty strings splits @string into separate
984    /// characters wherever it matches the empty string between characters.
985    /// For example splitting "ab c" using as a separator "\s*", you will get
986    /// "a", "b" and "c".
987    ///
988    /// Setting @start_position differs from just passing over a shortened
989    /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
990    /// that begins with any kind of lookbehind assertion, such as "\b".
991    /// ## `string`
992    /// the string to split with the pattern
993    /// ## `start_position`
994    /// starting index of the string to match, in bytes
995    /// ## `match_options`
996    /// match time option flags
997    /// ## `max_tokens`
998    /// the maximum number of tokens to split @string into.
999    ///   If this is less than 1, the string is split completely
1000    ///
1001    /// # Returns
1002    ///
1003    /// a [`None`]-terminated gchar ** array. Free
1004    /// it using g_strfreev()
1005    // rustdoc-stripper-ignore-next-stop
1006    /// Breaks the string on the pattern, and returns an array of the tokens.
1007    /// If the pattern contains capturing parentheses, then the text for each
1008    /// of the substrings will also be returned. If the pattern does not match
1009    /// anywhere in the string, then the whole string is returned as the first
1010    /// token.
1011    ///
1012    /// As a special case, the result of splitting the empty string "" is an
1013    /// empty vector, not a vector containing a single string. The reason for
1014    /// this special case is that being able to represent an empty vector is
1015    /// typically more useful than consistent handling of empty elements. If
1016    /// you do need to represent empty elements, you'll need to check for the
1017    /// empty string before calling this function.
1018    ///
1019    /// A pattern that can match empty strings splits @string into separate
1020    /// characters wherever it matches the empty string between characters.
1021    /// For example splitting "ab c" using as a separator "\s*", you will get
1022    /// "a", "b" and "c".
1023    ///
1024    /// Setting @start_position differs from just passing over a shortened
1025    /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
1026    /// that begins with any kind of lookbehind assertion, such as "\b".
1027    /// ## `string`
1028    /// the string to split with the pattern
1029    /// ## `start_position`
1030    /// starting index of the string to match, in bytes
1031    /// ## `match_options`
1032    /// match time option flags
1033    /// ## `max_tokens`
1034    /// the maximum number of tokens to split @string into.
1035    ///   If this is less than 1, the string is split completely
1036    ///
1037    /// # Returns
1038    ///
1039    /// a [`None`]-terminated gchar ** array. Free
1040    /// it using g_strfreev()
1041    #[doc(alias = "g_regex_split_full")]
1042    pub fn split_full(
1043        &self,
1044        string: impl IntoGStr,
1045        start_position: i32,
1046        match_options: RegexMatchFlags,
1047        max_tokens: i32,
1048    ) -> Result<PtrSlice<GStringPtr>, crate::Error> {
1049        unsafe {
1050            let mut error = ptr::null_mut();
1051            string.run_with_gstr(|string| {
1052                let ret = ffi::g_regex_split_full(
1053                    self.to_glib_none().0,
1054                    string.to_glib_none().0,
1055                    string.len() as _,
1056                    start_position,
1057                    match_options.into_glib(),
1058                    max_tokens,
1059                    &mut error,
1060                );
1061                if error.is_null() {
1062                    Ok(FromGlibPtrContainer::from_glib_full(ret))
1063                } else {
1064                    Err(from_glib_full(error))
1065                }
1066            })
1067        }
1068    }
1069
1070    /// Breaks the string on the pattern, and returns an array of
1071    /// the tokens. If the pattern contains capturing parentheses,
1072    /// then the text for each of the substrings will also be returned.
1073    /// If the pattern does not match anywhere in the string, then the
1074    /// whole string is returned as the first token.
1075    ///
1076    /// This function is equivalent to g_regex_split() but it does
1077    /// not require to compile the pattern with g_regex_new(), avoiding
1078    /// some lines of code when you need just to do a split without
1079    /// extracting substrings, capture counts, and so on.
1080    ///
1081    /// If this function is to be called on the same @pattern more than
1082    /// once, it's more efficient to compile the pattern once with
1083    /// g_regex_new() and then use g_regex_split().
1084    ///
1085    /// As a special case, the result of splitting the empty string ""
1086    /// is an empty vector, not a vector containing a single string.
1087    /// The reason for this special case is that being able to represent
1088    /// an empty vector is typically more useful than consistent handling
1089    /// of empty elements. If you do need to represent empty elements,
1090    /// you'll need to check for the empty string before calling this
1091    /// function.
1092    ///
1093    /// A pattern that can match empty strings splits @string into
1094    /// separate characters wherever it matches the empty string between
1095    /// characters. For example splitting "ab c" using as a separator
1096    /// "\s*", you will get "a", "b" and "c".
1097    /// ## `pattern`
1098    /// the regular expression
1099    /// ## `string`
1100    /// the string to scan for matches
1101    /// ## `compile_options`
1102    /// compile options for the regular expression, or 0
1103    /// ## `match_options`
1104    /// match options, or 0
1105    ///
1106    /// # Returns
1107    ///
1108    /// a [`None`]-terminated array of strings. Free
1109    /// it using g_strfreev()
1110    // rustdoc-stripper-ignore-next-stop
1111    /// Breaks the string on the pattern, and returns an array of
1112    /// the tokens. If the pattern contains capturing parentheses,
1113    /// then the text for each of the substrings will also be returned.
1114    /// If the pattern does not match anywhere in the string, then the
1115    /// whole string is returned as the first token.
1116    ///
1117    /// This function is equivalent to g_regex_split() but it does
1118    /// not require to compile the pattern with g_regex_new(), avoiding
1119    /// some lines of code when you need just to do a split without
1120    /// extracting substrings, capture counts, and so on.
1121    ///
1122    /// If this function is to be called on the same @pattern more than
1123    /// once, it's more efficient to compile the pattern once with
1124    /// g_regex_new() and then use g_regex_split().
1125    ///
1126    /// As a special case, the result of splitting the empty string ""
1127    /// is an empty vector, not a vector containing a single string.
1128    /// The reason for this special case is that being able to represent
1129    /// an empty vector is typically more useful than consistent handling
1130    /// of empty elements. If you do need to represent empty elements,
1131    /// you'll need to check for the empty string before calling this
1132    /// function.
1133    ///
1134    /// A pattern that can match empty strings splits @string into
1135    /// separate characters wherever it matches the empty string between
1136    /// characters. For example splitting "ab c" using as a separator
1137    /// "\s*", you will get "a", "b" and "c".
1138    /// ## `pattern`
1139    /// the regular expression
1140    /// ## `string`
1141    /// the string to scan for matches
1142    /// ## `compile_options`
1143    /// compile options for the regular expression, or 0
1144    /// ## `match_options`
1145    /// match options, or 0
1146    ///
1147    /// # Returns
1148    ///
1149    /// a [`None`]-terminated array of strings. Free
1150    /// it using g_strfreev()
1151    #[doc(alias = "g_regex_split_simple")]
1152    pub fn split_simple(
1153        pattern: impl IntoGStr,
1154        string: impl IntoGStr,
1155        compile_options: RegexCompileFlags,
1156        match_options: RegexMatchFlags,
1157    ) -> PtrSlice<GStringPtr> {
1158        pattern.run_with_gstr(|pattern| {
1159            string.run_with_gstr(|string| unsafe {
1160                FromGlibPtrContainer::from_glib_full(ffi::g_regex_split_simple(
1161                    pattern.to_glib_none().0,
1162                    string.to_glib_none().0,
1163                    compile_options.into_glib(),
1164                    match_options.into_glib(),
1165                ))
1166            })
1167        })
1168    }
1169}
1170
#[cfg(test)]
mod tests {
    use super::*;
    use crate::RegexCompileFlags;

    // Sentence shared by both tests; it contains one "simple" and one "sample".
    const QUOTE: &str = "This is a simple sample.";

    // Builds the regex used by every test in this module.
    fn sample_regex() -> Regex {
        let compiled = Regex::new(
            "s[ai]mple",
            RegexCompileFlags::OPTIMIZE,
            RegexMatchFlags::DEFAULT,
        );
        compiled.expect("Regex new").expect("Null regex")
    }

    // Every match of the pattern is replaced by the literal text.
    #[test]
    fn test_replace_literal() {
        let pattern = sample_regex();

        let replaced = pattern
            .replace_literal(QUOTE, 0, "XXX", RegexMatchFlags::DEFAULT)
            .expect("regex replace");

        assert_eq!(replaced, "This is a XXX XXX.");
    }

    // Splitting on the pattern yields the text around the two matches.
    #[test]
    fn test_split() {
        let pattern = sample_regex();

        let tokens = pattern.split(QUOTE, RegexMatchFlags::DEFAULT);

        assert_eq!(tokens.len(), 3);
        assert_eq!(tokens[0], "This is a ");
        assert_eq!(tokens[1], " ");
        assert_eq!(tokens[2], ".");
    }
}