// glib/auto/regex.rs

// This file was generated by gir (https://github.com/gtk-rs/gir)
// from gir-files (https://github.com/gtk-rs/gir-files)
// DO NOT EDIT

use crate::{ffi, translate::*, Error, RegexCompileFlags, RegexMatchFlags};

7crate::wrapper! {
8    /// A `GRegex` is a compiled form of a regular expression.
9    ///
10    /// After instantiating a `GRegex`, you can use its methods to find matches
11    /// in a string, replace matches within a string, or split the string at matches.
12    ///
13    /// `GRegex` implements regular expression pattern matching using syntax and
14    /// semantics (such as character classes, quantifiers, and capture groups)
15    /// similar to Perl regular expression. See the
16    /// [PCRE documentation](man:pcre2pattern(3)) for details.
17    ///
18    /// A typical scenario for regex pattern matching is to check if a string
19    /// matches a pattern. The following statements implement this scenario.
20    ///
21    /// **⚠️ The following code is in  { .c } ⚠️**
22    ///
23    /// ``` { .c }
24    /// const char *regex_pattern = ".*GLib.*";
25    /// const char *string_to_search = "You will love the GLib implementation of regex";
26    /// g_autoptr(GMatchInfo) match_info = NULL;
27    /// g_autoptr(GRegex) regex = NULL;
28    ///
29    /// regex = g_regex_new (regex_pattern, G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
30    /// g_assert (regex != NULL);
31    ///
32    /// if (g_regex_match (regex, string_to_search, G_REGEX_MATCH_DEFAULT, &match_info))
33    ///   {
34    ///     int start_pos, end_pos;
35    ///     g_match_info_fetch_pos (match_info, 0, &start_pos, &end_pos);
36    ///     g_print ("Match successful! Overall pattern matches bytes %d to %d\n", start_pos, end_pos);
37    ///   }
38    /// else
39    ///   {
40    ///     g_print ("No match!\n");
41    ///   }
42    /// ```
43    ///
44    /// The constructor for `GRegex` includes two sets of bitmapped flags:
45    ///
46    /// * [`RegexCompileFlags`][crate::RegexCompileFlags]—These flags
47    /// control how GLib compiles the regex. There are options for case
48    /// sensitivity, multiline, ignoring whitespace, etc.
49    /// * [`RegexMatchFlags`][crate::RegexMatchFlags]—These flags control
50    /// `GRegex`’s matching behavior, such as anchoring and customizing definitions
51    /// for newline characters.
52    ///
53    /// Some regex patterns include backslash assertions, such as `\d` (digit) or
54    /// `\D` (non-digit). The regex pattern must escape those backslashes. For
55    /// example, the pattern `"\\d\\D"` matches a digit followed by a non-digit.
56    ///
57    /// GLib’s implementation of pattern matching includes a `start_position`
58    /// argument for some of the match, replace, and split methods. Specifying
59    /// a start position provides flexibility when you want to ignore the first
60    /// _n_ characters of a string, but want to incorporate backslash assertions
61    /// at character _n_ - 1. For example, a database field contains inconsistent
62    /// spelling for a job title: `healthcare provider` and `health-care provider`.
63    /// The database manager wants to make the spelling consistent by adding a
64    /// hyphen when it is missing. The following regex pattern tests for the string
65    /// `care` preceded by a non-word boundary character (instead of a hyphen)
66    /// and followed by a space.
67    ///
68    /// **⚠️ The following code is in  { .c } ⚠️**
69    ///
70    /// ``` { .c }
71    /// const char *regex_pattern = "\\Bcare\\s";
72    /// ```
73    ///
74    /// An efficient way to match with this pattern is to start examining at
75    /// `start_position` 6 in the string `healthcare` or `health-care`.
76    ///
77    /// **⚠️ The following code is in  { .c } ⚠️**
78    ///
79    /// ``` { .c }
80    /// const char *regex_pattern = "\\Bcare\\s";
81    /// const char *string_to_search = "healthcare provider";
82    /// g_autoptr(GMatchInfo) match_info = NULL;
83    /// g_autoptr(GRegex) regex = NULL;
84    ///
85    /// regex = g_regex_new (
86    ///   regex_pattern,
87    ///   G_REGEX_DEFAULT,
88    ///   G_REGEX_MATCH_DEFAULT,
89    ///   NULL);
90    /// g_assert (regex != NULL);
91    ///
92    /// g_regex_match_full (
93    ///   regex,
94    ///   string_to_search,
95    ///   -1,
96    ///   6, // position of 'c' in the test string.
97    ///   G_REGEX_MATCH_DEFAULT,
98    ///   &match_info,
99    ///   NULL);
100    /// ```
101    ///
102    /// The method [`match_full()`][Self::match_full()] (and other methods implementing
103    /// `start_pos`) allow for lookback before the start position to determine if
104    /// the previous character satisfies an assertion.
105    ///
106    /// Unless you set the [flags@GLib.RegexCompileFlags.RAW] as one of
107    /// the `GRegexCompileFlags`, all the strings passed to `GRegex` methods must
108    /// be encoded in UTF-8. The lengths and the positions inside the strings are
109    /// in bytes and not in characters, so, for instance, `\xc3\xa0` (i.e., `à`)
110    /// is two bytes long but it is treated as a single character. If you set
111    /// `G_REGEX_RAW`, the strings can be non-valid UTF-8 strings and a byte is
112    /// treated as a character, so `\xc3\xa0` is two bytes and two characters long.
113    ///
114    /// Regarding line endings, `\n` matches a `\n` character, and `\r` matches
115    /// a `\r` character. More generally, `\R` matches all typical line endings:
116    /// CR + LF (`\r\n`), LF (linefeed, U+000A, `\n`), VT (vertical tab, U+000B,
117    /// `\v`), FF (formfeed, U+000C, `\f`), CR (carriage return, U+000D, `\r`),
118    /// NEL (next line, U+0085), LS (line separator, U+2028), and PS (paragraph
119    /// separator, U+2029).
120    ///
121    /// The behaviour of the dot, circumflex, and dollar metacharacters are
122    /// affected by newline characters. By default, `GRegex` matches any newline
123    /// character matched by `\R`. You can limit the matched newline characters by
124    /// specifying the [flags@GLib.RegexMatchFlags.NEWLINE_CR],
125    /// [flags@GLib.RegexMatchFlags.NEWLINE_LF], and
126    /// [flags@GLib.RegexMatchFlags.NEWLINE_CRLF] compile options, and
127    /// with [flags@GLib.RegexMatchFlags.NEWLINE_ANY],
128    /// [flags@GLib.RegexMatchFlags.NEWLINE_CR],
129    /// [flags@GLib.RegexMatchFlags.NEWLINE_LF] and
130    /// [flags@GLib.RegexMatchFlags.NEWLINE_CRLF] match options.
131    /// These settings are also relevant when compiling a pattern if
132    /// [flags@GLib.RegexCompileFlags.EXTENDED] is set and an unescaped
133    /// `#` outside a character class is encountered. This indicates a comment
134    /// that lasts until after the next newline.
135    ///
136    /// Because `GRegex` does not modify its internal state between creation and
137    /// destruction, you can create and modify the same `GRegex` instance from
138    /// different threads. In contrast, [`MatchInfo`][crate::MatchInfo] is not thread safe.
139    ///
140    /// The regular expression low-level functionalities are obtained through
141    /// the excellent [PCRE](http://www.pcre.org/) library written by Philip Hazel.
142    // rustdoc-stripper-ignore-next-stop
143    /// A `GRegex` is a compiled form of a regular expression.
144    ///
145    /// After instantiating a `GRegex`, you can use its methods to find matches
146    /// in a string, replace matches within a string, or split the string at matches.
147    ///
148    /// `GRegex` implements regular expression pattern matching using syntax and
149    /// semantics (such as character classes, quantifiers, and capture groups)
150    /// similar to Perl regular expression. See the
151    /// [PCRE documentation](man:pcre2pattern(3)) for details.
152    ///
153    /// A typical scenario for regex pattern matching is to check if a string
154    /// matches a pattern. The following statements implement this scenario.
155    ///
156    /// **⚠️ The following code is in  { .c } ⚠️**
157    ///
158    /// ``` { .c }
159    /// const char *regex_pattern = ".*GLib.*";
160    /// const char *string_to_search = "You will love the GLib implementation of regex";
161    /// g_autoptr(GMatchInfo) match_info = NULL;
162    /// g_autoptr(GRegex) regex = NULL;
163    ///
164    /// regex = g_regex_new (regex_pattern, G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
165    /// g_assert (regex != NULL);
166    ///
167    /// if (g_regex_match (regex, string_to_search, G_REGEX_MATCH_DEFAULT, &match_info))
168    ///   {
169    ///     int start_pos, end_pos;
170    ///     g_match_info_fetch_pos (match_info, 0, &start_pos, &end_pos);
171    ///     g_print ("Match successful! Overall pattern matches bytes %d to %d\n", start_pos, end_pos);
172    ///   }
173    /// else
174    ///   {
175    ///     g_print ("No match!\n");
176    ///   }
177    /// ```
178    ///
179    /// The constructor for `GRegex` includes two sets of bitmapped flags:
180    ///
181    /// * [`RegexCompileFlags`][crate::RegexCompileFlags]—These flags
182    /// control how GLib compiles the regex. There are options for case
183    /// sensitivity, multiline, ignoring whitespace, etc.
184    /// * [`RegexMatchFlags`][crate::RegexMatchFlags]—These flags control
185    /// `GRegex`’s matching behavior, such as anchoring and customizing definitions
186    /// for newline characters.
187    ///
188    /// Some regex patterns include backslash assertions, such as `\d` (digit) or
189    /// `\D` (non-digit). The regex pattern must escape those backslashes. For
190    /// example, the pattern `"\\d\\D"` matches a digit followed by a non-digit.
191    ///
192    /// GLib’s implementation of pattern matching includes a `start_position`
193    /// argument for some of the match, replace, and split methods. Specifying
194    /// a start position provides flexibility when you want to ignore the first
195    /// _n_ characters of a string, but want to incorporate backslash assertions
196    /// at character _n_ - 1. For example, a database field contains inconsistent
197    /// spelling for a job title: `healthcare provider` and `health-care provider`.
198    /// The database manager wants to make the spelling consistent by adding a
199    /// hyphen when it is missing. The following regex pattern tests for the string
200    /// `care` preceded by a non-word boundary character (instead of a hyphen)
201    /// and followed by a space.
202    ///
203    /// **⚠️ The following code is in  { .c } ⚠️**
204    ///
205    /// ``` { .c }
206    /// const char *regex_pattern = "\\Bcare\\s";
207    /// ```
208    ///
209    /// An efficient way to match with this pattern is to start examining at
210    /// `start_position` 6 in the string `healthcare` or `health-care`.
211    ///
212    /// **⚠️ The following code is in  { .c } ⚠️**
213    ///
214    /// ``` { .c }
215    /// const char *regex_pattern = "\\Bcare\\s";
216    /// const char *string_to_search = "healthcare provider";
217    /// g_autoptr(GMatchInfo) match_info = NULL;
218    /// g_autoptr(GRegex) regex = NULL;
219    ///
220    /// regex = g_regex_new (
221    ///   regex_pattern,
222    ///   G_REGEX_DEFAULT,
223    ///   G_REGEX_MATCH_DEFAULT,
224    ///   NULL);
225    /// g_assert (regex != NULL);
226    ///
227    /// g_regex_match_full (
228    ///   regex,
229    ///   string_to_search,
230    ///   -1,
231    ///   6, // position of 'c' in the test string.
232    ///   G_REGEX_MATCH_DEFAULT,
233    ///   &match_info,
234    ///   NULL);
235    /// ```
236    ///
237    /// The method [`match_full()`][Self::match_full()] (and other methods implementing
238    /// `start_pos`) allow for lookback before the start position to determine if
239    /// the previous character satisfies an assertion.
240    ///
241    /// Unless you set the [flags@GLib.RegexCompileFlags.RAW] as one of
242    /// the `GRegexCompileFlags`, all the strings passed to `GRegex` methods must
243    /// be encoded in UTF-8. The lengths and the positions inside the strings are
244    /// in bytes and not in characters, so, for instance, `\xc3\xa0` (i.e., `à`)
245    /// is two bytes long but it is treated as a single character. If you set
246    /// `G_REGEX_RAW`, the strings can be non-valid UTF-8 strings and a byte is
247    /// treated as a character, so `\xc3\xa0` is two bytes and two characters long.
248    ///
249    /// Regarding line endings, `\n` matches a `\n` character, and `\r` matches
250    /// a `\r` character. More generally, `\R` matches all typical line endings:
251    /// CR + LF (`\r\n`), LF (linefeed, U+000A, `\n`), VT (vertical tab, U+000B,
252    /// `\v`), FF (formfeed, U+000C, `\f`), CR (carriage return, U+000D, `\r`),
253    /// NEL (next line, U+0085), LS (line separator, U+2028), and PS (paragraph
254    /// separator, U+2029).
255    ///
256    /// The behaviour of the dot, circumflex, and dollar metacharacters are
257    /// affected by newline characters. By default, `GRegex` matches any newline
258    /// character matched by `\R`. You can limit the matched newline characters by
259    /// specifying the [flags@GLib.RegexMatchFlags.NEWLINE_CR],
260    /// [flags@GLib.RegexMatchFlags.NEWLINE_LF], and
261    /// [flags@GLib.RegexMatchFlags.NEWLINE_CRLF] compile options, and
262    /// with [flags@GLib.RegexMatchFlags.NEWLINE_ANY],
263    /// [flags@GLib.RegexMatchFlags.NEWLINE_CR],
264    /// [flags@GLib.RegexMatchFlags.NEWLINE_LF] and
265    /// [flags@GLib.RegexMatchFlags.NEWLINE_CRLF] match options.
266    /// These settings are also relevant when compiling a pattern if
267    /// [flags@GLib.RegexCompileFlags.EXTENDED] is set and an unescaped
268    /// `#` outside a character class is encountered. This indicates a comment
269    /// that lasts until after the next newline.
270    ///
271    /// Because `GRegex` does not modify its internal state between creation and
272    /// destruction, you can create and modify the same `GRegex` instance from
273    /// different threads. In contrast, [`MatchInfo`][crate::MatchInfo] is not thread safe.
274    ///
275    /// The regular expression low-level functionalities are obtained through
276    /// the excellent [PCRE](http://www.pcre.org/) library written by Philip Hazel.
277    #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
278    pub struct Regex(Shared<ffi::GRegex>);
279
280    match fn {
281        ref => |ptr| ffi::g_regex_ref(ptr),
282        unref => |ptr| ffi::g_regex_unref(ptr),
283        type_ => || ffi::g_regex_get_type(),
284    }
285}
286
287impl Regex {
288    /// Compiles the regular expression to an internal form, and does
289    /// the initial setup of the #GRegex structure.
290    /// ## `pattern`
291    /// the regular expression
292    /// ## `compile_options`
293    /// compile options for the regular expression, or 0
294    /// ## `match_options`
295    /// match options for the regular expression, or 0
296    ///
297    /// # Returns
298    ///
299    /// a #GRegex structure or [`None`] if an error occurred. Call
300    ///   g_regex_unref() when you are done with it
301    // rustdoc-stripper-ignore-next-stop
302    /// Compiles the regular expression to an internal form, and does
303    /// the initial setup of the #GRegex structure.
304    /// ## `pattern`
305    /// the regular expression
306    /// ## `compile_options`
307    /// compile options for the regular expression, or 0
308    /// ## `match_options`
309    /// match options for the regular expression, or 0
310    ///
311    /// # Returns
312    ///
313    /// a #GRegex structure or [`None`] if an error occurred. Call
314    ///   g_regex_unref() when you are done with it
315    #[doc(alias = "g_regex_new")]
316    pub fn new(
317        pattern: &str,
318        compile_options: RegexCompileFlags,
319        match_options: RegexMatchFlags,
320    ) -> Result<Option<Regex>, crate::Error> {
321        unsafe {
322            let mut error = std::ptr::null_mut();
323            let ret = ffi::g_regex_new(
324                pattern.to_glib_none().0,
325                compile_options.into_glib(),
326                match_options.into_glib(),
327                &mut error,
328            );
329            if error.is_null() {
330                Ok(from_glib_full(ret))
331            } else {
332                Err(from_glib_full(error))
333            }
334        }
335    }
336
337    /// Returns the number of capturing subpatterns in the pattern.
338    ///
339    /// # Returns
340    ///
341    /// the number of capturing subpatterns
342    // rustdoc-stripper-ignore-next-stop
343    /// Returns the number of capturing subpatterns in the pattern.
344    ///
345    /// # Returns
346    ///
347    /// the number of capturing subpatterns
348    #[doc(alias = "g_regex_get_capture_count")]
349    #[doc(alias = "get_capture_count")]
350    pub fn capture_count(&self) -> i32 {
351        unsafe { ffi::g_regex_get_capture_count(self.to_glib_none().0) }
352    }
353
354    /// Returns the compile options that @self was created with.
355    ///
356    /// Depending on the version of PCRE that is used, this may or may not
357    /// include flags set by option expressions such as `(?i)` found at the
358    /// top-level within the compiled pattern.
359    ///
360    /// # Returns
361    ///
362    /// flags from #GRegexCompileFlags
363    // rustdoc-stripper-ignore-next-stop
364    /// Returns the compile options that @self was created with.
365    ///
366    /// Depending on the version of PCRE that is used, this may or may not
367    /// include flags set by option expressions such as `(?i)` found at the
368    /// top-level within the compiled pattern.
369    ///
370    /// # Returns
371    ///
372    /// flags from #GRegexCompileFlags
373    #[doc(alias = "g_regex_get_compile_flags")]
374    #[doc(alias = "get_compile_flags")]
375    pub fn compile_flags(&self) -> RegexCompileFlags {
376        unsafe { from_glib(ffi::g_regex_get_compile_flags(self.to_glib_none().0)) }
377    }
378
379    /// Checks whether the pattern contains explicit CR or LF references.
380    ///
381    /// # Returns
382    ///
383    /// [`true`] if the pattern contains explicit CR or LF references
384    // rustdoc-stripper-ignore-next-stop
385    /// Checks whether the pattern contains explicit CR or LF references.
386    ///
387    /// # Returns
388    ///
389    /// [`true`] if the pattern contains explicit CR or LF references
390    #[doc(alias = "g_regex_get_has_cr_or_lf")]
391    #[doc(alias = "get_has_cr_or_lf")]
392    pub fn has_cr_or_lf(&self) -> bool {
393        unsafe { from_glib(ffi::g_regex_get_has_cr_or_lf(self.to_glib_none().0)) }
394    }
395
396    /// Returns the match options that @self was created with.
397    ///
398    /// # Returns
399    ///
400    /// flags from #GRegexMatchFlags
401    // rustdoc-stripper-ignore-next-stop
402    /// Returns the match options that @self was created with.
403    ///
404    /// # Returns
405    ///
406    /// flags from #GRegexMatchFlags
407    #[doc(alias = "g_regex_get_match_flags")]
408    #[doc(alias = "get_match_flags")]
409    pub fn match_flags(&self) -> RegexMatchFlags {
410        unsafe { from_glib(ffi::g_regex_get_match_flags(self.to_glib_none().0)) }
411    }
412
413    /// Returns the number of the highest back reference
414    /// in the pattern, or 0 if the pattern does not contain
415    /// back references.
416    ///
417    /// # Returns
418    ///
419    /// the number of the highest back reference
420    // rustdoc-stripper-ignore-next-stop
421    /// Returns the number of the highest back reference
422    /// in the pattern, or 0 if the pattern does not contain
423    /// back references.
424    ///
425    /// # Returns
426    ///
427    /// the number of the highest back reference
428    #[doc(alias = "g_regex_get_max_backref")]
429    #[doc(alias = "get_max_backref")]
430    pub fn max_backref(&self) -> i32 {
431        unsafe { ffi::g_regex_get_max_backref(self.to_glib_none().0) }
432    }
433
434    /// Gets the number of characters in the longest lookbehind assertion in the
435    /// pattern. This information is useful when doing multi-segment matching using
436    /// the partial matching facilities.
437    ///
438    /// # Returns
439    ///
440    /// the number of characters in the longest lookbehind assertion.
441    // rustdoc-stripper-ignore-next-stop
442    /// Gets the number of characters in the longest lookbehind assertion in the
443    /// pattern. This information is useful when doing multi-segment matching using
444    /// the partial matching facilities.
445    ///
446    /// # Returns
447    ///
448    /// the number of characters in the longest lookbehind assertion.
449    #[doc(alias = "g_regex_get_max_lookbehind")]
450    #[doc(alias = "get_max_lookbehind")]
451    pub fn max_lookbehind(&self) -> i32 {
452        unsafe { ffi::g_regex_get_max_lookbehind(self.to_glib_none().0) }
453    }
454
455    /// Gets the pattern string associated with @self, i.e. a copy of
456    /// the string passed to g_regex_new().
457    ///
458    /// # Returns
459    ///
460    /// the pattern of @self
461    // rustdoc-stripper-ignore-next-stop
462    /// Gets the pattern string associated with @self, i.e. a copy of
463    /// the string passed to g_regex_new().
464    ///
465    /// # Returns
466    ///
467    /// the pattern of @self
468    #[doc(alias = "g_regex_get_pattern")]
469    #[doc(alias = "get_pattern")]
470    pub fn pattern(&self) -> crate::GString {
471        unsafe { from_glib_none(ffi::g_regex_get_pattern(self.to_glib_none().0)) }
472    }
473
474    //#[doc(alias = "g_regex_replace_eval")]
475    //pub fn replace_eval(&self, string: &[&str], start_position: i32, match_options: RegexMatchFlags, eval: /*Unimplemented*/FnMut(&MatchInfo, /*Ignored*/String) -> bool, user_data: /*Unimplemented*/Option<Basic: Pointer>) -> Result<crate::GString, crate::Error> {
476    //    unsafe { TODO: call ffi:g_regex_replace_eval() }
477    //}
478}