// glib/auto/regex.rs
// This file was generated by gir (https://github.com/gtk-rs/gir)
// from gir-files (https://github.com/gtk-rs/gir-files)
// DO NOT EDIT

use crate::{ffi, translate::*, Error, RegexCompileFlags, RegexMatchFlags};

7crate::wrapper! {
8 /// A `GRegex` is a compiled form of a regular expression.
9 ///
10 /// After instantiating a `GRegex`, you can use its methods to find matches
11 /// in a string, replace matches within a string, or split the string at matches.
12 ///
13 /// `GRegex` implements regular expression pattern matching using syntax and
14 /// semantics (such as character classes, quantifiers, and capture groups)
15 /// similar to Perl regular expression. See the
16 /// [PCRE documentation](man:pcre2pattern(3)) for details.
17 ///
18 /// A typical scenario for regex pattern matching is to check if a string
19 /// matches a pattern. The following statements implement this scenario.
20 ///
21 /// **⚠️ The following code is in { .c } ⚠️**
22 ///
23 /// ``` { .c }
24 /// const char *regex_pattern = ".*GLib.*";
25 /// const char *string_to_search = "You will love the GLib implementation of regex";
26 /// g_autoptr(GMatchInfo) match_info = NULL;
27 /// g_autoptr(GRegex) regex = NULL;
28 ///
29 /// regex = g_regex_new (regex_pattern, G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
30 /// g_assert (regex != NULL);
31 ///
32 /// if (g_regex_match (regex, string_to_search, G_REGEX_MATCH_DEFAULT, &match_info))
33 /// {
34 /// int start_pos, end_pos;
35 /// g_match_info_fetch_pos (match_info, 0, &start_pos, &end_pos);
36 /// g_print ("Match successful! Overall pattern matches bytes %d to %d\n", start_pos, end_pos);
37 /// }
38 /// else
39 /// {
40 /// g_print ("No match!\n");
41 /// }
42 /// ```
43 ///
44 /// The constructor for `GRegex` includes two sets of bitmapped flags:
45 ///
46 /// * [`RegexCompileFlags`][crate::RegexCompileFlags]—These flags
47 /// control how GLib compiles the regex. There are options for case
48 /// sensitivity, multiline, ignoring whitespace, etc.
49 /// * [`RegexMatchFlags`][crate::RegexMatchFlags]—These flags control
50 /// `GRegex`’s matching behavior, such as anchoring and customizing definitions
51 /// for newline characters.
52 ///
53 /// Some regex patterns include backslash assertions, such as `\d` (digit) or
54 /// `\D` (non-digit). The regex pattern must escape those backslashes. For
55 /// example, the pattern `"\\d\\D"` matches a digit followed by a non-digit.
56 ///
57 /// GLib’s implementation of pattern matching includes a `start_position`
58 /// argument for some of the match, replace, and split methods. Specifying
59 /// a start position provides flexibility when you want to ignore the first
60 /// _n_ characters of a string, but want to incorporate backslash assertions
61 /// at character _n_ - 1. For example, a database field contains inconsistent
62 /// spelling for a job title: `healthcare provider` and `health-care provider`.
63 /// The database manager wants to make the spelling consistent by adding a
64 /// hyphen when it is missing. The following regex pattern tests for the string
65 /// `care` preceded by a non-word boundary character (instead of a hyphen)
66 /// and followed by a space.
67 ///
68 /// **⚠️ The following code is in { .c } ⚠️**
69 ///
70 /// ``` { .c }
71 /// const char *regex_pattern = "\\Bcare\\s";
72 /// ```
73 ///
74 /// An efficient way to match with this pattern is to start examining at
75 /// `start_position` 6 in the string `healthcare` or `health-care`.
76 ///
77 /// **⚠️ The following code is in { .c } ⚠️**
78 ///
79 /// ``` { .c }
80 /// const char *regex_pattern = "\\Bcare\\s";
81 /// const char *string_to_search = "healthcare provider";
82 /// g_autoptr(GMatchInfo) match_info = NULL;
83 /// g_autoptr(GRegex) regex = NULL;
84 ///
85 /// regex = g_regex_new (
86 /// regex_pattern,
87 /// G_REGEX_DEFAULT,
88 /// G_REGEX_MATCH_DEFAULT,
89 /// NULL);
90 /// g_assert (regex != NULL);
91 ///
92 /// g_regex_match_full (
93 /// regex,
94 /// string_to_search,
95 /// -1,
96 /// 6, // position of 'c' in the test string.
97 /// G_REGEX_MATCH_DEFAULT,
98 /// &match_info,
99 /// NULL);
100 /// ```
101 ///
102 /// The method [`match_full()`][Self::match_full()] (and other methods implementing
103 /// `start_pos`) allow for lookback before the start position to determine if
104 /// the previous character satisfies an assertion.
105 ///
106 /// Unless you set the [flags@GLib.RegexCompileFlags.RAW] as one of
107 /// the `GRegexCompileFlags`, all the strings passed to `GRegex` methods must
108 /// be encoded in UTF-8. The lengths and the positions inside the strings are
109 /// in bytes and not in characters, so, for instance, `\xc3\xa0` (i.e., `à`)
110 /// is two bytes long but it is treated as a single character. If you set
111 /// `G_REGEX_RAW`, the strings can be non-valid UTF-8 strings and a byte is
112 /// treated as a character, so `\xc3\xa0` is two bytes and two characters long.
113 ///
114 /// Regarding line endings, `\n` matches a `\n` character, and `\r` matches
115 /// a `\r` character. More generally, `\R` matches all typical line endings:
116 /// CR + LF (`\r\n`), LF (linefeed, U+000A, `\n`), VT (vertical tab, U+000B,
117 /// `\v`), FF (formfeed, U+000C, `\f`), CR (carriage return, U+000D, `\r`),
118 /// NEL (next line, U+0085), LS (line separator, U+2028), and PS (paragraph
119 /// separator, U+2029).
120 ///
121 /// The behaviour of the dot, circumflex, and dollar metacharacters are
122 /// affected by newline characters. By default, `GRegex` matches any newline
123 /// character matched by `\R`. You can limit the matched newline characters by
124 /// specifying the [flags@GLib.RegexMatchFlags.NEWLINE_CR],
125 /// [flags@GLib.RegexMatchFlags.NEWLINE_LF], and
126 /// [flags@GLib.RegexMatchFlags.NEWLINE_CRLF] compile options, and
127 /// with [flags@GLib.RegexMatchFlags.NEWLINE_ANY],
128 /// [flags@GLib.RegexMatchFlags.NEWLINE_CR],
129 /// [flags@GLib.RegexMatchFlags.NEWLINE_LF] and
130 /// [flags@GLib.RegexMatchFlags.NEWLINE_CRLF] match options.
131 /// These settings are also relevant when compiling a pattern if
132 /// [flags@GLib.RegexCompileFlags.EXTENDED] is set and an unescaped
133 /// `#` outside a character class is encountered. This indicates a comment
134 /// that lasts until after the next newline.
135 ///
136 /// Because `GRegex` does not modify its internal state between creation and
137 /// destruction, you can create and modify the same `GRegex` instance from
138 /// different threads. In contrast, [`MatchInfo`][crate::MatchInfo] is not thread safe.
139 ///
140 /// The regular expression low-level functionalities are obtained through
141 /// the excellent [PCRE](http://www.pcre.org/) library written by Philip Hazel.
142 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
143 pub struct Regex(Shared<ffi::GRegex>);
144
145 match fn {
146 ref => |ptr| ffi::g_regex_ref(ptr),
147 unref => |ptr| ffi::g_regex_unref(ptr),
148 type_ => || ffi::g_regex_get_type(),
149 }
150}
151
152impl Regex {
153 /// Compiles the regular expression to an internal form, and does
154 /// the initial setup of the #GRegex structure.
155 /// ## `pattern`
156 /// the regular expression
157 /// ## `compile_options`
158 /// compile options for the regular expression, or 0
159 /// ## `match_options`
160 /// match options for the regular expression, or 0
161 ///
162 /// # Returns
163 ///
164 /// a #GRegex structure or [`None`] if an error occurred. Call
165 /// g_regex_unref() when you are done with it
166 #[doc(alias = "g_regex_new")]
167 pub fn new(
168 pattern: &str,
169 compile_options: RegexCompileFlags,
170 match_options: RegexMatchFlags,
171 ) -> Result<Option<Regex>, crate::Error> {
172 unsafe {
173 let mut error = std::ptr::null_mut();
174 let ret = ffi::g_regex_new(
175 pattern.to_glib_none().0,
176 compile_options.into_glib(),
177 match_options.into_glib(),
178 &mut error,
179 );
180 if error.is_null() {
181 Ok(from_glib_full(ret))
182 } else {
183 Err(from_glib_full(error))
184 }
185 }
186 }
187
188 /// Returns the number of capturing subpatterns in the pattern.
189 ///
190 /// # Returns
191 ///
192 /// the number of capturing subpatterns
193 #[doc(alias = "g_regex_get_capture_count")]
194 #[doc(alias = "get_capture_count")]
195 pub fn capture_count(&self) -> i32 {
196 unsafe { ffi::g_regex_get_capture_count(self.to_glib_none().0) }
197 }
198
199 /// Returns the compile options that @self was created with.
200 ///
201 /// Depending on the version of PCRE that is used, this may or may not
202 /// include flags set by option expressions such as `(?i)` found at the
203 /// top-level within the compiled pattern.
204 ///
205 /// # Returns
206 ///
207 /// flags from #GRegexCompileFlags
208 #[doc(alias = "g_regex_get_compile_flags")]
209 #[doc(alias = "get_compile_flags")]
210 pub fn compile_flags(&self) -> RegexCompileFlags {
211 unsafe { from_glib(ffi::g_regex_get_compile_flags(self.to_glib_none().0)) }
212 }
213
214 /// Checks whether the pattern contains explicit CR or LF references.
215 ///
216 /// # Returns
217 ///
218 /// [`true`] if the pattern contains explicit CR or LF references
219 #[doc(alias = "g_regex_get_has_cr_or_lf")]
220 #[doc(alias = "get_has_cr_or_lf")]
221 pub fn has_cr_or_lf(&self) -> bool {
222 unsafe { from_glib(ffi::g_regex_get_has_cr_or_lf(self.to_glib_none().0)) }
223 }
224
225 /// Returns the match options that @self was created with.
226 ///
227 /// # Returns
228 ///
229 /// flags from #GRegexMatchFlags
230 #[doc(alias = "g_regex_get_match_flags")]
231 #[doc(alias = "get_match_flags")]
232 pub fn match_flags(&self) -> RegexMatchFlags {
233 unsafe { from_glib(ffi::g_regex_get_match_flags(self.to_glib_none().0)) }
234 }
235
236 /// Returns the number of the highest back reference
237 /// in the pattern, or 0 if the pattern does not contain
238 /// back references.
239 ///
240 /// # Returns
241 ///
242 /// the number of the highest back reference
243 #[doc(alias = "g_regex_get_max_backref")]
244 #[doc(alias = "get_max_backref")]
245 pub fn max_backref(&self) -> i32 {
246 unsafe { ffi::g_regex_get_max_backref(self.to_glib_none().0) }
247 }
248
249 /// Gets the number of characters in the longest lookbehind assertion in the
250 /// pattern. This information is useful when doing multi-segment matching using
251 /// the partial matching facilities.
252 ///
253 /// # Returns
254 ///
255 /// the number of characters in the longest lookbehind assertion.
256 #[doc(alias = "g_regex_get_max_lookbehind")]
257 #[doc(alias = "get_max_lookbehind")]
258 pub fn max_lookbehind(&self) -> i32 {
259 unsafe { ffi::g_regex_get_max_lookbehind(self.to_glib_none().0) }
260 }
261
262 /// Gets the pattern string associated with @self, i.e. a copy of
263 /// the string passed to g_regex_new().
264 ///
265 /// # Returns
266 ///
267 /// the pattern of @self
268 #[doc(alias = "g_regex_get_pattern")]
269 #[doc(alias = "get_pattern")]
270 pub fn pattern(&self) -> crate::GString {
271 unsafe { from_glib_none(ffi::g_regex_get_pattern(self.to_glib_none().0)) }
272 }
273
274 //#[doc(alias = "g_regex_replace_eval")]
275 //pub fn replace_eval(&self, string: &[&str], start_position: i32, match_options: RegexMatchFlags, eval: /*Unimplemented*/FnMut(&MatchInfo, /*Ignored*/String) -> bool, user_data: /*Unimplemented*/Option<Basic: Pointer>) -> Result<crate::GString, crate::Error> {
276 // unsafe { TODO: call ffi:g_regex_replace_eval() }
277 //}
278}