glib/regex.rs
1// Take a look at the license at the top of the repository in the LICENSE file.
2
3// rustdoc-stripper-ignore-next
4//! This module is inefficient and should not be used by Rust programs except for
5//! compatibility with GLib.Regex based APIs.
6
7use crate::{
8 ffi, translate::*, GStr, GStringPtr, MatchInfo, PtrSlice, Regex, RegexCompileFlags,
9 RegexMatchFlags,
10};
11use std::{mem, ptr};
12
13impl Regex {
14 /// Retrieves the number of the subexpression named @name.
15 /// ## `name`
16 /// name of the subexpression
17 ///
18 /// # Returns
19 ///
20 /// The number of the subexpression or -1 if @name
21 /// does not exist
22 // rustdoc-stripper-ignore-next-stop
23 /// Retrieves the number of the subexpression named @name.
24 /// ## `name`
25 /// name of the subexpression
26 ///
27 /// # Returns
28 ///
29 /// The number of the subexpression or -1 if @name
30 /// does not exist
31 #[doc(alias = "g_regex_get_string_number")]
32 #[doc(alias = "get_string_number")]
33 pub fn string_number(&self, name: impl IntoGStr) -> i32 {
34 name.run_with_gstr(|name| unsafe {
35 ffi::g_regex_get_string_number(self.to_glib_none().0, name.to_glib_none().0)
36 })
37 }
38
39 /// Escapes the nul characters in @string to "\x00". It can be used
40 /// to compile a regex with embedded nul characters.
41 ///
42 /// For completeness, @length can be -1 for a nul-terminated string.
43 /// In this case the output string will be of course equal to @string.
44 /// ## `string`
45 /// the string to escape
46 /// ## `length`
47 /// the length of @string
48 ///
49 /// # Returns
50 ///
51 /// a newly-allocated escaped string
52 // rustdoc-stripper-ignore-next-stop
53 /// Escapes the nul characters in @string to "\x00". It can be used
54 /// to compile a regex with embedded nul characters.
55 ///
56 /// For completeness, @length can be -1 for a nul-terminated string.
57 /// In this case the output string will be of course equal to @string.
58 /// ## `string`
59 /// the string to escape
60 /// ## `length`
61 /// the length of @string
62 ///
63 /// # Returns
64 ///
65 /// a newly-allocated escaped string
66 #[doc(alias = "g_regex_escape_nul")]
67 pub fn escape_nul(string: impl IntoGStr) -> crate::GString {
68 unsafe {
69 string.run_with_gstr(|string| {
70 from_glib_full(ffi::g_regex_escape_nul(
71 string.to_glib_none().0,
72 string.len() as _,
73 ))
74 })
75 }
76 }
77
78 /// Escapes the special characters used for regular expressions
79 /// in @string, for instance "a.b*c" becomes "a\.b\*c". This
80 /// function is useful to dynamically generate regular expressions.
81 ///
82 /// @string can contain nul characters that are replaced with "\0",
83 /// in this case remember to specify the correct length of @string
84 /// in @length.
85 /// ## `string`
86 /// the string to escape
87 /// ## `length`
88 /// the length of @string, in bytes, or -1 if @string is nul-terminated
89 ///
90 /// # Returns
91 ///
92 /// a newly-allocated escaped string
93 // rustdoc-stripper-ignore-next-stop
94 /// Escapes the special characters used for regular expressions
95 /// in @string, for instance "a.b*c" becomes "a\.b\*c". This
96 /// function is useful to dynamically generate regular expressions.
97 ///
98 /// @string can contain nul characters that are replaced with "\0",
99 /// in this case remember to specify the correct length of @string
100 /// in @length.
101 /// ## `string`
102 /// the string to escape
103 /// ## `length`
104 /// the length of @string, in bytes, or -1 if @string is nul-terminated
105 ///
106 /// # Returns
107 ///
108 /// a newly-allocated escaped string
109 #[doc(alias = "g_regex_escape_string")]
110 pub fn escape_string(string: impl IntoGStr) -> crate::GString {
111 unsafe {
112 string.run_with_gstr(|string| {
113 from_glib_full(ffi::g_regex_escape_string(
114 string.to_glib_none().0,
115 string.len() as _,
116 ))
117 })
118 }
119 }
120
121 /// Checks whether @replacement is a valid replacement string
122 /// (see g_regex_replace()), i.e. that all escape sequences in
123 /// it are valid.
124 ///
125 /// If @has_references is not [`None`] then @replacement is checked
126 /// for pattern references. For instance, replacement text 'foo\n'
127 /// does not contain references and may be evaluated without information
128 /// about actual match, but '\0\1' (whole match followed by first
129 /// subpattern) requires valid #GMatchInfo object.
130 /// ## `replacement`
131 /// the replacement string
132 ///
133 /// # Returns
134 ///
135 /// whether @replacement is a valid replacement string
136 ///
137 /// ## `has_references`
138 /// location to store information about
139 /// references in @replacement or [`None`]
140 // rustdoc-stripper-ignore-next-stop
141 /// Checks whether @replacement is a valid replacement string
142 /// (see g_regex_replace()), i.e. that all escape sequences in
143 /// it are valid.
144 ///
145 /// If @has_references is not [`None`] then @replacement is checked
146 /// for pattern references. For instance, replacement text 'foo\n'
147 /// does not contain references and may be evaluated without information
148 /// about actual match, but '\0\1' (whole match followed by first
149 /// subpattern) requires valid #GMatchInfo object.
150 /// ## `replacement`
151 /// the replacement string
152 ///
153 /// # Returns
154 ///
155 /// whether @replacement is a valid replacement string
156 ///
157 /// ## `has_references`
158 /// location to store information about
159 /// references in @replacement or [`None`]
160 #[doc(alias = "g_regex_check_replacement")]
161 pub fn check_replacement(replacement: impl IntoGStr) -> Result<bool, crate::Error> {
162 replacement.run_with_gstr(|replacement| unsafe {
163 let mut has_references = mem::MaybeUninit::uninit();
164 let mut error = ptr::null_mut();
165 let is_ok = ffi::g_regex_check_replacement(
166 replacement.to_glib_none().0,
167 has_references.as_mut_ptr(),
168 &mut error,
169 );
170 debug_assert_eq!(is_ok == crate::ffi::GFALSE, !error.is_null());
171 if error.is_null() {
172 Ok(from_glib(has_references.assume_init()))
173 } else {
174 Err(from_glib_full(error))
175 }
176 })
177 }
178
179 /// Scans for a match in @string for @pattern.
180 ///
181 /// This function is equivalent to g_regex_match() but it does not
182 /// require to compile the pattern with g_regex_new(), avoiding some
183 /// lines of code when you need just to do a match without extracting
184 /// substrings, capture counts, and so on.
185 ///
186 /// If this function is to be called on the same @pattern more than
187 /// once, it's more efficient to compile the pattern once with
188 /// g_regex_new() and then use g_regex_match().
189 /// ## `pattern`
190 /// the regular expression
191 /// ## `string`
192 /// the string to scan for matches
193 /// ## `compile_options`
194 /// compile options for the regular expression, or 0
195 /// ## `match_options`
196 /// match options, or 0
197 ///
198 /// # Returns
199 ///
200 /// [`true`] if the string matched, [`false`] otherwise
201 // rustdoc-stripper-ignore-next-stop
202 /// Scans for a match in @string for @pattern.
203 ///
204 /// This function is equivalent to g_regex_match() but it does not
205 /// require to compile the pattern with g_regex_new(), avoiding some
206 /// lines of code when you need just to do a match without extracting
207 /// substrings, capture counts, and so on.
208 ///
209 /// If this function is to be called on the same @pattern more than
210 /// once, it's more efficient to compile the pattern once with
211 /// g_regex_new() and then use g_regex_match().
212 /// ## `pattern`
213 /// the regular expression
214 /// ## `string`
215 /// the string to scan for matches
216 /// ## `compile_options`
217 /// compile options for the regular expression, or 0
218 /// ## `match_options`
219 /// match options, or 0
220 ///
221 /// # Returns
222 ///
223 /// [`true`] if the string matched, [`false`] otherwise
224 #[doc(alias = "g_regex_match_simple")]
225 pub fn match_simple(
226 pattern: impl IntoGStr,
227 string: impl IntoGStr,
228 compile_options: RegexCompileFlags,
229 match_options: RegexMatchFlags,
230 ) -> bool {
231 pattern.run_with_gstr(|pattern| {
232 string.run_with_gstr(|string| unsafe {
233 from_glib(ffi::g_regex_match_simple(
234 pattern.to_glib_none().0,
235 string.to_glib_none().0,
236 compile_options.into_glib(),
237 match_options.into_glib(),
238 ))
239 })
240 })
241 }
242
243 /// Replaces all occurrences of the pattern in @self with the
244 /// replacement text. Backreferences of the form `\number` or
245 /// `\g<number>` in the replacement text are interpolated by the
246 /// number-th captured subexpression of the match, `\g<name>` refers
247 /// to the captured subexpression with the given name. `\0` refers
248 /// to the complete match, but `\0` followed by a number is the octal
249 /// representation of a character. To include a literal `\` in the
250 /// replacement, write `\\\\`.
251 ///
252 /// There are also escapes that change the case of the following text:
253 ///
254 /// - \l: Convert to lower case the next character
255 /// - \u: Convert to upper case the next character
256 /// - \L: Convert to lower case till \E
257 /// - \U: Convert to upper case till \E
258 /// - \E: End case modification
259 ///
260 /// If you do not need to use backreferences use g_regex_replace_literal().
261 ///
262 /// The @replacement string must be UTF-8 encoded even if [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] was
263 /// passed to g_regex_new(). If you want to use not UTF-8 encoded strings
264 /// you can use g_regex_replace_literal().
265 ///
266 /// Setting @start_position differs from just passing over a shortened
267 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern that
268 /// begins with any kind of lookbehind assertion, such as "\b".
269 /// ## `string`
270 /// the string to perform matches against
271 /// ## `start_position`
272 /// starting index of the string to match, in bytes
273 /// ## `replacement`
274 /// text to replace each match with
275 /// ## `match_options`
276 /// options for the match
277 ///
278 /// # Returns
279 ///
280 /// a newly allocated string containing the replacements
281 // rustdoc-stripper-ignore-next-stop
282 /// Replaces all occurrences of the pattern in @self with the
283 /// replacement text. Backreferences of the form `\number` or
284 /// `\g<number>` in the replacement text are interpolated by the
285 /// number-th captured subexpression of the match, `\g<name>` refers
286 /// to the captured subexpression with the given name. `\0` refers
287 /// to the complete match, but `\0` followed by a number is the octal
288 /// representation of a character. To include a literal `\` in the
289 /// replacement, write `\\\\`.
290 ///
291 /// There are also escapes that change the case of the following text:
292 ///
293 /// - \l: Convert to lower case the next character
294 /// - \u: Convert to upper case the next character
295 /// - \L: Convert to lower case till \E
296 /// - \U: Convert to upper case till \E
297 /// - \E: End case modification
298 ///
299 /// If you do not need to use backreferences use g_regex_replace_literal().
300 ///
301 /// The @replacement string must be UTF-8 encoded even if [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] was
302 /// passed to g_regex_new(). If you want to use not UTF-8 encoded strings
303 /// you can use g_regex_replace_literal().
304 ///
305 /// Setting @start_position differs from just passing over a shortened
306 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern that
307 /// begins with any kind of lookbehind assertion, such as "\b".
308 /// ## `string`
309 /// the string to perform matches against
310 /// ## `start_position`
311 /// starting index of the string to match, in bytes
312 /// ## `replacement`
313 /// text to replace each match with
314 /// ## `match_options`
315 /// options for the match
316 ///
317 /// # Returns
318 ///
319 /// a newly allocated string containing the replacements
320 #[doc(alias = "g_regex_replace")]
321 pub fn replace(
322 &self,
323 string: impl IntoGStr,
324 start_position: i32,
325 replacement: impl IntoGStr,
326 match_options: RegexMatchFlags,
327 ) -> Result<crate::GString, crate::Error> {
328 unsafe {
329 string.run_with_gstr(|string| {
330 replacement.run_with_gstr(|replacement| {
331 let mut error = ptr::null_mut();
332 let ret = ffi::g_regex_replace(
333 self.to_glib_none().0,
334 string.as_ptr() as *const _,
335 string.len() as _,
336 start_position,
337 replacement.to_glib_none().0,
338 match_options.into_glib(),
339 &mut error,
340 );
341 debug_assert_eq!(ret.is_null(), !error.is_null());
342 if error.is_null() {
343 Ok(from_glib_full(ret))
344 } else {
345 Err(from_glib_full(error))
346 }
347 })
348 })
349 }
350 }
351
352 /// Using the standard algorithm for regular expression matching only
353 /// the longest match in the string is retrieved. This function uses
354 /// a different algorithm so it can retrieve all the possible matches.
355 /// For more documentation see g_regex_match_all_full().
356 ///
357 /// A #GMatchInfo structure, used to get information on the match, is
358 /// stored in @match_info if not [`None`]. Note that if @match_info is
359 /// not [`None`] then it is created even if the function returns [`false`],
360 /// i.e. you must free it regardless if regular expression actually
361 /// matched.
362 ///
363 /// @string is not copied and is used in #GMatchInfo internally. If
364 /// you use any #GMatchInfo method (except g_match_info_free()) after
365 /// freeing or modifying @string then the behaviour is undefined.
366 /// ## `string`
367 /// the string to scan for matches
368 /// ## `match_options`
369 /// match options
370 ///
371 /// # Returns
372 ///
373 /// [`true`] if the string matched, [`false`] otherwise
374 ///
375 /// ## `match_info`
376 /// pointer to location where to store
377 /// the #GMatchInfo, or [`None`] if you do not need it
378 // rustdoc-stripper-ignore-next-stop
379 /// Using the standard algorithm for regular expression matching only
380 /// the longest match in the string is retrieved. This function uses
381 /// a different algorithm so it can retrieve all the possible matches.
382 /// For more documentation see g_regex_match_all_full().
383 ///
384 /// A #GMatchInfo structure, used to get information on the match, is
385 /// stored in @match_info if not [`None`]. Note that if @match_info is
386 /// not [`None`] then it is created even if the function returns [`false`],
387 /// i.e. you must free it regardless if regular expression actually
388 /// matched.
389 ///
390 /// @string is not copied and is used in #GMatchInfo internally. If
391 /// you use any #GMatchInfo method (except g_match_info_free()) after
392 /// freeing or modifying @string then the behaviour is undefined.
393 /// ## `string`
394 /// the string to scan for matches
395 /// ## `match_options`
396 /// match options
397 ///
398 /// # Returns
399 ///
400 /// [`true`] if the string matched, [`false`] otherwise
401 ///
402 /// ## `match_info`
403 /// pointer to location where to store
404 /// the #GMatchInfo, or [`None`] if you do not need it
405 #[doc(alias = "g_regex_match_all")]
406 pub fn match_all<'input>(
407 &self,
408 string: &'input GStr,
409 match_options: RegexMatchFlags,
410 ) -> Result<MatchInfo<'input>, crate::Error> {
411 self.match_all_full(string, 0, match_options)
412 }
413
414 /// Using the standard algorithm for regular expression matching only
415 /// the longest match in the @string is retrieved, it is not possible
416 /// to obtain all the available matches. For instance matching
417 /// `"<a> <b> <c>"` against the pattern `"<.*>"`
418 /// you get `"<a> <b> <c>"`.
419 ///
420 /// This function uses a different algorithm (called DFA, i.e. deterministic
421 /// finite automaton), so it can retrieve all the possible matches, all
422 /// starting at the same point in the string. For instance matching
423 /// `"<a> <b> <c>"` against the pattern `"<.*>"`
424 /// you would obtain three matches: `"<a> <b> <c>"`,
425 /// `"<a> <b>"` and `"<a>"`.
426 ///
427 /// The number of matched strings is retrieved using
428 /// g_match_info_get_match_count(). To obtain the matched strings and
429 /// their position you can use, respectively, g_match_info_fetch() and
430 /// g_match_info_fetch_pos(). Note that the strings are returned in
431 /// reverse order of length; that is, the longest matching string is
432 /// given first.
433 ///
434 /// Note that the DFA algorithm is slower than the standard one and it
435 /// is not able to capture substrings, so backreferences do not work.
436 ///
437 /// Setting @start_position differs from just passing over a shortened
438 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
439 /// that begins with any kind of lookbehind assertion, such as "\b".
440 ///
441 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
442 ///
443 /// A #GMatchInfo structure, used to get information on the match, is
444 /// stored in @match_info if not [`None`]. Note that if @match_info is
445 /// not [`None`] then it is created even if the function returns [`false`],
446 /// i.e. you must free it regardless if regular expression actually
447 /// matched.
448 ///
449 /// @string is not copied and is used in #GMatchInfo internally. If
450 /// you use any #GMatchInfo method (except g_match_info_free()) after
451 /// freeing or modifying @string then the behaviour is undefined.
452 /// ## `string`
453 /// the string to scan for matches
454 /// ## `start_position`
455 /// starting index of the string to match, in bytes
456 /// ## `match_options`
457 /// match options
458 ///
459 /// # Returns
460 ///
461 /// [`true`] if the string matched, [`false`] otherwise
462 ///
463 /// ## `match_info`
464 /// pointer to location where to store
465 /// the #GMatchInfo, or [`None`] if you do not need it
466 // rustdoc-stripper-ignore-next-stop
467 /// Using the standard algorithm for regular expression matching only
468 /// the longest match in the @string is retrieved, it is not possible
469 /// to obtain all the available matches. For instance matching
470 /// `"<a> <b> <c>"` against the pattern `"<.*>"`
471 /// you get `"<a> <b> <c>"`.
472 ///
473 /// This function uses a different algorithm (called DFA, i.e. deterministic
474 /// finite automaton), so it can retrieve all the possible matches, all
475 /// starting at the same point in the string. For instance matching
476 /// `"<a> <b> <c>"` against the pattern `"<.*>"`
477 /// you would obtain three matches: `"<a> <b> <c>"`,
478 /// `"<a> <b>"` and `"<a>"`.
479 ///
480 /// The number of matched strings is retrieved using
481 /// g_match_info_get_match_count(). To obtain the matched strings and
482 /// their position you can use, respectively, g_match_info_fetch() and
483 /// g_match_info_fetch_pos(). Note that the strings are returned in
484 /// reverse order of length; that is, the longest matching string is
485 /// given first.
486 ///
487 /// Note that the DFA algorithm is slower than the standard one and it
488 /// is not able to capture substrings, so backreferences do not work.
489 ///
490 /// Setting @start_position differs from just passing over a shortened
491 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
492 /// that begins with any kind of lookbehind assertion, such as "\b".
493 ///
494 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
495 ///
496 /// A #GMatchInfo structure, used to get information on the match, is
497 /// stored in @match_info if not [`None`]. Note that if @match_info is
498 /// not [`None`] then it is created even if the function returns [`false`],
499 /// i.e. you must free it regardless if regular expression actually
500 /// matched.
501 ///
502 /// @string is not copied and is used in #GMatchInfo internally. If
503 /// you use any #GMatchInfo method (except g_match_info_free()) after
504 /// freeing or modifying @string then the behaviour is undefined.
505 /// ## `string`
506 /// the string to scan for matches
507 /// ## `start_position`
508 /// starting index of the string to match, in bytes
509 /// ## `match_options`
510 /// match options
511 ///
512 /// # Returns
513 ///
514 /// [`true`] if the string matched, [`false`] otherwise
515 ///
516 /// ## `match_info`
517 /// pointer to location where to store
518 /// the #GMatchInfo, or [`None`] if you do not need it
519 #[doc(alias = "g_regex_match_all_full")]
520 pub fn match_all_full<'input>(
521 &self,
522 string: &'input GStr,
523 start_position: i32,
524 match_options: RegexMatchFlags,
525 ) -> Result<MatchInfo<'input>, crate::Error> {
526 unsafe {
527 let mut match_info = ptr::null_mut();
528 let mut error = ptr::null_mut();
529 let res = ffi::g_regex_match_all_full(
530 self.to_glib_none().0,
531 string.to_glib_none().0,
532 string.len() as _,
533 start_position,
534 match_options.into_glib(),
535 &mut match_info,
536 &mut error,
537 );
538 if error.is_null() {
539 let match_info = MatchInfo::from_glib_full(match_info);
540 debug_assert_eq!(match_info.matches(), from_glib(res));
541 Ok(match_info)
542 } else {
543 debug_assert!(match_info.is_null());
544 Err(from_glib_full(error))
545 }
546 }
547 }
548
549 /// Scans for a match in @string for the pattern in @self.
550 /// The @match_options are combined with the match options specified
551 /// when the @self structure was created, letting you have more
552 /// flexibility in reusing #GRegex structures.
553 ///
554 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
555 ///
556 /// A #GMatchInfo structure, used to get information on the match,
557 /// is stored in @match_info if not [`None`]. Note that if @match_info
558 /// is not [`None`] then it is created even if the function returns [`false`],
559 /// i.e. you must free it regardless if regular expression actually matched.
560 ///
561 /// To retrieve all the non-overlapping matches of the pattern in
562 /// string you can use g_match_info_next().
563 ///
564 ///
565 ///
566 /// **⚠️ The following code is in C ⚠️**
567 ///
568 /// ```C
569 /// static void
570 /// print_uppercase_words (const gchar *string)
571 /// {
572 /// // Print all uppercase-only words.
573 /// GRegex *regex;
574 /// GMatchInfo *match_info;
575 ///
576 /// regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
577 /// g_regex_match (regex, string, 0, &match_info);
578 /// while (g_match_info_matches (match_info))
579 /// {
580 /// gchar *word = g_match_info_fetch (match_info, 0);
581 /// g_print ("Found: %s\n", word);
582 /// g_free (word);
583 /// g_match_info_next (match_info, NULL);
584 /// }
585 /// g_match_info_free (match_info);
586 /// g_regex_unref (regex);
587 /// }
588 /// ```
589 ///
590 /// @string is not copied and is used in #GMatchInfo internally. If
591 /// you use any #GMatchInfo method (except g_match_info_free()) after
592 /// freeing or modifying @string then the behaviour is undefined.
593 /// ## `string`
594 /// the string to scan for matches
595 /// ## `match_options`
596 /// match options
597 ///
598 /// # Returns
599 ///
600 /// [`true`] if the string matched, [`false`] otherwise
601 ///
602 /// ## `match_info`
603 /// pointer to location where to store
604 /// the #GMatchInfo, or [`None`] if you do not need it
605 // rustdoc-stripper-ignore-next-stop
606 /// Scans for a match in @string for the pattern in @self.
607 /// The @match_options are combined with the match options specified
608 /// when the @self structure was created, letting you have more
609 /// flexibility in reusing #GRegex structures.
610 ///
611 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
612 ///
613 /// A #GMatchInfo structure, used to get information on the match,
614 /// is stored in @match_info if not [`None`]. Note that if @match_info
615 /// is not [`None`] then it is created even if the function returns [`false`],
616 /// i.e. you must free it regardless if regular expression actually matched.
617 ///
618 /// To retrieve all the non-overlapping matches of the pattern in
619 /// string you can use g_match_info_next().
620 ///
621 ///
622 ///
623 /// **⚠️ The following code is in C ⚠️**
624 ///
625 /// ```C
626 /// static void
627 /// print_uppercase_words (const gchar *string)
628 /// {
629 /// // Print all uppercase-only words.
630 /// GRegex *regex;
631 /// GMatchInfo *match_info;
632 ///
633 /// regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
634 /// g_regex_match (regex, string, 0, &match_info);
635 /// while (g_match_info_matches (match_info))
636 /// {
637 /// gchar *word = g_match_info_fetch (match_info, 0);
638 /// g_print ("Found: %s\n", word);
639 /// g_free (word);
640 /// g_match_info_next (match_info, NULL);
641 /// }
642 /// g_match_info_free (match_info);
643 /// g_regex_unref (regex);
644 /// }
645 /// ```
646 ///
647 /// @string is not copied and is used in #GMatchInfo internally. If
648 /// you use any #GMatchInfo method (except g_match_info_free()) after
649 /// freeing or modifying @string then the behaviour is undefined.
650 /// ## `string`
651 /// the string to scan for matches
652 /// ## `match_options`
653 /// match options
654 ///
655 /// # Returns
656 ///
657 /// [`true`] if the string matched, [`false`] otherwise
658 ///
659 /// ## `match_info`
660 /// pointer to location where to store
661 /// the #GMatchInfo, or [`None`] if you do not need it
662 #[doc(alias = "g_regex_match")]
663 pub fn match_<'input>(
664 &self,
665 string: &'input GStr,
666 match_options: RegexMatchFlags,
667 ) -> Result<MatchInfo<'input>, crate::Error> {
668 self.match_full(string, 0, match_options)
669 }
670
671 /// Scans for a match in @string for the pattern in @self.
672 /// The @match_options are combined with the match options specified
673 /// when the @self structure was created, letting you have more
674 /// flexibility in reusing #GRegex structures.
675 ///
676 /// Setting @start_position differs from just passing over a shortened
677 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
678 /// that begins with any kind of lookbehind assertion, such as "\b".
679 ///
680 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
681 ///
682 /// A #GMatchInfo structure, used to get information on the match, is
683 /// stored in @match_info if not [`None`]. Note that if @match_info is
684 /// not [`None`] then it is created even if the function returns [`false`],
685 /// i.e. you must free it regardless if regular expression actually
686 /// matched.
687 ///
688 /// @string is not copied and is used in #GMatchInfo internally. If
689 /// you use any #GMatchInfo method (except g_match_info_free()) after
690 /// freeing or modifying @string then the behaviour is undefined.
691 ///
692 /// To retrieve all the non-overlapping matches of the pattern in
693 /// string you can use g_match_info_next().
694 ///
695 ///
696 ///
697 /// **⚠️ The following code is in C ⚠️**
698 ///
699 /// ```C
700 /// static void
701 /// print_uppercase_words (const gchar *string)
702 /// {
703 /// // Print all uppercase-only words.
704 /// GRegex *regex;
705 /// GMatchInfo *match_info;
706 /// GError *error = NULL;
707 ///
708 /// regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
709 /// g_regex_match_full (regex, string, -1, 0, 0, &match_info, &error);
710 /// while (g_match_info_matches (match_info))
711 /// {
712 /// gchar *word = g_match_info_fetch (match_info, 0);
713 /// g_print ("Found: %s\n", word);
714 /// g_free (word);
715 /// g_match_info_next (match_info, &error);
716 /// }
717 /// g_match_info_free (match_info);
718 /// g_regex_unref (regex);
719 /// if (error != NULL)
720 /// {
721 /// g_printerr ("Error while matching: %s\n", error->message);
722 /// g_error_free (error);
723 /// }
724 /// }
725 /// ```
726 /// ## `string`
727 /// the string to scan for matches
728 /// ## `start_position`
729 /// starting index of the string to match, in bytes
730 /// ## `match_options`
731 /// match options
732 ///
733 /// # Returns
734 ///
735 /// [`true`] if the string matched, [`false`] otherwise
736 ///
737 /// ## `match_info`
738 /// pointer to location where to store
739 /// the #GMatchInfo, or [`None`] if you do not need it
740 // rustdoc-stripper-ignore-next-stop
741 /// Scans for a match in @string for the pattern in @self.
742 /// The @match_options are combined with the match options specified
743 /// when the @self structure was created, letting you have more
744 /// flexibility in reusing #GRegex structures.
745 ///
746 /// Setting @start_position differs from just passing over a shortened
747 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
748 /// that begins with any kind of lookbehind assertion, such as "\b".
749 ///
750 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
751 ///
752 /// A #GMatchInfo structure, used to get information on the match, is
753 /// stored in @match_info if not [`None`]. Note that if @match_info is
754 /// not [`None`] then it is created even if the function returns [`false`],
755 /// i.e. you must free it regardless if regular expression actually
756 /// matched.
757 ///
758 /// @string is not copied and is used in #GMatchInfo internally. If
759 /// you use any #GMatchInfo method (except g_match_info_free()) after
760 /// freeing or modifying @string then the behaviour is undefined.
761 ///
762 /// To retrieve all the non-overlapping matches of the pattern in
763 /// string you can use g_match_info_next().
764 ///
765 ///
766 ///
767 /// **⚠️ The following code is in C ⚠️**
768 ///
769 /// ```C
770 /// static void
771 /// print_uppercase_words (const gchar *string)
772 /// {
773 /// // Print all uppercase-only words.
774 /// GRegex *regex;
775 /// GMatchInfo *match_info;
776 /// GError *error = NULL;
777 ///
778 /// regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
779 /// g_regex_match_full (regex, string, -1, 0, 0, &match_info, &error);
780 /// while (g_match_info_matches (match_info))
781 /// {
782 /// gchar *word = g_match_info_fetch (match_info, 0);
783 /// g_print ("Found: %s\n", word);
784 /// g_free (word);
785 /// g_match_info_next (match_info, &error);
786 /// }
787 /// g_match_info_free (match_info);
788 /// g_regex_unref (regex);
789 /// if (error != NULL)
790 /// {
791 /// g_printerr ("Error while matching: %s\n", error->message);
792 /// g_error_free (error);
793 /// }
794 /// }
795 /// ```
796 /// ## `string`
797 /// the string to scan for matches
798 /// ## `start_position`
799 /// starting index of the string to match, in bytes
800 /// ## `match_options`
801 /// match options
802 ///
803 /// # Returns
804 ///
805 /// [`true`] if the string matched, [`false`] otherwise
806 ///
807 /// ## `match_info`
808 /// pointer to location where to store
809 /// the #GMatchInfo, or [`None`] if you do not need it
810 #[doc(alias = "g_regex_match_full")]
811 pub fn match_full<'input>(
812 &self,
813 string: &'input GStr,
814 start_position: i32,
815 match_options: RegexMatchFlags,
816 ) -> Result<MatchInfo<'input>, crate::Error> {
817 unsafe {
818 let mut match_info = ptr::null_mut();
819 let mut error = ptr::null_mut();
820 let res = ffi::g_regex_match_full(
821 self.to_glib_none().0,
822 string.to_glib_none().0,
823 string.len() as _,
824 start_position,
825 match_options.into_glib(),
826 &mut match_info,
827 &mut error,
828 );
829 if error.is_null() {
830 let match_info = MatchInfo::from_glib_full(match_info);
831 debug_assert_eq!(match_info.matches(), from_glib(res));
832 Ok(match_info)
833 } else {
834 debug_assert!(match_info.is_null());
835 Err(from_glib_full(error))
836 }
837 }
838 }
839
840 /// Replaces all occurrences of the pattern in @self with the
841 /// replacement text. @replacement is replaced literally, to
842 /// include backreferences use g_regex_replace().
843 ///
844 /// Setting @start_position differs from just passing over a
845 /// shortened string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the
846 /// case of a pattern that begins with any kind of lookbehind
847 /// assertion, such as "\b".
848 /// ## `string`
849 /// the string to perform matches against
850 /// ## `start_position`
851 /// starting index of the string to match, in bytes
852 /// ## `replacement`
853 /// text to replace each match with
854 /// ## `match_options`
855 /// options for the match
856 ///
857 /// # Returns
858 ///
859 /// a newly allocated string containing the replacements
860 // rustdoc-stripper-ignore-next-stop
861 /// Replaces all occurrences of the pattern in @self with the
862 /// replacement text. @replacement is replaced literally, to
863 /// include backreferences use g_regex_replace().
864 ///
865 /// Setting @start_position differs from just passing over a
866 /// shortened string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the
867 /// case of a pattern that begins with any kind of lookbehind
868 /// assertion, such as "\b".
869 /// ## `string`
870 /// the string to perform matches against
871 /// ## `start_position`
872 /// starting index of the string to match, in bytes
873 /// ## `replacement`
874 /// text to replace each match with
875 /// ## `match_options`
876 /// options for the match
877 ///
878 /// # Returns
879 ///
880 /// a newly allocated string containing the replacements
881 #[doc(alias = "g_regex_replace_literal")]
882 pub fn replace_literal(
883 &self,
884 string: impl IntoGStr,
885 start_position: i32,
886 replacement: impl IntoGStr,
887 match_options: RegexMatchFlags,
888 ) -> Result<crate::GString, crate::Error> {
889 unsafe {
890 string.run_with_gstr(|string| {
891 replacement.run_with_gstr(|replacement| {
892 let mut error = ptr::null_mut();
893 let ret = ffi::g_regex_replace_literal(
894 self.to_glib_none().0,
895 string.to_glib_none().0,
896 string.len() as _,
897 start_position,
898 replacement.to_glib_none().0,
899 match_options.into_glib(),
900 &mut error,
901 );
902 debug_assert_eq!(ret.is_null(), !error.is_null());
903 if error.is_null() {
904 Ok(from_glib_full(ret))
905 } else {
906 Err(from_glib_full(error))
907 }
908 })
909 })
910 }
911 }
912
913 /// Breaks the string on the pattern, and returns an array of the tokens.
914 /// If the pattern contains capturing parentheses, then the text for each
915 /// of the substrings will also be returned. If the pattern does not match
916 /// anywhere in the string, then the whole string is returned as the first
917 /// token.
918 ///
919 /// As a special case, the result of splitting the empty string "" is an
920 /// empty vector, not a vector containing a single string. The reason for
921 /// this special case is that being able to represent an empty vector is
922 /// typically more useful than consistent handling of empty elements. If
923 /// you do need to represent empty elements, you'll need to check for the
924 /// empty string before calling this function.
925 ///
926 /// A pattern that can match empty strings splits @string into separate
927 /// characters wherever it matches the empty string between characters.
928 /// For example splitting "ab c" using as a separator "\s*", you will get
929 /// "a", "b" and "c".
930 /// ## `string`
931 /// the string to split with the pattern
932 /// ## `match_options`
933 /// match time option flags
934 ///
935 /// # Returns
936 ///
937 /// a [`None`]-terminated gchar ** array. Free
938 /// it using g_strfreev()
939 // rustdoc-stripper-ignore-next-stop
940 /// Breaks the string on the pattern, and returns an array of the tokens.
941 /// If the pattern contains capturing parentheses, then the text for each
942 /// of the substrings will also be returned. If the pattern does not match
943 /// anywhere in the string, then the whole string is returned as the first
944 /// token.
945 ///
946 /// As a special case, the result of splitting the empty string "" is an
947 /// empty vector, not a vector containing a single string. The reason for
948 /// this special case is that being able to represent an empty vector is
949 /// typically more useful than consistent handling of empty elements. If
950 /// you do need to represent empty elements, you'll need to check for the
951 /// empty string before calling this function.
952 ///
953 /// A pattern that can match empty strings splits @string into separate
954 /// characters wherever it matches the empty string between characters.
955 /// For example splitting "ab c" using as a separator "\s*", you will get
956 /// "a", "b" and "c".
957 /// ## `string`
958 /// the string to split with the pattern
959 /// ## `match_options`
960 /// match time option flags
961 ///
962 /// # Returns
963 ///
964 /// a [`None`]-terminated gchar ** array. Free
965 /// it using g_strfreev()
966 #[doc(alias = "g_regex_split")]
967 pub fn split(
968 &self,
969 string: impl IntoGStr,
970 match_options: RegexMatchFlags,
971 ) -> PtrSlice<GStringPtr> {
972 self.split_full(string, 0, match_options, 0)
973 .unwrap_or_default()
974 }
975
976 /// Breaks the string on the pattern, and returns an array of the tokens.
977 /// If the pattern contains capturing parentheses, then the text for each
978 /// of the substrings will also be returned. If the pattern does not match
979 /// anywhere in the string, then the whole string is returned as the first
980 /// token.
981 ///
982 /// As a special case, the result of splitting the empty string "" is an
983 /// empty vector, not a vector containing a single string. The reason for
984 /// this special case is that being able to represent an empty vector is
985 /// typically more useful than consistent handling of empty elements. If
986 /// you do need to represent empty elements, you'll need to check for the
987 /// empty string before calling this function.
988 ///
989 /// A pattern that can match empty strings splits @string into separate
990 /// characters wherever it matches the empty string between characters.
991 /// For example splitting "ab c" using as a separator "\s*", you will get
992 /// "a", "b" and "c".
993 ///
994 /// Setting @start_position differs from just passing over a shortened
995 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
996 /// that begins with any kind of lookbehind assertion, such as "\b".
997 /// ## `string`
998 /// the string to split with the pattern
999 /// ## `start_position`
1000 /// starting index of the string to match, in bytes
1001 /// ## `match_options`
1002 /// match time option flags
1003 /// ## `max_tokens`
1004 /// the maximum number of tokens to split @string into.
1005 /// If this is less than 1, the string is split completely
1006 ///
1007 /// # Returns
1008 ///
1009 /// a [`None`]-terminated gchar ** array. Free
1010 /// it using g_strfreev()
1011 // rustdoc-stripper-ignore-next-stop
1012 /// Breaks the string on the pattern, and returns an array of the tokens.
1013 /// If the pattern contains capturing parentheses, then the text for each
1014 /// of the substrings will also be returned. If the pattern does not match
1015 /// anywhere in the string, then the whole string is returned as the first
1016 /// token.
1017 ///
1018 /// As a special case, the result of splitting the empty string "" is an
1019 /// empty vector, not a vector containing a single string. The reason for
1020 /// this special case is that being able to represent an empty vector is
1021 /// typically more useful than consistent handling of empty elements. If
1022 /// you do need to represent empty elements, you'll need to check for the
1023 /// empty string before calling this function.
1024 ///
1025 /// A pattern that can match empty strings splits @string into separate
1026 /// characters wherever it matches the empty string between characters.
1027 /// For example splitting "ab c" using as a separator "\s*", you will get
1028 /// "a", "b" and "c".
1029 ///
1030 /// Setting @start_position differs from just passing over a shortened
1031 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
1032 /// that begins with any kind of lookbehind assertion, such as "\b".
1033 /// ## `string`
1034 /// the string to split with the pattern
1035 /// ## `start_position`
1036 /// starting index of the string to match, in bytes
1037 /// ## `match_options`
1038 /// match time option flags
1039 /// ## `max_tokens`
1040 /// the maximum number of tokens to split @string into.
1041 /// If this is less than 1, the string is split completely
1042 ///
1043 /// # Returns
1044 ///
1045 /// a [`None`]-terminated gchar ** array. Free
1046 /// it using g_strfreev()
1047 #[doc(alias = "g_regex_split_full")]
1048 pub fn split_full(
1049 &self,
1050 string: impl IntoGStr,
1051 start_position: i32,
1052 match_options: RegexMatchFlags,
1053 max_tokens: i32,
1054 ) -> Result<PtrSlice<GStringPtr>, crate::Error> {
1055 unsafe {
1056 let mut error = ptr::null_mut();
1057 string.run_with_gstr(|string| {
1058 let ret = ffi::g_regex_split_full(
1059 self.to_glib_none().0,
1060 string.to_glib_none().0,
1061 string.len() as _,
1062 start_position,
1063 match_options.into_glib(),
1064 max_tokens,
1065 &mut error,
1066 );
1067 debug_assert_eq!(ret.is_null(), !error.is_null());
1068 if error.is_null() {
1069 Ok(FromGlibPtrContainer::from_glib_full(ret))
1070 } else {
1071 Err(from_glib_full(error))
1072 }
1073 })
1074 }
1075 }
1076
1077 /// Breaks the string on the pattern, and returns an array of
1078 /// the tokens. If the pattern contains capturing parentheses,
1079 /// then the text for each of the substrings will also be returned.
1080 /// If the pattern does not match anywhere in the string, then the
1081 /// whole string is returned as the first token.
1082 ///
1083 /// This function is equivalent to g_regex_split() but it does
1084 /// not require compiling the pattern with g_regex_new(), saving
1085 /// some lines of code when you just need to do a split without
1086 /// extracting substrings, capture counts, and so on.
1087 ///
1088 /// If this function is to be called on the same @pattern more than
1089 /// once, it's more efficient to compile the pattern once with
1090 /// g_regex_new() and then use g_regex_split().
1091 ///
1092 /// As a special case, the result of splitting the empty string ""
1093 /// is an empty vector, not a vector containing a single string.
1094 /// The reason for this special case is that being able to represent
1095 /// an empty vector is typically more useful than consistent handling
1096 /// of empty elements. If you do need to represent empty elements,
1097 /// you'll need to check for the empty string before calling this
1098 /// function.
1099 ///
1100 /// A pattern that can match empty strings splits @string into
1101 /// separate characters wherever it matches the empty string between
1102 /// characters. For example splitting "ab c" using as a separator
1103 /// "\s*", you will get "a", "b" and "c".
1104 /// ## `pattern`
1105 /// the regular expression
1106 /// ## `string`
1107 /// the string to scan for matches
1108 /// ## `compile_options`
1109 /// compile options for the regular expression, or 0
1110 /// ## `match_options`
1111 /// match options, or 0
1112 ///
1113 /// # Returns
1114 ///
1115 /// a [`None`]-terminated array of strings. Free
1116 /// it using g_strfreev()
1117 // rustdoc-stripper-ignore-next-stop
1118 /// Breaks the string on the pattern, and returns an array of
1119 /// the tokens. If the pattern contains capturing parentheses,
1120 /// then the text for each of the substrings will also be returned.
1121 /// If the pattern does not match anywhere in the string, then the
1122 /// whole string is returned as the first token.
1123 ///
1124 /// This function is equivalent to g_regex_split() but it does
1125 /// not require compiling the pattern with g_regex_new(), saving
1126 /// some lines of code when you just need to do a split without
1127 /// extracting substrings, capture counts, and so on.
1128 ///
1129 /// If this function is to be called on the same @pattern more than
1130 /// once, it's more efficient to compile the pattern once with
1131 /// g_regex_new() and then use g_regex_split().
1132 ///
1133 /// As a special case, the result of splitting the empty string ""
1134 /// is an empty vector, not a vector containing a single string.
1135 /// The reason for this special case is that being able to represent
1136 /// an empty vector is typically more useful than consistent handling
1137 /// of empty elements. If you do need to represent empty elements,
1138 /// you'll need to check for the empty string before calling this
1139 /// function.
1140 ///
1141 /// A pattern that can match empty strings splits @string into
1142 /// separate characters wherever it matches the empty string between
1143 /// characters. For example splitting "ab c" using as a separator
1144 /// "\s*", you will get "a", "b" and "c".
1145 /// ## `pattern`
1146 /// the regular expression
1147 /// ## `string`
1148 /// the string to scan for matches
1149 /// ## `compile_options`
1150 /// compile options for the regular expression, or 0
1151 /// ## `match_options`
1152 /// match options, or 0
1153 ///
1154 /// # Returns
1155 ///
1156 /// a [`None`]-terminated array of strings. Free
1157 /// it using g_strfreev()
1158 #[doc(alias = "g_regex_split_simple")]
1159 pub fn split_simple(
1160 pattern: impl IntoGStr,
1161 string: impl IntoGStr,
1162 compile_options: RegexCompileFlags,
1163 match_options: RegexMatchFlags,
1164 ) -> PtrSlice<GStringPtr> {
1165 pattern.run_with_gstr(|pattern| {
1166 string.run_with_gstr(|string| unsafe {
1167 FromGlibPtrContainer::from_glib_full(ffi::g_regex_split_simple(
1168 pattern.to_glib_none().0,
1169 string.to_glib_none().0,
1170 compile_options.into_glib(),
1171 match_options.into_glib(),
1172 ))
1173 })
1174 })
1175 }
1176}
1177
#[cfg(test)]
mod tests {
    use super::*;

    // Sentence shared by the replace and split tests.
    const QUOTE: &str = "This is a simple sample.";

    // Compiles `pattern` with the given compile flags and default match
    // flags, panicking if compilation fails or yields no regex.
    fn compile(pattern: &str, compile_flags: RegexCompileFlags) -> Regex {
        Regex::new(pattern, compile_flags, RegexMatchFlags::DEFAULT)
            .expect("Regex new")
            .expect("Null regex")
    }

    #[test]
    fn test_replace_literal() {
        let regex = compile("s[ai]mple", RegexCompileFlags::OPTIMIZE);

        let replaced = regex
            .replace_literal(QUOTE, 0, "XXX", RegexMatchFlags::DEFAULT)
            .expect("regex replace");

        assert_eq!(replaced, "This is a XXX XXX.");
    }

    #[test]
    fn test_split() {
        let regex = compile("s[ai]mple", RegexCompileFlags::OPTIMIZE);

        let tokens = regex.split(QUOTE, RegexMatchFlags::DEFAULT);

        // Both matches are removed, leaving the text around them.
        let expected = ["This is a ", " ", "."];
        assert_eq!(tokens.len(), 3);
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(tokens[idx], *want);
        }
    }

    #[test]
    fn test_match() {
        let digit = compile(r"\d", RegexCompileFlags::DEFAULT);

        // Two digits: the first match is "8", advancing yields "7", and a
        // further advance reports that nothing is left.
        let digits = crate::GString::from("87");
        let info = digit
            .match_(digits.as_gstr(), RegexMatchFlags::DEFAULT)
            .unwrap();
        assert!(info.matches());
        assert_eq!(info.match_count(), 1);
        assert_eq!(info.fetch(0).as_deref(), Some("8"));
        assert!(info.next().unwrap());
        assert_eq!(info.fetch(0).as_deref(), Some("7"));
        assert!(!info.next().unwrap());
        assert!(info.fetch(0).is_none());

        // No digit at all: the call succeeds but reports no match.
        let letters = crate::GString::from("a");
        let no_match = digit
            .match_(letters.as_gstr(), RegexMatchFlags::DEFAULT)
            .unwrap();
        assert!(!no_match.matches());
        assert_eq!(no_match.match_count(), 0);
        assert!(no_match.fetch(0).is_none());
    }
}