glib/regex.rs
1// Take a look at the license at the top of the repository in the LICENSE file.
2
3// rustdoc-stripper-ignore-next
4//! This module is inefficient and should not be used by Rust programs except for
5//! compatibility with GLib.Regex based APIs.
6
7use crate::{
8 ffi, translate::*, GStr, GStringPtr, MatchInfo, PtrSlice, Regex, RegexCompileFlags,
9 RegexMatchFlags,
10};
11use std::{mem, ptr};
12
13impl Regex {
14 /// Retrieves the number of the subexpression named @name.
15 /// ## `name`
16 /// name of the subexpression
17 ///
18 /// # Returns
19 ///
20 /// The number of the subexpression or -1 if @name
/// does not exist
22 // rustdoc-stripper-ignore-next-stop
23 /// Retrieves the number of the subexpression named @name.
24 /// ## `name`
25 /// name of the subexpression
26 ///
27 /// # Returns
28 ///
29 /// The number of the subexpression or -1 if @name
/// does not exist
31 #[doc(alias = "g_regex_get_string_number")]
32 #[doc(alias = "get_string_number")]
33 pub fn string_number(&self, name: impl IntoGStr) -> i32 {
34 name.run_with_gstr(|name| unsafe {
35 ffi::g_regex_get_string_number(self.to_glib_none().0, name.to_glib_none().0)
36 })
37 }
38
39 /// Escapes the nul characters in @string to "\x00". It can be used
40 /// to compile a regex with embedded nul characters.
41 ///
42 /// For completeness, @length can be -1 for a nul-terminated string.
43 /// In this case the output string will be of course equal to @string.
44 /// ## `string`
45 /// the string to escape
46 /// ## `length`
47 /// the length of @string
48 ///
49 /// # Returns
50 ///
51 /// a newly-allocated escaped string
52 // rustdoc-stripper-ignore-next-stop
53 /// Escapes the nul characters in @string to "\x00". It can be used
54 /// to compile a regex with embedded nul characters.
55 ///
56 /// For completeness, @length can be -1 for a nul-terminated string.
57 /// In this case the output string will be of course equal to @string.
58 /// ## `string`
59 /// the string to escape
60 /// ## `length`
61 /// the length of @string
62 ///
63 /// # Returns
64 ///
65 /// a newly-allocated escaped string
66 #[doc(alias = "g_regex_escape_nul")]
67 pub fn escape_nul(string: impl IntoGStr) -> crate::GString {
68 unsafe {
69 string.run_with_gstr(|string| {
70 from_glib_full(ffi::g_regex_escape_nul(
71 string.to_glib_none().0,
72 string.len() as _,
73 ))
74 })
75 }
76 }
77
78 /// Escapes the special characters used for regular expressions
79 /// in @string, for instance "a.b*c" becomes "a\.b\*c". This
80 /// function is useful to dynamically generate regular expressions.
81 ///
82 /// @string can contain nul characters that are replaced with "\0",
83 /// in this case remember to specify the correct length of @string
84 /// in @length.
85 /// ## `string`
86 /// the string to escape
87 /// ## `length`
88 /// the length of @string, in bytes, or -1 if @string is nul-terminated
89 ///
90 /// # Returns
91 ///
92 /// a newly-allocated escaped string
93 // rustdoc-stripper-ignore-next-stop
94 /// Escapes the special characters used for regular expressions
95 /// in @string, for instance "a.b*c" becomes "a\.b\*c". This
96 /// function is useful to dynamically generate regular expressions.
97 ///
98 /// @string can contain nul characters that are replaced with "\0",
99 /// in this case remember to specify the correct length of @string
100 /// in @length.
101 /// ## `string`
102 /// the string to escape
103 /// ## `length`
104 /// the length of @string, in bytes, or -1 if @string is nul-terminated
105 ///
106 /// # Returns
107 ///
108 /// a newly-allocated escaped string
109 #[doc(alias = "g_regex_escape_string")]
110 pub fn escape_string(string: impl IntoGStr) -> crate::GString {
111 unsafe {
112 string.run_with_gstr(|string| {
113 from_glib_full(ffi::g_regex_escape_string(
114 string.to_glib_none().0,
115 string.len() as _,
116 ))
117 })
118 }
119 }
120
121 /// Checks whether @replacement is a valid replacement string
122 /// (see g_regex_replace()), i.e. that all escape sequences in
123 /// it are valid.
124 ///
125 /// If @has_references is not [`None`] then @replacement is checked
126 /// for pattern references. For instance, replacement text 'foo\n'
127 /// does not contain references and may be evaluated without information
128 /// about actual match, but '\0\1' (whole match followed by first
129 /// subpattern) requires valid #GMatchInfo object.
130 /// ## `replacement`
131 /// the replacement string
132 ///
133 /// # Returns
134 ///
135 /// whether @replacement is a valid replacement string
136 ///
137 /// ## `has_references`
138 /// location to store information about
139 /// references in @replacement or [`None`]
140 // rustdoc-stripper-ignore-next-stop
141 /// Checks whether @replacement is a valid replacement string
142 /// (see g_regex_replace()), i.e. that all escape sequences in
143 /// it are valid.
144 ///
145 /// If @has_references is not [`None`] then @replacement is checked
146 /// for pattern references. For instance, replacement text 'foo\n'
147 /// does not contain references and may be evaluated without information
148 /// about actual match, but '\0\1' (whole match followed by first
149 /// subpattern) requires valid #GMatchInfo object.
150 /// ## `replacement`
151 /// the replacement string
152 ///
153 /// # Returns
154 ///
155 /// whether @replacement is a valid replacement string
156 ///
157 /// ## `has_references`
158 /// location to store information about
159 /// references in @replacement or [`None`]
160 #[doc(alias = "g_regex_check_replacement")]
161 pub fn check_replacement(replacement: impl IntoGStr) -> Result<bool, crate::Error> {
162 replacement.run_with_gstr(|replacement| unsafe {
163 let mut has_references = mem::MaybeUninit::uninit();
164 let mut error = ptr::null_mut();
165 let is_ok = ffi::g_regex_check_replacement(
166 replacement.to_glib_none().0,
167 has_references.as_mut_ptr(),
168 &mut error,
169 );
170 debug_assert_eq!(is_ok == crate::ffi::GFALSE, !error.is_null());
171 if error.is_null() {
172 Ok(from_glib(has_references.assume_init()))
173 } else {
174 Err(from_glib_full(error))
175 }
176 })
177 }
178
179 /// Scans for a match in @string for @pattern.
180 ///
181 /// This function is equivalent to g_regex_match() but it does not
182 /// require to compile the pattern with g_regex_new(), avoiding some
183 /// lines of code when you need just to do a match without extracting
184 /// substrings, capture counts, and so on.
185 ///
186 /// If this function is to be called on the same @pattern more than
187 /// once, it's more efficient to compile the pattern once with
188 /// g_regex_new() and then use g_regex_match().
189 /// ## `pattern`
190 /// the regular expression
191 /// ## `string`
192 /// the string to scan for matches
193 /// ## `compile_options`
194 /// compile options for the regular expression, or 0
195 /// ## `match_options`
196 /// match options, or 0
197 ///
198 /// # Returns
199 ///
200 /// [`true`] if the string matched, [`false`] otherwise
201 // rustdoc-stripper-ignore-next-stop
202 /// Scans for a match in @string for @pattern.
203 ///
204 /// This function is equivalent to g_regex_match() but it does not
205 /// require to compile the pattern with g_regex_new(), avoiding some
206 /// lines of code when you need just to do a match without extracting
207 /// substrings, capture counts, and so on.
208 ///
209 /// If this function is to be called on the same @pattern more than
210 /// once, it's more efficient to compile the pattern once with
211 /// g_regex_new() and then use g_regex_match().
212 /// ## `pattern`
213 /// the regular expression
214 /// ## `string`
215 /// the string to scan for matches
216 /// ## `compile_options`
217 /// compile options for the regular expression, or 0
218 /// ## `match_options`
219 /// match options, or 0
220 ///
221 /// # Returns
222 ///
223 /// [`true`] if the string matched, [`false`] otherwise
224 #[doc(alias = "g_regex_match_simple")]
225 pub fn match_simple(
226 pattern: impl IntoGStr,
227 string: impl IntoGStr,
228 compile_options: RegexCompileFlags,
229 match_options: RegexMatchFlags,
230 ) -> bool {
231 pattern.run_with_gstr(|pattern| {
232 string.run_with_gstr(|string| unsafe {
233 from_glib(ffi::g_regex_match_simple(
234 pattern.to_glib_none().0,
235 string.to_glib_none().0,
236 compile_options.into_glib(),
237 match_options.into_glib(),
238 ))
239 })
240 })
241 }
242
243 /// Replaces all occurrences of the pattern in @self with the
244 /// replacement text. Backreferences of the form `\number` or
245 /// `\g<number>` in the replacement text are interpolated by the
246 /// number-th captured subexpression of the match, `\g<name>` refers
247 /// to the captured subexpression with the given name. `\0` refers
248 /// to the complete match, but `\0` followed by a number is the octal
249 /// representation of a character. To include a literal `\` in the
250 /// replacement, write `\\\\`.
251 ///
/// There are also escapes that change the case of the following text:
253 ///
254 /// - \l: Convert to lower case the next character
255 /// - \u: Convert to upper case the next character
256 /// - \L: Convert to lower case till \E
257 /// - \U: Convert to upper case till \E
258 /// - \E: End case modification
259 ///
260 /// If you do not need to use backreferences use g_regex_replace_literal().
261 ///
262 /// The @replacement string must be UTF-8 encoded even if [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] was
263 /// passed to g_regex_new(). If you want to use not UTF-8 encoded strings
264 /// you can use g_regex_replace_literal().
265 ///
266 /// Setting @start_position differs from just passing over a shortened
267 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern that
268 /// begins with any kind of lookbehind assertion, such as "\b".
269 /// ## `string`
270 /// the string to perform matches against
271 /// ## `start_position`
272 /// starting index of the string to match, in bytes
273 /// ## `replacement`
274 /// text to replace each match with
275 /// ## `match_options`
276 /// options for the match
277 ///
278 /// # Returns
279 ///
280 /// a newly allocated string containing the replacements
281 // rustdoc-stripper-ignore-next-stop
282 /// Replaces all occurrences of the pattern in @self with the
283 /// replacement text. Backreferences of the form `\number` or
284 /// `\g<number>` in the replacement text are interpolated by the
285 /// number-th captured subexpression of the match, `\g<name>` refers
286 /// to the captured subexpression with the given name. `\0` refers
287 /// to the complete match, but `\0` followed by a number is the octal
288 /// representation of a character. To include a literal `\` in the
289 /// replacement, write `\\\\`.
290 ///
/// There are also escapes that change the case of the following text:
292 ///
293 /// - \l: Convert to lower case the next character
294 /// - \u: Convert to upper case the next character
295 /// - \L: Convert to lower case till \E
296 /// - \U: Convert to upper case till \E
297 /// - \E: End case modification
298 ///
299 /// If you do not need to use backreferences use g_regex_replace_literal().
300 ///
301 /// The @replacement string must be UTF-8 encoded even if [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] was
302 /// passed to g_regex_new(). If you want to use not UTF-8 encoded strings
303 /// you can use g_regex_replace_literal().
304 ///
305 /// Setting @start_position differs from just passing over a shortened
306 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern that
307 /// begins with any kind of lookbehind assertion, such as "\b".
308 /// ## `string`
309 /// the string to perform matches against
310 /// ## `start_position`
311 /// starting index of the string to match, in bytes
312 /// ## `replacement`
313 /// text to replace each match with
314 /// ## `match_options`
315 /// options for the match
316 ///
317 /// # Returns
318 ///
319 /// a newly allocated string containing the replacements
320 #[doc(alias = "g_regex_replace")]
321 pub fn replace(
322 &self,
323 string: impl IntoGStr,
324 start_position: i32,
325 replacement: impl IntoGStr,
326 match_options: RegexMatchFlags,
327 ) -> Result<crate::GString, crate::Error> {
328 unsafe {
329 string.run_with_gstr(|string| {
330 replacement.run_with_gstr(|replacement| {
331 let mut error = ptr::null_mut();
332 let ret = ffi::g_regex_replace(
333 self.to_glib_none().0,
334 string.as_ptr() as *const _,
335 string.len() as _,
336 start_position,
337 replacement.to_glib_none().0,
338 match_options.into_glib(),
339 &mut error,
340 );
341 if error.is_null() {
342 Ok(from_glib_full(ret))
343 } else {
344 Err(from_glib_full(error))
345 }
346 })
347 })
348 }
349 }
350
351 /// Using the standard algorithm for regular expression matching only
352 /// the longest match in the string is retrieved. This function uses
353 /// a different algorithm so it can retrieve all the possible matches.
354 /// For more documentation see g_regex_match_all_full().
355 ///
356 /// A #GMatchInfo structure, used to get information on the match, is
357 /// stored in @match_info if not [`None`]. Note that if @match_info is
358 /// not [`None`] then it is created even if the function returns [`false`],
359 /// i.e. you must free it regardless if regular expression actually
360 /// matched.
361 ///
362 /// @string is not copied and is used in #GMatchInfo internally. If
363 /// you use any #GMatchInfo method (except g_match_info_free()) after
364 /// freeing or modifying @string then the behaviour is undefined.
365 /// ## `string`
366 /// the string to scan for matches
367 /// ## `match_options`
368 /// match options
369 ///
370 /// # Returns
371 ///
/// [`true`] if the string matched, [`false`] otherwise
373 ///
374 /// ## `match_info`
375 /// pointer to location where to store
376 /// the #GMatchInfo, or [`None`] if you do not need it
377 // rustdoc-stripper-ignore-next-stop
378 /// Using the standard algorithm for regular expression matching only
379 /// the longest match in the string is retrieved. This function uses
380 /// a different algorithm so it can retrieve all the possible matches.
381 /// For more documentation see g_regex_match_all_full().
382 ///
383 /// A #GMatchInfo structure, used to get information on the match, is
384 /// stored in @match_info if not [`None`]. Note that if @match_info is
385 /// not [`None`] then it is created even if the function returns [`false`],
386 /// i.e. you must free it regardless if regular expression actually
387 /// matched.
388 ///
389 /// @string is not copied and is used in #GMatchInfo internally. If
390 /// you use any #GMatchInfo method (except g_match_info_free()) after
391 /// freeing or modifying @string then the behaviour is undefined.
392 /// ## `string`
393 /// the string to scan for matches
394 /// ## `match_options`
395 /// match options
396 ///
397 /// # Returns
398 ///
/// [`true`] if the string matched, [`false`] otherwise
400 ///
401 /// ## `match_info`
402 /// pointer to location where to store
403 /// the #GMatchInfo, or [`None`] if you do not need it
404 #[doc(alias = "g_regex_match_all")]
405 pub fn match_all<'input>(
406 &self,
407 string: &'input GStr,
408 match_options: RegexMatchFlags,
409 ) -> Option<MatchInfo<'input>> {
410 self.match_all_full(string, 0, match_options).ok()
411 }
412
413 /// Using the standard algorithm for regular expression matching only
414 /// the longest match in the @string is retrieved, it is not possible
415 /// to obtain all the available matches. For instance matching
416 /// `"<a> <b> <c>"` against the pattern `"<.*>"`
417 /// you get `"<a> <b> <c>"`.
418 ///
419 /// This function uses a different algorithm (called DFA, i.e. deterministic
420 /// finite automaton), so it can retrieve all the possible matches, all
421 /// starting at the same point in the string. For instance matching
422 /// `"<a> <b> <c>"` against the pattern `"<.*>"`
423 /// you would obtain three matches: `"<a> <b> <c>"`,
424 /// `"<a> <b>"` and `"<a>"`.
425 ///
426 /// The number of matched strings is retrieved using
427 /// g_match_info_get_match_count(). To obtain the matched strings and
428 /// their position you can use, respectively, g_match_info_fetch() and
429 /// g_match_info_fetch_pos(). Note that the strings are returned in
430 /// reverse order of length; that is, the longest matching string is
431 /// given first.
432 ///
433 /// Note that the DFA algorithm is slower than the standard one and it
434 /// is not able to capture substrings, so backreferences do not work.
435 ///
436 /// Setting @start_position differs from just passing over a shortened
437 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
438 /// that begins with any kind of lookbehind assertion, such as "\b".
439 ///
440 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
441 ///
442 /// A #GMatchInfo structure, used to get information on the match, is
443 /// stored in @match_info if not [`None`]. Note that if @match_info is
444 /// not [`None`] then it is created even if the function returns [`false`],
445 /// i.e. you must free it regardless if regular expression actually
446 /// matched.
447 ///
448 /// @string is not copied and is used in #GMatchInfo internally. If
449 /// you use any #GMatchInfo method (except g_match_info_free()) after
450 /// freeing or modifying @string then the behaviour is undefined.
451 /// ## `string`
452 /// the string to scan for matches
453 /// ## `start_position`
454 /// starting index of the string to match, in bytes
455 /// ## `match_options`
456 /// match options
457 ///
458 /// # Returns
459 ///
/// [`true`] if the string matched, [`false`] otherwise
461 ///
462 /// ## `match_info`
463 /// pointer to location where to store
464 /// the #GMatchInfo, or [`None`] if you do not need it
465 // rustdoc-stripper-ignore-next-stop
466 /// Using the standard algorithm for regular expression matching only
467 /// the longest match in the @string is retrieved, it is not possible
468 /// to obtain all the available matches. For instance matching
469 /// `"<a> <b> <c>"` against the pattern `"<.*>"`
470 /// you get `"<a> <b> <c>"`.
471 ///
472 /// This function uses a different algorithm (called DFA, i.e. deterministic
473 /// finite automaton), so it can retrieve all the possible matches, all
474 /// starting at the same point in the string. For instance matching
475 /// `"<a> <b> <c>"` against the pattern `"<.*>"`
476 /// you would obtain three matches: `"<a> <b> <c>"`,
477 /// `"<a> <b>"` and `"<a>"`.
478 ///
479 /// The number of matched strings is retrieved using
480 /// g_match_info_get_match_count(). To obtain the matched strings and
481 /// their position you can use, respectively, g_match_info_fetch() and
482 /// g_match_info_fetch_pos(). Note that the strings are returned in
483 /// reverse order of length; that is, the longest matching string is
484 /// given first.
485 ///
486 /// Note that the DFA algorithm is slower than the standard one and it
487 /// is not able to capture substrings, so backreferences do not work.
488 ///
489 /// Setting @start_position differs from just passing over a shortened
490 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
491 /// that begins with any kind of lookbehind assertion, such as "\b".
492 ///
493 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
494 ///
495 /// A #GMatchInfo structure, used to get information on the match, is
496 /// stored in @match_info if not [`None`]. Note that if @match_info is
497 /// not [`None`] then it is created even if the function returns [`false`],
498 /// i.e. you must free it regardless if regular expression actually
499 /// matched.
500 ///
501 /// @string is not copied and is used in #GMatchInfo internally. If
502 /// you use any #GMatchInfo method (except g_match_info_free()) after
503 /// freeing or modifying @string then the behaviour is undefined.
504 /// ## `string`
505 /// the string to scan for matches
506 /// ## `start_position`
507 /// starting index of the string to match, in bytes
508 /// ## `match_options`
509 /// match options
510 ///
511 /// # Returns
512 ///
/// [`true`] if the string matched, [`false`] otherwise
514 ///
515 /// ## `match_info`
516 /// pointer to location where to store
517 /// the #GMatchInfo, or [`None`] if you do not need it
518 #[doc(alias = "g_regex_match_all_full")]
519 pub fn match_all_full<'input>(
520 &self,
521 string: &'input GStr,
522 start_position: i32,
523 match_options: RegexMatchFlags,
524 ) -> Result<MatchInfo<'input>, crate::Error> {
525 unsafe {
526 let mut match_info = ptr::null_mut();
527 let mut error = ptr::null_mut();
528 let is_ok = ffi::g_regex_match_all_full(
529 self.to_glib_none().0,
530 string.to_glib_none().0,
531 string.len() as _,
532 start_position,
533 match_options.into_glib(),
534 &mut match_info,
535 &mut error,
536 );
537 debug_assert_eq!(is_ok == crate::ffi::GFALSE, !error.is_null());
538 if error.is_null() {
539 Ok(from_glib_full(match_info))
540 } else {
541 Err(from_glib_full(error))
542 }
543 }
544 }
545
546 /// Scans for a match in @string for the pattern in @self.
547 /// The @match_options are combined with the match options specified
548 /// when the @self structure was created, letting you have more
549 /// flexibility in reusing #GRegex structures.
550 ///
551 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
552 ///
553 /// A #GMatchInfo structure, used to get information on the match,
554 /// is stored in @match_info if not [`None`]. Note that if @match_info
555 /// is not [`None`] then it is created even if the function returns [`false`],
556 /// i.e. you must free it regardless if regular expression actually matched.
557 ///
558 /// To retrieve all the non-overlapping matches of the pattern in
559 /// string you can use g_match_info_next().
560 ///
561 ///
562 ///
563 /// **⚠️ The following code is in C ⚠️**
564 ///
565 /// ```C
566 /// static void
567 /// print_uppercase_words (const gchar *string)
568 /// {
569 /// // Print all uppercase-only words.
570 /// GRegex *regex;
571 /// GMatchInfo *match_info;
572 ///
573 /// regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
574 /// g_regex_match (regex, string, 0, &match_info);
575 /// while (g_match_info_matches (match_info))
576 /// {
577 /// gchar *word = g_match_info_fetch (match_info, 0);
578 /// g_print ("Found: %s\n", word);
579 /// g_free (word);
580 /// g_match_info_next (match_info, NULL);
581 /// }
582 /// g_match_info_free (match_info);
583 /// g_regex_unref (regex);
584 /// }
585 /// ```
586 ///
587 /// @string is not copied and is used in #GMatchInfo internally. If
588 /// you use any #GMatchInfo method (except g_match_info_free()) after
589 /// freeing or modifying @string then the behaviour is undefined.
590 /// ## `string`
591 /// the string to scan for matches
592 /// ## `match_options`
593 /// match options
594 ///
595 /// # Returns
596 ///
/// [`true`] if the string matched, [`false`] otherwise
598 ///
599 /// ## `match_info`
600 /// pointer to location where to store
601 /// the #GMatchInfo, or [`None`] if you do not need it
602 // rustdoc-stripper-ignore-next-stop
603 /// Scans for a match in @string for the pattern in @self.
604 /// The @match_options are combined with the match options specified
605 /// when the @self structure was created, letting you have more
606 /// flexibility in reusing #GRegex structures.
607 ///
608 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
609 ///
610 /// A #GMatchInfo structure, used to get information on the match,
611 /// is stored in @match_info if not [`None`]. Note that if @match_info
612 /// is not [`None`] then it is created even if the function returns [`false`],
613 /// i.e. you must free it regardless if regular expression actually matched.
614 ///
615 /// To retrieve all the non-overlapping matches of the pattern in
616 /// string you can use g_match_info_next().
617 ///
618 ///
619 ///
620 /// **⚠️ The following code is in C ⚠️**
621 ///
622 /// ```C
623 /// static void
624 /// print_uppercase_words (const gchar *string)
625 /// {
626 /// // Print all uppercase-only words.
627 /// GRegex *regex;
628 /// GMatchInfo *match_info;
629 ///
630 /// regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
631 /// g_regex_match (regex, string, 0, &match_info);
632 /// while (g_match_info_matches (match_info))
633 /// {
634 /// gchar *word = g_match_info_fetch (match_info, 0);
635 /// g_print ("Found: %s\n", word);
636 /// g_free (word);
637 /// g_match_info_next (match_info, NULL);
638 /// }
639 /// g_match_info_free (match_info);
640 /// g_regex_unref (regex);
641 /// }
642 /// ```
643 ///
644 /// @string is not copied and is used in #GMatchInfo internally. If
645 /// you use any #GMatchInfo method (except g_match_info_free()) after
646 /// freeing or modifying @string then the behaviour is undefined.
647 /// ## `string`
648 /// the string to scan for matches
649 /// ## `match_options`
650 /// match options
651 ///
652 /// # Returns
653 ///
/// [`true`] if the string matched, [`false`] otherwise
655 ///
656 /// ## `match_info`
657 /// pointer to location where to store
658 /// the #GMatchInfo, or [`None`] if you do not need it
659 #[doc(alias = "g_regex_match")]
660 pub fn match_<'input>(
661 &self,
662 string: &'input GStr,
663 match_options: RegexMatchFlags,
664 ) -> Option<MatchInfo<'input>> {
665 self.match_full(string, 0, match_options).ok()
666 }
667
668 /// Scans for a match in @string for the pattern in @self.
669 /// The @match_options are combined with the match options specified
670 /// when the @self structure was created, letting you have more
671 /// flexibility in reusing #GRegex structures.
672 ///
673 /// Setting @start_position differs from just passing over a shortened
674 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
675 /// that begins with any kind of lookbehind assertion, such as "\b".
676 ///
677 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
678 ///
679 /// A #GMatchInfo structure, used to get information on the match, is
680 /// stored in @match_info if not [`None`]. Note that if @match_info is
681 /// not [`None`] then it is created even if the function returns [`false`],
682 /// i.e. you must free it regardless if regular expression actually
683 /// matched.
684 ///
685 /// @string is not copied and is used in #GMatchInfo internally. If
686 /// you use any #GMatchInfo method (except g_match_info_free()) after
687 /// freeing or modifying @string then the behaviour is undefined.
688 ///
689 /// To retrieve all the non-overlapping matches of the pattern in
690 /// string you can use g_match_info_next().
691 ///
692 ///
693 ///
694 /// **⚠️ The following code is in C ⚠️**
695 ///
696 /// ```C
697 /// static void
698 /// print_uppercase_words (const gchar *string)
699 /// {
700 /// // Print all uppercase-only words.
701 /// GRegex *regex;
702 /// GMatchInfo *match_info;
703 /// GError *error = NULL;
704 ///
705 /// regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
706 /// g_regex_match_full (regex, string, -1, 0, 0, &match_info, &error);
707 /// while (g_match_info_matches (match_info))
708 /// {
709 /// gchar *word = g_match_info_fetch (match_info, 0);
710 /// g_print ("Found: %s\n", word);
711 /// g_free (word);
712 /// g_match_info_next (match_info, &error);
713 /// }
714 /// g_match_info_free (match_info);
715 /// g_regex_unref (regex);
716 /// if (error != NULL)
717 /// {
718 /// g_printerr ("Error while matching: %s\n", error->message);
719 /// g_error_free (error);
720 /// }
721 /// }
722 /// ```
723 /// ## `string`
724 /// the string to scan for matches
725 /// ## `start_position`
726 /// starting index of the string to match, in bytes
727 /// ## `match_options`
728 /// match options
729 ///
730 /// # Returns
731 ///
/// [`true`] if the string matched, [`false`] otherwise
733 ///
734 /// ## `match_info`
735 /// pointer to location where to store
736 /// the #GMatchInfo, or [`None`] if you do not need it
737 // rustdoc-stripper-ignore-next-stop
738 /// Scans for a match in @string for the pattern in @self.
739 /// The @match_options are combined with the match options specified
740 /// when the @self structure was created, letting you have more
741 /// flexibility in reusing #GRegex structures.
742 ///
743 /// Setting @start_position differs from just passing over a shortened
744 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
745 /// that begins with any kind of lookbehind assertion, such as "\b".
746 ///
747 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
748 ///
749 /// A #GMatchInfo structure, used to get information on the match, is
750 /// stored in @match_info if not [`None`]. Note that if @match_info is
751 /// not [`None`] then it is created even if the function returns [`false`],
752 /// i.e. you must free it regardless if regular expression actually
753 /// matched.
754 ///
755 /// @string is not copied and is used in #GMatchInfo internally. If
756 /// you use any #GMatchInfo method (except g_match_info_free()) after
757 /// freeing or modifying @string then the behaviour is undefined.
758 ///
759 /// To retrieve all the non-overlapping matches of the pattern in
760 /// string you can use g_match_info_next().
761 ///
762 ///
763 ///
764 /// **⚠️ The following code is in C ⚠️**
765 ///
766 /// ```C
767 /// static void
768 /// print_uppercase_words (const gchar *string)
769 /// {
770 /// // Print all uppercase-only words.
771 /// GRegex *regex;
772 /// GMatchInfo *match_info;
773 /// GError *error = NULL;
774 ///
775 /// regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
776 /// g_regex_match_full (regex, string, -1, 0, 0, &match_info, &error);
777 /// while (g_match_info_matches (match_info))
778 /// {
779 /// gchar *word = g_match_info_fetch (match_info, 0);
780 /// g_print ("Found: %s\n", word);
781 /// g_free (word);
782 /// g_match_info_next (match_info, &error);
783 /// }
784 /// g_match_info_free (match_info);
785 /// g_regex_unref (regex);
786 /// if (error != NULL)
787 /// {
788 /// g_printerr ("Error while matching: %s\n", error->message);
789 /// g_error_free (error);
790 /// }
791 /// }
792 /// ```
793 /// ## `string`
794 /// the string to scan for matches
795 /// ## `start_position`
796 /// starting index of the string to match, in bytes
797 /// ## `match_options`
798 /// match options
799 ///
800 /// # Returns
801 ///
/// [`true`] if the string matched, [`false`] otherwise
803 ///
804 /// ## `match_info`
805 /// pointer to location where to store
806 /// the #GMatchInfo, or [`None`] if you do not need it
807 #[doc(alias = "g_regex_match_full")]
808 pub fn match_full<'input>(
809 &self,
810 string: &'input GStr,
811 start_position: i32,
812 match_options: RegexMatchFlags,
813 ) -> Result<MatchInfo<'input>, crate::Error> {
814 unsafe {
815 let mut match_info = ptr::null_mut();
816 let mut error = ptr::null_mut();
817 let is_ok = ffi::g_regex_match_full(
818 self.to_glib_none().0,
819 string.to_glib_none().0,
820 string.len() as _,
821 start_position,
822 match_options.into_glib(),
823 &mut match_info,
824 &mut error,
825 );
826 debug_assert_eq!(is_ok == crate::ffi::GFALSE, !error.is_null());
827 if error.is_null() {
828 Ok(from_glib_full(match_info))
829 } else {
830 Err(from_glib_full(error))
831 }
832 }
833 }
834
835 /// Replaces all occurrences of the pattern in @self with the
836 /// replacement text. @replacement is replaced literally, to
837 /// include backreferences use g_regex_replace().
838 ///
839 /// Setting @start_position differs from just passing over a
840 /// shortened string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the
841 /// case of a pattern that begins with any kind of lookbehind
842 /// assertion, such as "\b".
843 /// ## `string`
844 /// the string to perform matches against
845 /// ## `start_position`
846 /// starting index of the string to match, in bytes
847 /// ## `replacement`
848 /// text to replace each match with
849 /// ## `match_options`
850 /// options for the match
851 ///
852 /// # Returns
853 ///
854 /// a newly allocated string containing the replacements
855 // rustdoc-stripper-ignore-next-stop
856 /// Replaces all occurrences of the pattern in @self with the
857 /// replacement text. @replacement is replaced literally, to
858 /// include backreferences use g_regex_replace().
859 ///
860 /// Setting @start_position differs from just passing over a
861 /// shortened string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the
862 /// case of a pattern that begins with any kind of lookbehind
863 /// assertion, such as "\b".
864 /// ## `string`
865 /// the string to perform matches against
866 /// ## `start_position`
867 /// starting index of the string to match, in bytes
868 /// ## `replacement`
869 /// text to replace each match with
870 /// ## `match_options`
871 /// options for the match
872 ///
873 /// # Returns
874 ///
875 /// a newly allocated string containing the replacements
876 #[doc(alias = "g_regex_replace_literal")]
877 pub fn replace_literal(
878 &self,
879 string: impl IntoGStr,
880 start_position: i32,
881 replacement: impl IntoGStr,
882 match_options: RegexMatchFlags,
883 ) -> Result<crate::GString, crate::Error> {
884 unsafe {
885 string.run_with_gstr(|string| {
886 replacement.run_with_gstr(|replacement| {
887 let mut error = ptr::null_mut();
888 let ret = ffi::g_regex_replace_literal(
889 self.to_glib_none().0,
890 string.to_glib_none().0,
891 string.len() as _,
892 start_position,
893 replacement.to_glib_none().0,
894 match_options.into_glib(),
895 &mut error,
896 );
897 if error.is_null() {
898 Ok(from_glib_full(ret))
899 } else {
900 Err(from_glib_full(error))
901 }
902 })
903 })
904 }
905 }
906
907 /// Breaks the string on the pattern, and returns an array of the tokens.
908 /// If the pattern contains capturing parentheses, then the text for each
909 /// of the substrings will also be returned. If the pattern does not match
910 /// anywhere in the string, then the whole string is returned as the first
911 /// token.
912 ///
913 /// As a special case, the result of splitting the empty string "" is an
914 /// empty vector, not a vector containing a single string. The reason for
915 /// this special case is that being able to represent an empty vector is
916 /// typically more useful than consistent handling of empty elements. If
917 /// you do need to represent empty elements, you'll need to check for the
918 /// empty string before calling this function.
919 ///
920 /// A pattern that can match empty strings splits @string into separate
921 /// characters wherever it matches the empty string between characters.
922 /// For example splitting "ab c" using as a separator "\s*", you will get
923 /// "a", "b" and "c".
924 /// ## `string`
925 /// the string to split with the pattern
926 /// ## `match_options`
927 /// match time option flags
928 ///
929 /// # Returns
930 ///
931 /// a [`None`]-terminated gchar ** array. Free
932 /// it using g_strfreev()
933 // rustdoc-stripper-ignore-next-stop
934 /// Breaks the string on the pattern, and returns an array of the tokens.
935 /// If the pattern contains capturing parentheses, then the text for each
936 /// of the substrings will also be returned. If the pattern does not match
937 /// anywhere in the string, then the whole string is returned as the first
938 /// token.
939 ///
940 /// As a special case, the result of splitting the empty string "" is an
941 /// empty vector, not a vector containing a single string. The reason for
942 /// this special case is that being able to represent an empty vector is
943 /// typically more useful than consistent handling of empty elements. If
944 /// you do need to represent empty elements, you'll need to check for the
945 /// empty string before calling this function.
946 ///
947 /// A pattern that can match empty strings splits @string into separate
948 /// characters wherever it matches the empty string between characters.
949 /// For example splitting "ab c" using as a separator "\s*", you will get
950 /// "a", "b" and "c".
951 /// ## `string`
952 /// the string to split with the pattern
953 /// ## `match_options`
954 /// match time option flags
955 ///
956 /// # Returns
957 ///
958 /// a [`None`]-terminated gchar ** array. Free
959 /// it using g_strfreev()
960 #[doc(alias = "g_regex_split")]
961 pub fn split(
962 &self,
963 string: impl IntoGStr,
964 match_options: RegexMatchFlags,
965 ) -> PtrSlice<GStringPtr> {
966 self.split_full(string, 0, match_options, 0)
967 .unwrap_or_default()
968 }
969
970 /// Breaks the string on the pattern, and returns an array of the tokens.
971 /// If the pattern contains capturing parentheses, then the text for each
972 /// of the substrings will also be returned. If the pattern does not match
973 /// anywhere in the string, then the whole string is returned as the first
974 /// token.
975 ///
976 /// As a special case, the result of splitting the empty string "" is an
977 /// empty vector, not a vector containing a single string. The reason for
978 /// this special case is that being able to represent an empty vector is
979 /// typically more useful than consistent handling of empty elements. If
980 /// you do need to represent empty elements, you'll need to check for the
981 /// empty string before calling this function.
982 ///
983 /// A pattern that can match empty strings splits @string into separate
984 /// characters wherever it matches the empty string between characters.
985 /// For example splitting "ab c" using as a separator "\s*", you will get
986 /// "a", "b" and "c".
987 ///
988 /// Setting @start_position differs from just passing over a shortened
989 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
990 /// that begins with any kind of lookbehind assertion, such as "\b".
991 /// ## `string`
992 /// the string to split with the pattern
993 /// ## `start_position`
994 /// starting index of the string to match, in bytes
995 /// ## `match_options`
996 /// match time option flags
997 /// ## `max_tokens`
998 /// the maximum number of tokens to split @string into.
999 /// If this is less than 1, the string is split completely
1000 ///
1001 /// # Returns
1002 ///
1003 /// a [`None`]-terminated gchar ** array. Free
1004 /// it using g_strfreev()
1005 // rustdoc-stripper-ignore-next-stop
1006 /// Breaks the string on the pattern, and returns an array of the tokens.
1007 /// If the pattern contains capturing parentheses, then the text for each
1008 /// of the substrings will also be returned. If the pattern does not match
1009 /// anywhere in the string, then the whole string is returned as the first
1010 /// token.
1011 ///
1012 /// As a special case, the result of splitting the empty string "" is an
1013 /// empty vector, not a vector containing a single string. The reason for
1014 /// this special case is that being able to represent an empty vector is
1015 /// typically more useful than consistent handling of empty elements. If
1016 /// you do need to represent empty elements, you'll need to check for the
1017 /// empty string before calling this function.
1018 ///
1019 /// A pattern that can match empty strings splits @string into separate
1020 /// characters wherever it matches the empty string between characters.
1021 /// For example splitting "ab c" using as a separator "\s*", you will get
1022 /// "a", "b" and "c".
1023 ///
1024 /// Setting @start_position differs from just passing over a shortened
1025 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
1026 /// that begins with any kind of lookbehind assertion, such as "\b".
1027 /// ## `string`
1028 /// the string to split with the pattern
1029 /// ## `start_position`
1030 /// starting index of the string to match, in bytes
1031 /// ## `match_options`
1032 /// match time option flags
1033 /// ## `max_tokens`
1034 /// the maximum number of tokens to split @string into.
1035 /// If this is less than 1, the string is split completely
1036 ///
1037 /// # Returns
1038 ///
1039 /// a [`None`]-terminated gchar ** array. Free
1040 /// it using g_strfreev()
1041 #[doc(alias = "g_regex_split_full")]
1042 pub fn split_full(
1043 &self,
1044 string: impl IntoGStr,
1045 start_position: i32,
1046 match_options: RegexMatchFlags,
1047 max_tokens: i32,
1048 ) -> Result<PtrSlice<GStringPtr>, crate::Error> {
1049 unsafe {
1050 let mut error = ptr::null_mut();
1051 string.run_with_gstr(|string| {
1052 let ret = ffi::g_regex_split_full(
1053 self.to_glib_none().0,
1054 string.to_glib_none().0,
1055 string.len() as _,
1056 start_position,
1057 match_options.into_glib(),
1058 max_tokens,
1059 &mut error,
1060 );
1061 if error.is_null() {
1062 Ok(FromGlibPtrContainer::from_glib_full(ret))
1063 } else {
1064 Err(from_glib_full(error))
1065 }
1066 })
1067 }
1068 }
1069
1070 /// Breaks the string on the pattern, and returns an array of
1071 /// the tokens. If the pattern contains capturing parentheses,
1072 /// then the text for each of the substrings will also be returned.
1073 /// If the pattern does not match anywhere in the string, then the
1074 /// whole string is returned as the first token.
1075 ///
1076 /// This function is equivalent to g_regex_split() but it does
1077 /// not require to compile the pattern with g_regex_new(), avoiding
1078 /// some lines of code when you need just to do a split without
1079 /// extracting substrings, capture counts, and so on.
1080 ///
1081 /// If this function is to be called on the same @pattern more than
1082 /// once, it's more efficient to compile the pattern once with
1083 /// g_regex_new() and then use g_regex_split().
1084 ///
1085 /// As a special case, the result of splitting the empty string ""
1086 /// is an empty vector, not a vector containing a single string.
1087 /// The reason for this special case is that being able to represent
1088 /// an empty vector is typically more useful than consistent handling
1089 /// of empty elements. If you do need to represent empty elements,
1090 /// you'll need to check for the empty string before calling this
1091 /// function.
1092 ///
1093 /// A pattern that can match empty strings splits @string into
1094 /// separate characters wherever it matches the empty string between
1095 /// characters. For example splitting "ab c" using as a separator
1096 /// "\s*", you will get "a", "b" and "c".
1097 /// ## `pattern`
1098 /// the regular expression
1099 /// ## `string`
1100 /// the string to scan for matches
1101 /// ## `compile_options`
1102 /// compile options for the regular expression, or 0
1103 /// ## `match_options`
1104 /// match options, or 0
1105 ///
1106 /// # Returns
1107 ///
1108 /// a [`None`]-terminated array of strings. Free
1109 /// it using g_strfreev()
1110 // rustdoc-stripper-ignore-next-stop
1111 /// Breaks the string on the pattern, and returns an array of
1112 /// the tokens. If the pattern contains capturing parentheses,
1113 /// then the text for each of the substrings will also be returned.
1114 /// If the pattern does not match anywhere in the string, then the
1115 /// whole string is returned as the first token.
1116 ///
1117 /// This function is equivalent to g_regex_split() but it does
1118 /// not require to compile the pattern with g_regex_new(), avoiding
1119 /// some lines of code when you need just to do a split without
1120 /// extracting substrings, capture counts, and so on.
1121 ///
1122 /// If this function is to be called on the same @pattern more than
1123 /// once, it's more efficient to compile the pattern once with
1124 /// g_regex_new() and then use g_regex_split().
1125 ///
1126 /// As a special case, the result of splitting the empty string ""
1127 /// is an empty vector, not a vector containing a single string.
1128 /// The reason for this special case is that being able to represent
1129 /// an empty vector is typically more useful than consistent handling
1130 /// of empty elements. If you do need to represent empty elements,
1131 /// you'll need to check for the empty string before calling this
1132 /// function.
1133 ///
1134 /// A pattern that can match empty strings splits @string into
1135 /// separate characters wherever it matches the empty string between
1136 /// characters. For example splitting "ab c" using as a separator
1137 /// "\s*", you will get "a", "b" and "c".
1138 /// ## `pattern`
1139 /// the regular expression
1140 /// ## `string`
1141 /// the string to scan for matches
1142 /// ## `compile_options`
1143 /// compile options for the regular expression, or 0
1144 /// ## `match_options`
1145 /// match options, or 0
1146 ///
1147 /// # Returns
1148 ///
1149 /// a [`None`]-terminated array of strings. Free
1150 /// it using g_strfreev()
1151 #[doc(alias = "g_regex_split_simple")]
1152 pub fn split_simple(
1153 pattern: impl IntoGStr,
1154 string: impl IntoGStr,
1155 compile_options: RegexCompileFlags,
1156 match_options: RegexMatchFlags,
1157 ) -> PtrSlice<GStringPtr> {
1158 pattern.run_with_gstr(|pattern| {
1159 string.run_with_gstr(|string| unsafe {
1160 FromGlibPtrContainer::from_glib_full(ffi::g_regex_split_simple(
1161 pattern.to_glib_none().0,
1162 string.to_glib_none().0,
1163 compile_options.into_glib(),
1164 match_options.into_glib(),
1165 ))
1166 })
1167 })
1168 }
1169}
1170
#[cfg(test)]
mod tests {
    use super::*;
    use crate::RegexCompileFlags;

    // Input shared by all tests: contains one "simple" and one "sample".
    const QUOTE: &str = "This is a simple sample.";

    // Compiles the pattern used by all tests; it matches both "simple" and
    // "sample".
    fn sample_regex() -> Regex {
        Regex::new("s[ai]mple", RegexCompileFlags::OPTIMIZE, RegexMatchFlags::DEFAULT)
            .expect("Regex new")
            .expect("Null regex")
    }

    #[test]
    fn test_replace_literal() {
        // Every match is replaced by the literal replacement text.
        let replaced = sample_regex()
            .replace_literal(QUOTE, 0, "XXX", RegexMatchFlags::DEFAULT)
            .expect("regex replace");

        assert_eq!(replaced, "This is a XXX XXX.");
    }

    #[test]
    fn test_split() {
        // The two matches cut the input into three tokens.
        let tokens = sample_regex().split(QUOTE, RegexMatchFlags::DEFAULT);

        assert_eq!(tokens.len(), 3);
        assert_eq!(tokens[0], "This is a ");
        assert_eq!(tokens[1], " ");
        assert_eq!(tokens[2], ".");
    }
}