glib/regex.rs
1// Take a look at the license at the top of the repository in the LICENSE file.
2
3// rustdoc-stripper-ignore-next
4//! This module is inefficient and should not be used by Rust programs except for
5//! compatibility with GLib.Regex based APIs.
6
7use crate::{
8 ffi, translate::*, GStr, GStringPtr, MatchInfo, PtrSlice, Regex, RegexCompileFlags,
9 RegexMatchFlags,
10};
11use std::{mem, ptr};
12
13impl Regex {
14 /// Retrieves the number of the subexpression named @name.
15 /// ## `name`
16 /// name of the subexpression
17 ///
18 /// # Returns
19 ///
20 /// The number of the subexpression or -1 if @name
21 /// does not exists
22 #[doc(alias = "g_regex_get_string_number")]
23 #[doc(alias = "get_string_number")]
24 pub fn string_number(&self, name: impl IntoGStr) -> i32 {
25 name.run_with_gstr(|name| unsafe {
26 ffi::g_regex_get_string_number(self.to_glib_none().0, name.to_glib_none().0)
27 })
28 }
29
30 /// Escapes the nul characters in @string to "\x00". It can be used
31 /// to compile a regex with embedded nul characters.
32 ///
33 /// For completeness, @length can be -1 for a nul-terminated string.
34 /// In this case the output string will be of course equal to @string.
35 /// ## `string`
36 /// the string to escape
37 /// ## `length`
38 /// the length of @string
39 ///
40 /// # Returns
41 ///
42 /// a newly-allocated escaped string
43 #[doc(alias = "g_regex_escape_nul")]
44 pub fn escape_nul(string: impl IntoGStr) -> crate::GString {
45 unsafe {
46 string.run_with_gstr(|string| {
47 from_glib_full(ffi::g_regex_escape_nul(
48 string.to_glib_none().0,
49 string.len() as _,
50 ))
51 })
52 }
53 }
54
55 /// Escapes the special characters used for regular expressions
56 /// in @string, for instance "a.b*c" becomes "a\.b\*c". This
57 /// function is useful to dynamically generate regular expressions.
58 ///
59 /// @string can contain nul characters that are replaced with "\0",
60 /// in this case remember to specify the correct length of @string
61 /// in @length.
62 /// ## `string`
63 /// the string to escape
64 /// ## `length`
65 /// the length of @string, in bytes, or -1 if @string is nul-terminated
66 ///
67 /// # Returns
68 ///
69 /// a newly-allocated escaped string
70 #[doc(alias = "g_regex_escape_string")]
71 pub fn escape_string(string: impl IntoGStr) -> crate::GString {
72 unsafe {
73 string.run_with_gstr(|string| {
74 from_glib_full(ffi::g_regex_escape_string(
75 string.to_glib_none().0,
76 string.len() as _,
77 ))
78 })
79 }
80 }
81
82 /// Checks whether @replacement is a valid replacement string
83 /// (see g_regex_replace()), i.e. that all escape sequences in
84 /// it are valid.
85 ///
86 /// If @has_references is not [`None`] then @replacement is checked
87 /// for pattern references. For instance, replacement text 'foo\n'
88 /// does not contain references and may be evaluated without information
89 /// about actual match, but '\0\1' (whole match followed by first
90 /// subpattern) requires valid #GMatchInfo object.
91 /// ## `replacement`
92 /// the replacement string
93 ///
94 /// # Returns
95 ///
96 /// whether @replacement is a valid replacement string
97 ///
98 /// ## `has_references`
99 /// location to store information about
100 /// references in @replacement or [`None`]
101 #[doc(alias = "g_regex_check_replacement")]
102 pub fn check_replacement(replacement: impl IntoGStr) -> Result<bool, crate::Error> {
103 replacement.run_with_gstr(|replacement| unsafe {
104 let mut has_references = mem::MaybeUninit::uninit();
105 let mut error = ptr::null_mut();
106 let is_ok = ffi::g_regex_check_replacement(
107 replacement.to_glib_none().0,
108 has_references.as_mut_ptr(),
109 &mut error,
110 );
111 debug_assert_eq!(is_ok == crate::ffi::GFALSE, !error.is_null());
112 if error.is_null() {
113 Ok(from_glib(has_references.assume_init()))
114 } else {
115 Err(from_glib_full(error))
116 }
117 })
118 }
119
120 /// Scans for a match in @string for @pattern.
121 ///
122 /// This function is equivalent to g_regex_match() but it does not
123 /// require to compile the pattern with g_regex_new(), avoiding some
124 /// lines of code when you need just to do a match without extracting
125 /// substrings, capture counts, and so on.
126 ///
127 /// If this function is to be called on the same @pattern more than
128 /// once, it's more efficient to compile the pattern once with
129 /// g_regex_new() and then use g_regex_match().
130 /// ## `pattern`
131 /// the regular expression
132 /// ## `string`
133 /// the string to scan for matches
134 /// ## `compile_options`
135 /// compile options for the regular expression, or 0
136 /// ## `match_options`
137 /// match options, or 0
138 ///
139 /// # Returns
140 ///
141 /// [`true`] if the string matched, [`false`] otherwise
142 #[doc(alias = "g_regex_match_simple")]
143 pub fn match_simple(
144 pattern: impl IntoGStr,
145 string: impl IntoGStr,
146 compile_options: RegexCompileFlags,
147 match_options: RegexMatchFlags,
148 ) -> bool {
149 pattern.run_with_gstr(|pattern| {
150 string.run_with_gstr(|string| unsafe {
151 from_glib(ffi::g_regex_match_simple(
152 pattern.to_glib_none().0,
153 string.to_glib_none().0,
154 compile_options.into_glib(),
155 match_options.into_glib(),
156 ))
157 })
158 })
159 }
160
161 /// Replaces all occurrences of the pattern in @self with the
162 /// replacement text. Backreferences of the form `\number` or
163 /// `\g<number>` in the replacement text are interpolated by the
164 /// number-th captured subexpression of the match, `\g<name>` refers
165 /// to the captured subexpression with the given name. `\0` refers
166 /// to the complete match, but `\0` followed by a number is the octal
167 /// representation of a character. To include a literal `\` in the
168 /// replacement, write `\\\\`.
169 ///
170 /// There are also escapes that changes the case of the following text:
171 ///
172 /// - \l: Convert to lower case the next character
173 /// - \u: Convert to upper case the next character
174 /// - \L: Convert to lower case till \E
175 /// - \U: Convert to upper case till \E
176 /// - \E: End case modification
177 ///
178 /// If you do not need to use backreferences use g_regex_replace_literal().
179 ///
180 /// The @replacement string must be UTF-8 encoded even if [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] was
181 /// passed to g_regex_new(). If you want to use not UTF-8 encoded strings
182 /// you can use g_regex_replace_literal().
183 ///
184 /// Setting @start_position differs from just passing over a shortened
185 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern that
186 /// begins with any kind of lookbehind assertion, such as "\b".
187 /// ## `string`
188 /// the string to perform matches against
189 /// ## `start_position`
190 /// starting index of the string to match, in bytes
191 /// ## `replacement`
192 /// text to replace each match with
193 /// ## `match_options`
194 /// options for the match
195 ///
196 /// # Returns
197 ///
198 /// a newly allocated string containing the replacements
199 #[doc(alias = "g_regex_replace")]
200 pub fn replace(
201 &self,
202 string: impl IntoGStr,
203 start_position: i32,
204 replacement: impl IntoGStr,
205 match_options: RegexMatchFlags,
206 ) -> Result<crate::GString, crate::Error> {
207 unsafe {
208 string.run_with_gstr(|string| {
209 replacement.run_with_gstr(|replacement| {
210 let mut error = ptr::null_mut();
211 let ret = ffi::g_regex_replace(
212 self.to_glib_none().0,
213 string.as_ptr() as *const _,
214 string.len() as _,
215 start_position,
216 replacement.to_glib_none().0,
217 match_options.into_glib(),
218 &mut error,
219 );
220 debug_assert_eq!(ret.is_null(), !error.is_null());
221 if error.is_null() {
222 Ok(from_glib_full(ret))
223 } else {
224 Err(from_glib_full(error))
225 }
226 })
227 })
228 }
229 }
230
231 /// Using the standard algorithm for regular expression matching only
232 /// the longest match in the string is retrieved. This function uses
233 /// a different algorithm so it can retrieve all the possible matches.
234 /// For more documentation see g_regex_match_all_full().
235 ///
236 /// A #GMatchInfo structure, used to get information on the match, is
237 /// stored in @match_info if not [`None`]. Note that if @match_info is
238 /// not [`None`] then it is created even if the function returns [`false`],
239 /// i.e. you must free it regardless if regular expression actually
240 /// matched.
241 ///
242 /// @string is not copied and is used in #GMatchInfo internally. If
243 /// you use any #GMatchInfo method (except g_match_info_free()) after
244 /// freeing or modifying @string then the behaviour is undefined.
245 /// ## `string`
246 /// the string to scan for matches
247 /// ## `match_options`
248 /// match options
249 ///
250 /// # Returns
251 ///
252 /// [`true`] is the string matched, [`false`] otherwise
253 ///
254 /// ## `match_info`
255 /// pointer to location where to store
256 /// the #GMatchInfo, or [`None`] if you do not need it
257 #[doc(alias = "g_regex_match_all")]
258 pub fn match_all<'input>(
259 &self,
260 string: &'input GStr,
261 match_options: RegexMatchFlags,
262 ) -> Result<MatchInfo<'input>, crate::Error> {
263 self.match_all_full(string, 0, match_options)
264 }
265
266 /// Using the standard algorithm for regular expression matching only
267 /// the longest match in the @string is retrieved, it is not possible
268 /// to obtain all the available matches. For instance matching
269 /// `"<a> <b> <c>"` against the pattern `"<.*>"`
270 /// you get `"<a> <b> <c>"`.
271 ///
272 /// This function uses a different algorithm (called DFA, i.e. deterministic
273 /// finite automaton), so it can retrieve all the possible matches, all
274 /// starting at the same point in the string. For instance matching
275 /// `"<a> <b> <c>"` against the pattern `"<.*>"`
276 /// you would obtain three matches: `"<a> <b> <c>"`,
277 /// `"<a> <b>"` and `"<a>"`.
278 ///
279 /// The number of matched strings is retrieved using
280 /// g_match_info_get_match_count(). To obtain the matched strings and
281 /// their position you can use, respectively, g_match_info_fetch() and
282 /// g_match_info_fetch_pos(). Note that the strings are returned in
283 /// reverse order of length; that is, the longest matching string is
284 /// given first.
285 ///
286 /// Note that the DFA algorithm is slower than the standard one and it
287 /// is not able to capture substrings, so backreferences do not work.
288 ///
289 /// Setting @start_position differs from just passing over a shortened
290 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
291 /// that begins with any kind of lookbehind assertion, such as "\b".
292 ///
293 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
294 ///
295 /// A #GMatchInfo structure, used to get information on the match, is
296 /// stored in @match_info if not [`None`]. Note that if @match_info is
297 /// not [`None`] then it is created even if the function returns [`false`],
298 /// i.e. you must free it regardless if regular expression actually
299 /// matched.
300 ///
301 /// @string is not copied and is used in #GMatchInfo internally. If
302 /// you use any #GMatchInfo method (except g_match_info_free()) after
303 /// freeing or modifying @string then the behaviour is undefined.
304 /// ## `string`
305 /// the string to scan for matches
306 /// ## `start_position`
307 /// starting index of the string to match, in bytes
308 /// ## `match_options`
309 /// match options
310 ///
311 /// # Returns
312 ///
313 /// [`true`] is the string matched, [`false`] otherwise
314 ///
315 /// ## `match_info`
316 /// pointer to location where to store
317 /// the #GMatchInfo, or [`None`] if you do not need it
318 #[doc(alias = "g_regex_match_all_full")]
319 pub fn match_all_full<'input>(
320 &self,
321 string: &'input GStr,
322 start_position: i32,
323 match_options: RegexMatchFlags,
324 ) -> Result<MatchInfo<'input>, crate::Error> {
325 unsafe {
326 let mut match_info = ptr::null_mut();
327 let mut error = ptr::null_mut();
328 let res = ffi::g_regex_match_all_full(
329 self.to_glib_none().0,
330 string.to_glib_none().0,
331 string.len() as _,
332 start_position,
333 match_options.into_glib(),
334 &mut match_info,
335 &mut error,
336 );
337 if error.is_null() {
338 let match_info = MatchInfo::from_glib_full(match_info);
339 debug_assert_eq!(match_info.matches(), from_glib(res));
340 Ok(match_info)
341 } else {
342 debug_assert!(match_info.is_null());
343 Err(from_glib_full(error))
344 }
345 }
346 }
347
348 /// Scans for a match in @string for the pattern in @self.
349 /// The @match_options are combined with the match options specified
350 /// when the @self structure was created, letting you have more
351 /// flexibility in reusing #GRegex structures.
352 ///
353 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
354 ///
355 /// A #GMatchInfo structure, used to get information on the match,
356 /// is stored in @match_info if not [`None`]. Note that if @match_info
357 /// is not [`None`] then it is created even if the function returns [`false`],
358 /// i.e. you must free it regardless if regular expression actually matched.
359 ///
360 /// To retrieve all the non-overlapping matches of the pattern in
361 /// string you can use g_match_info_next().
362 ///
363 ///
364 ///
365 /// **⚠️ The following code is in C ⚠️**
366 ///
367 /// ```C
368 /// static void
369 /// print_uppercase_words (const gchar *string)
370 /// {
371 /// // Print all uppercase-only words.
372 /// GRegex *regex;
373 /// GMatchInfo *match_info;
374 ///
375 /// regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
376 /// g_regex_match (regex, string, 0, &match_info);
377 /// while (g_match_info_matches (match_info))
378 /// {
379 /// gchar *word = g_match_info_fetch (match_info, 0);
380 /// g_print ("Found: %s\n", word);
381 /// g_free (word);
382 /// g_match_info_next (match_info, NULL);
383 /// }
384 /// g_match_info_free (match_info);
385 /// g_regex_unref (regex);
386 /// }
387 /// ```
388 ///
389 /// @string is not copied and is used in #GMatchInfo internally. If
390 /// you use any #GMatchInfo method (except g_match_info_free()) after
391 /// freeing or modifying @string then the behaviour is undefined.
392 /// ## `string`
393 /// the string to scan for matches
394 /// ## `match_options`
395 /// match options
396 ///
397 /// # Returns
398 ///
399 /// [`true`] is the string matched, [`false`] otherwise
400 ///
401 /// ## `match_info`
402 /// pointer to location where to store
403 /// the #GMatchInfo, or [`None`] if you do not need it
404 #[doc(alias = "g_regex_match")]
405 pub fn match_<'input>(
406 &self,
407 string: &'input GStr,
408 match_options: RegexMatchFlags,
409 ) -> Result<MatchInfo<'input>, crate::Error> {
410 self.match_full(string, 0, match_options)
411 }
412
413 /// Scans for a match in @string for the pattern in @self.
414 /// The @match_options are combined with the match options specified
415 /// when the @self structure was created, letting you have more
416 /// flexibility in reusing #GRegex structures.
417 ///
418 /// Setting @start_position differs from just passing over a shortened
419 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
420 /// that begins with any kind of lookbehind assertion, such as "\b".
421 ///
422 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
423 ///
424 /// A #GMatchInfo structure, used to get information on the match, is
425 /// stored in @match_info if not [`None`]. Note that if @match_info is
426 /// not [`None`] then it is created even if the function returns [`false`],
427 /// i.e. you must free it regardless if regular expression actually
428 /// matched.
429 ///
430 /// @string is not copied and is used in #GMatchInfo internally. If
431 /// you use any #GMatchInfo method (except g_match_info_free()) after
432 /// freeing or modifying @string then the behaviour is undefined.
433 ///
434 /// To retrieve all the non-overlapping matches of the pattern in
435 /// string you can use g_match_info_next().
436 ///
437 ///
438 ///
439 /// **⚠️ The following code is in C ⚠️**
440 ///
441 /// ```C
442 /// static void
443 /// print_uppercase_words (const gchar *string)
444 /// {
445 /// // Print all uppercase-only words.
446 /// GRegex *regex;
447 /// GMatchInfo *match_info;
448 /// GError *error = NULL;
449 ///
450 /// regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
451 /// g_regex_match_full (regex, string, -1, 0, 0, &match_info, &error);
452 /// while (g_match_info_matches (match_info))
453 /// {
454 /// gchar *word = g_match_info_fetch (match_info, 0);
455 /// g_print ("Found: %s\n", word);
456 /// g_free (word);
457 /// g_match_info_next (match_info, &error);
458 /// }
459 /// g_match_info_free (match_info);
460 /// g_regex_unref (regex);
461 /// if (error != NULL)
462 /// {
463 /// g_printerr ("Error while matching: %s\n", error->message);
464 /// g_error_free (error);
465 /// }
466 /// }
467 /// ```
468 /// ## `string`
469 /// the string to scan for matches
470 /// ## `start_position`
471 /// starting index of the string to match, in bytes
472 /// ## `match_options`
473 /// match options
474 ///
475 /// # Returns
476 ///
477 /// [`true`] is the string matched, [`false`] otherwise
478 ///
479 /// ## `match_info`
480 /// pointer to location where to store
481 /// the #GMatchInfo, or [`None`] if you do not need it
482 #[doc(alias = "g_regex_match_full")]
483 pub fn match_full<'input>(
484 &self,
485 string: &'input GStr,
486 start_position: i32,
487 match_options: RegexMatchFlags,
488 ) -> Result<MatchInfo<'input>, crate::Error> {
489 unsafe {
490 let mut match_info = ptr::null_mut();
491 let mut error = ptr::null_mut();
492 let res = ffi::g_regex_match_full(
493 self.to_glib_none().0,
494 string.to_glib_none().0,
495 string.len() as _,
496 start_position,
497 match_options.into_glib(),
498 &mut match_info,
499 &mut error,
500 );
501 if error.is_null() {
502 let match_info = MatchInfo::from_glib_full(match_info);
503 debug_assert_eq!(match_info.matches(), from_glib(res));
504 Ok(match_info)
505 } else {
506 debug_assert!(match_info.is_null());
507 Err(from_glib_full(error))
508 }
509 }
510 }
511
512 /// Replaces all occurrences of the pattern in @self with the
513 /// replacement text. @replacement is replaced literally, to
514 /// include backreferences use g_regex_replace().
515 ///
516 /// Setting @start_position differs from just passing over a
517 /// shortened string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the
518 /// case of a pattern that begins with any kind of lookbehind
519 /// assertion, such as "\b".
520 /// ## `string`
521 /// the string to perform matches against
522 /// ## `start_position`
523 /// starting index of the string to match, in bytes
524 /// ## `replacement`
525 /// text to replace each match with
526 /// ## `match_options`
527 /// options for the match
528 ///
529 /// # Returns
530 ///
531 /// a newly allocated string containing the replacements
532 #[doc(alias = "g_regex_replace_literal")]
533 pub fn replace_literal(
534 &self,
535 string: impl IntoGStr,
536 start_position: i32,
537 replacement: impl IntoGStr,
538 match_options: RegexMatchFlags,
539 ) -> Result<crate::GString, crate::Error> {
540 unsafe {
541 string.run_with_gstr(|string| {
542 replacement.run_with_gstr(|replacement| {
543 let mut error = ptr::null_mut();
544 let ret = ffi::g_regex_replace_literal(
545 self.to_glib_none().0,
546 string.to_glib_none().0,
547 string.len() as _,
548 start_position,
549 replacement.to_glib_none().0,
550 match_options.into_glib(),
551 &mut error,
552 );
553 debug_assert_eq!(ret.is_null(), !error.is_null());
554 if error.is_null() {
555 Ok(from_glib_full(ret))
556 } else {
557 Err(from_glib_full(error))
558 }
559 })
560 })
561 }
562 }
563
564 /// Breaks the string on the pattern, and returns an array of the tokens.
565 /// If the pattern contains capturing parentheses, then the text for each
566 /// of the substrings will also be returned. If the pattern does not match
567 /// anywhere in the string, then the whole string is returned as the first
568 /// token.
569 ///
570 /// As a special case, the result of splitting the empty string "" is an
571 /// empty vector, not a vector containing a single string. The reason for
572 /// this special case is that being able to represent an empty vector is
573 /// typically more useful than consistent handling of empty elements. If
574 /// you do need to represent empty elements, you'll need to check for the
575 /// empty string before calling this function.
576 ///
577 /// A pattern that can match empty strings splits @string into separate
578 /// characters wherever it matches the empty string between characters.
579 /// For example splitting "ab c" using as a separator "\s*", you will get
580 /// "a", "b" and "c".
581 /// ## `string`
582 /// the string to split with the pattern
583 /// ## `match_options`
584 /// match time option flags
585 ///
586 /// # Returns
587 ///
588 /// a [`None`]-terminated gchar ** array. Free
589 /// it using g_strfreev()
590 #[doc(alias = "g_regex_split")]
591 pub fn split(
592 &self,
593 string: impl IntoGStr,
594 match_options: RegexMatchFlags,
595 ) -> PtrSlice<GStringPtr> {
596 self.split_full(string, 0, match_options, 0)
597 .unwrap_or_default()
598 }
599
600 /// Breaks the string on the pattern, and returns an array of the tokens.
601 /// If the pattern contains capturing parentheses, then the text for each
602 /// of the substrings will also be returned. If the pattern does not match
603 /// anywhere in the string, then the whole string is returned as the first
604 /// token.
605 ///
606 /// As a special case, the result of splitting the empty string "" is an
607 /// empty vector, not a vector containing a single string. The reason for
608 /// this special case is that being able to represent an empty vector is
609 /// typically more useful than consistent handling of empty elements. If
610 /// you do need to represent empty elements, you'll need to check for the
611 /// empty string before calling this function.
612 ///
613 /// A pattern that can match empty strings splits @string into separate
614 /// characters wherever it matches the empty string between characters.
615 /// For example splitting "ab c" using as a separator "\s*", you will get
616 /// "a", "b" and "c".
617 ///
618 /// Setting @start_position differs from just passing over a shortened
619 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
620 /// that begins with any kind of lookbehind assertion, such as "\b".
621 /// ## `string`
622 /// the string to split with the pattern
623 /// ## `start_position`
624 /// starting index of the string to match, in bytes
625 /// ## `match_options`
626 /// match time option flags
627 /// ## `max_tokens`
628 /// the maximum number of tokens to split @string into.
629 /// If this is less than 1, the string is split completely
630 ///
631 /// # Returns
632 ///
633 /// a [`None`]-terminated gchar ** array. Free
634 /// it using g_strfreev()
635 #[doc(alias = "g_regex_split_full")]
636 pub fn split_full(
637 &self,
638 string: impl IntoGStr,
639 start_position: i32,
640 match_options: RegexMatchFlags,
641 max_tokens: i32,
642 ) -> Result<PtrSlice<GStringPtr>, crate::Error> {
643 unsafe {
644 let mut error = ptr::null_mut();
645 string.run_with_gstr(|string| {
646 let ret = ffi::g_regex_split_full(
647 self.to_glib_none().0,
648 string.to_glib_none().0,
649 string.len() as _,
650 start_position,
651 match_options.into_glib(),
652 max_tokens,
653 &mut error,
654 );
655 debug_assert_eq!(ret.is_null(), !error.is_null());
656 if error.is_null() {
657 Ok(FromGlibPtrContainer::from_glib_full(ret))
658 } else {
659 Err(from_glib_full(error))
660 }
661 })
662 }
663 }
664
665 /// Breaks the string on the pattern, and returns an array of
666 /// the tokens. If the pattern contains capturing parentheses,
667 /// then the text for each of the substrings will also be returned.
668 /// If the pattern does not match anywhere in the string, then the
669 /// whole string is returned as the first token.
670 ///
671 /// This function is equivalent to g_regex_split() but it does
672 /// not require to compile the pattern with g_regex_new(), avoiding
673 /// some lines of code when you need just to do a split without
674 /// extracting substrings, capture counts, and so on.
675 ///
676 /// If this function is to be called on the same @pattern more than
677 /// once, it's more efficient to compile the pattern once with
678 /// g_regex_new() and then use g_regex_split().
679 ///
680 /// As a special case, the result of splitting the empty string ""
681 /// is an empty vector, not a vector containing a single string.
682 /// The reason for this special case is that being able to represent
683 /// an empty vector is typically more useful than consistent handling
684 /// of empty elements. If you do need to represent empty elements,
685 /// you'll need to check for the empty string before calling this
686 /// function.
687 ///
688 /// A pattern that can match empty strings splits @string into
689 /// separate characters wherever it matches the empty string between
690 /// characters. For example splitting "ab c" using as a separator
691 /// "\s*", you will get "a", "b" and "c".
692 /// ## `pattern`
693 /// the regular expression
694 /// ## `string`
695 /// the string to scan for matches
696 /// ## `compile_options`
697 /// compile options for the regular expression, or 0
698 /// ## `match_options`
699 /// match options, or 0
700 ///
701 /// # Returns
702 ///
703 /// a [`None`]-terminated array of strings. Free
704 /// it using g_strfreev()
705 #[doc(alias = "g_regex_split_simple")]
706 pub fn split_simple(
707 pattern: impl IntoGStr,
708 string: impl IntoGStr,
709 compile_options: RegexCompileFlags,
710 match_options: RegexMatchFlags,
711 ) -> PtrSlice<GStringPtr> {
712 pattern.run_with_gstr(|pattern| {
713 string.run_with_gstr(|string| unsafe {
714 FromGlibPtrContainer::from_glib_full(ffi::g_regex_split_simple(
715 pattern.to_glib_none().0,
716 string.to_glib_none().0,
717 compile_options.into_glib(),
718 match_options.into_glib(),
719 ))
720 })
721 })
722 }
723}
724
#[cfg(test)]
mod tests {
    use super::*;
    use crate::RegexCompileFlags;

    // Compiles `pattern` with default match flags, panicking if the regex
    // cannot be created.
    fn compile(pattern: &str, compile_options: RegexCompileFlags) -> Regex {
        Regex::new(pattern, compile_options, RegexMatchFlags::DEFAULT)
            .expect("Regex new")
            .expect("Null regex")
    }

    #[test]
    fn test_replace_literal() {
        let regex = compile("s[ai]mple", RegexCompileFlags::OPTIMIZE);

        let replaced = regex
            .replace_literal(
                "This is a simple sample.",
                0,
                "XXX",
                RegexMatchFlags::DEFAULT,
            )
            .expect("regex replace");

        assert_eq!(replaced, "This is a XXX XXX.");
    }

    #[test]
    fn test_split() {
        let regex = compile("s[ai]mple", RegexCompileFlags::OPTIMIZE);

        let tokens = regex.split("This is a simple sample.", RegexMatchFlags::DEFAULT);

        // Both matches are removed, leaving the text around them.
        let expected = ["This is a ", " ", "."];
        assert_eq!(tokens.len(), 3);
        for (index, token) in expected.iter().copied().enumerate() {
            assert_eq!(tokens[index], token);
        }
    }

    #[test]
    fn test_match() {
        let regex = compile(r"\d", RegexCompileFlags::DEFAULT);

        // Two digits: the first match is "8", advancing yields "7", and
        // advancing once more leaves nothing to fetch.
        let digits = crate::GString::from("87");
        let info = regex
            .match_(digits.as_gstr(), RegexMatchFlags::DEFAULT)
            .unwrap();
        assert!(info.matches());
        assert_eq!(info.match_count(), 1);
        assert_eq!(info.fetch(0).as_deref(), Some("8"));
        assert!(info.next().unwrap());
        assert_eq!(info.fetch(0).as_deref(), Some("7"));
        assert!(!info.next().unwrap());
        assert!(info.fetch(0).is_none());

        // No digit at all: a match info is still returned, but it reports no
        // match.
        let letters = crate::GString::from("a");
        let info = regex
            .match_(letters.as_gstr(), RegexMatchFlags::DEFAULT)
            .unwrap();
        assert!(!info.matches());
        assert_eq!(info.match_count(), 0);
        assert!(info.fetch(0).is_none());
    }
}
796}