glib/regex.rs
1// Take a look at the license at the top of the repository in the LICENSE file.
2
3// rustdoc-stripper-ignore-next
4//! This module is inefficient and should not be used by Rust programs except for
5//! compatibility with GLib.Regex based APIs.
6
7use crate::{
8 ffi, translate::*, GStr, GStringPtr, MatchInfo, PtrSlice, Regex, RegexCompileFlags,
9 RegexMatchFlags,
10};
11use std::{mem, ptr};
12
13impl Regex {
14 /// Retrieves the number of the subexpression named @name.
15 /// ## `name`
16 /// name of the subexpression
17 ///
18 /// # Returns
19 ///
20 /// The number of the subexpression or -1 if @name
21 /// does not exists
22 #[doc(alias = "g_regex_get_string_number")]
23 #[doc(alias = "get_string_number")]
24 pub fn string_number(&self, name: impl IntoGStr) -> i32 {
25 name.run_with_gstr(|name| unsafe {
26 ffi::g_regex_get_string_number(self.to_glib_none().0, name.to_glib_none().0)
27 })
28 }
29
30 /// Escapes the nul characters in @string to "\x00". It can be used
31 /// to compile a regex with embedded nul characters.
32 ///
33 /// For completeness, @length can be -1 for a nul-terminated string.
34 /// In this case the output string will be of course equal to @string.
35 /// ## `string`
36 /// the string to escape
37 /// ## `length`
38 /// the length of @string
39 ///
40 /// # Returns
41 ///
42 /// a newly-allocated escaped string
43 #[doc(alias = "g_regex_escape_nul")]
44 pub fn escape_nul(string: impl IntoGStr) -> crate::GString {
45 unsafe {
46 string.run_with_gstr(|string| {
47 from_glib_full(ffi::g_regex_escape_nul(
48 string.to_glib_none().0,
49 string.len() as _,
50 ))
51 })
52 }
53 }
54
55 /// Escapes the special characters used for regular expressions
56 /// in @string, for instance "a.b*c" becomes "a\.b\*c". This
57 /// function is useful to dynamically generate regular expressions.
58 ///
59 /// @string can contain nul characters that are replaced with "\0",
60 /// in this case remember to specify the correct length of @string
61 /// in @length.
62 /// ## `string`
63 /// the string to escape
64 /// ## `length`
65 /// the length of @string, in bytes, or -1 if @string is nul-terminated
66 ///
67 /// # Returns
68 ///
69 /// a newly-allocated escaped string
70 #[doc(alias = "g_regex_escape_string")]
71 pub fn escape_string(string: impl IntoGStr) -> crate::GString {
72 unsafe {
73 string.run_with_gstr(|string| {
74 from_glib_full(ffi::g_regex_escape_string(
75 string.to_glib_none().0,
76 string.len() as _,
77 ))
78 })
79 }
80 }
81
82 /// Checks whether @replacement is a valid replacement string
83 /// (see g_regex_replace()), i.e. that all escape sequences in
84 /// it are valid.
85 ///
86 /// If @has_references is not [`None`] then @replacement is checked
87 /// for pattern references. For instance, replacement text 'foo\n'
88 /// does not contain references and may be evaluated without information
89 /// about actual match, but '\0\1' (whole match followed by first
90 /// subpattern) requires valid #GMatchInfo object.
91 /// ## `replacement`
92 /// the replacement string
93 ///
94 /// # Returns
95 ///
96 /// whether @replacement is a valid replacement string
97 ///
98 /// ## `has_references`
99 /// location to store information about
100 /// references in @replacement or [`None`]
101 #[doc(alias = "g_regex_check_replacement")]
102 pub fn check_replacement(replacement: impl IntoGStr) -> Result<bool, crate::Error> {
103 replacement.run_with_gstr(|replacement| unsafe {
104 let mut has_references = mem::MaybeUninit::uninit();
105 let mut error = ptr::null_mut();
106 let is_ok = ffi::g_regex_check_replacement(
107 replacement.to_glib_none().0,
108 has_references.as_mut_ptr(),
109 &mut error,
110 );
111 debug_assert_eq!(is_ok == crate::ffi::GFALSE, !error.is_null());
112 if error.is_null() {
113 Ok(from_glib(has_references.assume_init()))
114 } else {
115 Err(from_glib_full(error))
116 }
117 })
118 }
119
120 /// Scans for a match in @string for @pattern.
121 ///
122 /// This function is equivalent to g_regex_match() but it does not
123 /// require to compile the pattern with g_regex_new(), avoiding some
124 /// lines of code when you need just to do a match without extracting
125 /// substrings, capture counts, and so on.
126 ///
127 /// If this function is to be called on the same @pattern more than
128 /// once, it's more efficient to compile the pattern once with
129 /// g_regex_new() and then use g_regex_match().
130 /// ## `pattern`
131 /// the regular expression
132 /// ## `string`
133 /// the string to scan for matches
134 /// ## `compile_options`
135 /// compile options for the regular expression, or 0
136 /// ## `match_options`
137 /// match options, or 0
138 ///
139 /// # Returns
140 ///
141 /// [`true`] if the string matched, [`false`] otherwise
142 #[doc(alias = "g_regex_match_simple")]
143 pub fn match_simple(
144 pattern: impl IntoGStr,
145 string: impl IntoGStr,
146 compile_options: RegexCompileFlags,
147 match_options: RegexMatchFlags,
148 ) -> bool {
149 pattern.run_with_gstr(|pattern| {
150 string.run_with_gstr(|string| unsafe {
151 from_glib(ffi::g_regex_match_simple(
152 pattern.to_glib_none().0,
153 string.to_glib_none().0,
154 compile_options.into_glib(),
155 match_options.into_glib(),
156 ))
157 })
158 })
159 }
160
161 /// Replaces all occurrences of the pattern in @self with the
162 /// replacement text. Backreferences of the form `\number` or
163 /// `\g<number>` in the replacement text are interpolated by the
164 /// number-th captured subexpression of the match, `\g<name>` refers
165 /// to the captured subexpression with the given name. `\0` refers
166 /// to the complete match, but `\0` followed by a number is the octal
167 /// representation of a character. To include a literal `\` in the
168 /// replacement, write `\\\\`.
169 ///
170 /// There are also escapes that changes the case of the following text:
171 ///
172 /// - \l: Convert to lower case the next character
173 /// - \u: Convert to upper case the next character
174 /// - \L: Convert to lower case till \E
175 /// - \U: Convert to upper case till \E
176 /// - \E: End case modification
177 ///
178 /// If you do not need to use backreferences use g_regex_replace_literal().
179 ///
180 /// The @replacement string must be UTF-8 encoded even if [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] was
181 /// passed to g_regex_new(). If you want to use not UTF-8 encoded strings
182 /// you can use g_regex_replace_literal().
183 ///
184 /// Setting @start_position differs from just passing over a shortened
185 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern that
186 /// begins with any kind of lookbehind assertion, such as "\b".
187 /// ## `string`
188 /// the string to perform matches against
189 /// ## `start_position`
190 /// starting index of the string to match, in bytes
191 /// ## `replacement`
192 /// text to replace each match with
193 /// ## `match_options`
194 /// options for the match
195 ///
196 /// # Returns
197 ///
198 /// a newly allocated string containing the replacements
199 #[doc(alias = "g_regex_replace")]
200 pub fn replace(
201 &self,
202 string: impl IntoGStr,
203 start_position: i32,
204 replacement: impl IntoGStr,
205 match_options: RegexMatchFlags,
206 ) -> Result<crate::GString, crate::Error> {
207 unsafe {
208 string.run_with_gstr(|string| {
209 replacement.run_with_gstr(|replacement| {
210 let mut error = ptr::null_mut();
211 let ret = ffi::g_regex_replace(
212 self.to_glib_none().0,
213 string.as_ptr() as *const _,
214 string.len() as _,
215 start_position,
216 replacement.to_glib_none().0,
217 match_options.into_glib(),
218 &mut error,
219 );
220 if error.is_null() {
221 Ok(from_glib_full(ret))
222 } else {
223 Err(from_glib_full(error))
224 }
225 })
226 })
227 }
228 }
229
230 /// Using the standard algorithm for regular expression matching only
231 /// the longest match in the string is retrieved. This function uses
232 /// a different algorithm so it can retrieve all the possible matches.
233 /// For more documentation see g_regex_match_all_full().
234 ///
235 /// A #GMatchInfo structure, used to get information on the match, is
236 /// stored in @match_info if not [`None`]. Note that if @match_info is
237 /// not [`None`] then it is created even if the function returns [`false`],
238 /// i.e. you must free it regardless if regular expression actually
239 /// matched.
240 ///
241 /// @string is not copied and is used in #GMatchInfo internally. If
242 /// you use any #GMatchInfo method (except g_match_info_free()) after
243 /// freeing or modifying @string then the behaviour is undefined.
244 /// ## `string`
245 /// the string to scan for matches
246 /// ## `match_options`
247 /// match options
248 ///
249 /// # Returns
250 ///
251 /// [`true`] is the string matched, [`false`] otherwise
252 ///
253 /// ## `match_info`
254 /// pointer to location where to store
255 /// the #GMatchInfo, or [`None`] if you do not need it
256 #[doc(alias = "g_regex_match_all")]
257 pub fn match_all<'input>(
258 &self,
259 string: &'input GStr,
260 match_options: RegexMatchFlags,
261 ) -> Option<MatchInfo<'input>> {
262 self.match_all_full(string, 0, match_options).ok()
263 }
264
265 /// Using the standard algorithm for regular expression matching only
266 /// the longest match in the @string is retrieved, it is not possible
267 /// to obtain all the available matches. For instance matching
268 /// `"<a> <b> <c>"` against the pattern `"<.*>"`
269 /// you get `"<a> <b> <c>"`.
270 ///
271 /// This function uses a different algorithm (called DFA, i.e. deterministic
272 /// finite automaton), so it can retrieve all the possible matches, all
273 /// starting at the same point in the string. For instance matching
274 /// `"<a> <b> <c>"` against the pattern `"<.*>"`
275 /// you would obtain three matches: `"<a> <b> <c>"`,
276 /// `"<a> <b>"` and `"<a>"`.
277 ///
278 /// The number of matched strings is retrieved using
279 /// g_match_info_get_match_count(). To obtain the matched strings and
280 /// their position you can use, respectively, g_match_info_fetch() and
281 /// g_match_info_fetch_pos(). Note that the strings are returned in
282 /// reverse order of length; that is, the longest matching string is
283 /// given first.
284 ///
285 /// Note that the DFA algorithm is slower than the standard one and it
286 /// is not able to capture substrings, so backreferences do not work.
287 ///
288 /// Setting @start_position differs from just passing over a shortened
289 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
290 /// that begins with any kind of lookbehind assertion, such as "\b".
291 ///
292 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
293 ///
294 /// A #GMatchInfo structure, used to get information on the match, is
295 /// stored in @match_info if not [`None`]. Note that if @match_info is
296 /// not [`None`] then it is created even if the function returns [`false`],
297 /// i.e. you must free it regardless if regular expression actually
298 /// matched.
299 ///
300 /// @string is not copied and is used in #GMatchInfo internally. If
301 /// you use any #GMatchInfo method (except g_match_info_free()) after
302 /// freeing or modifying @string then the behaviour is undefined.
303 /// ## `string`
304 /// the string to scan for matches
305 /// ## `start_position`
306 /// starting index of the string to match, in bytes
307 /// ## `match_options`
308 /// match options
309 ///
310 /// # Returns
311 ///
312 /// [`true`] is the string matched, [`false`] otherwise
313 ///
314 /// ## `match_info`
315 /// pointer to location where to store
316 /// the #GMatchInfo, or [`None`] if you do not need it
317 #[doc(alias = "g_regex_match_all_full")]
318 pub fn match_all_full<'input>(
319 &self,
320 string: &'input GStr,
321 start_position: i32,
322 match_options: RegexMatchFlags,
323 ) -> Result<MatchInfo<'input>, crate::Error> {
324 unsafe {
325 let mut match_info = ptr::null_mut();
326 let mut error = ptr::null_mut();
327 let is_ok = ffi::g_regex_match_all_full(
328 self.to_glib_none().0,
329 string.to_glib_none().0,
330 string.len() as _,
331 start_position,
332 match_options.into_glib(),
333 &mut match_info,
334 &mut error,
335 );
336 debug_assert_eq!(is_ok == crate::ffi::GFALSE, !error.is_null());
337 if error.is_null() {
338 Ok(from_glib_full(match_info))
339 } else {
340 Err(from_glib_full(error))
341 }
342 }
343 }
344
345 /// Scans for a match in @string for the pattern in @self.
346 /// The @match_options are combined with the match options specified
347 /// when the @self structure was created, letting you have more
348 /// flexibility in reusing #GRegex structures.
349 ///
350 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
351 ///
352 /// A #GMatchInfo structure, used to get information on the match,
353 /// is stored in @match_info if not [`None`]. Note that if @match_info
354 /// is not [`None`] then it is created even if the function returns [`false`],
355 /// i.e. you must free it regardless if regular expression actually matched.
356 ///
357 /// To retrieve all the non-overlapping matches of the pattern in
358 /// string you can use g_match_info_next().
359 ///
360 ///
361 ///
362 /// **⚠️ The following code is in C ⚠️**
363 ///
364 /// ```C
365 /// static void
366 /// print_uppercase_words (const gchar *string)
367 /// {
368 /// // Print all uppercase-only words.
369 /// GRegex *regex;
370 /// GMatchInfo *match_info;
371 ///
372 /// regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
373 /// g_regex_match (regex, string, 0, &match_info);
374 /// while (g_match_info_matches (match_info))
375 /// {
376 /// gchar *word = g_match_info_fetch (match_info, 0);
377 /// g_print ("Found: %s\n", word);
378 /// g_free (word);
379 /// g_match_info_next (match_info, NULL);
380 /// }
381 /// g_match_info_free (match_info);
382 /// g_regex_unref (regex);
383 /// }
384 /// ```
385 ///
386 /// @string is not copied and is used in #GMatchInfo internally. If
387 /// you use any #GMatchInfo method (except g_match_info_free()) after
388 /// freeing or modifying @string then the behaviour is undefined.
389 /// ## `string`
390 /// the string to scan for matches
391 /// ## `match_options`
392 /// match options
393 ///
394 /// # Returns
395 ///
396 /// [`true`] is the string matched, [`false`] otherwise
397 ///
398 /// ## `match_info`
399 /// pointer to location where to store
400 /// the #GMatchInfo, or [`None`] if you do not need it
401 #[doc(alias = "g_regex_match")]
402 pub fn match_<'input>(
403 &self,
404 string: &'input GStr,
405 match_options: RegexMatchFlags,
406 ) -> Option<MatchInfo<'input>> {
407 self.match_full(string, 0, match_options).ok()
408 }
409
410 /// Scans for a match in @string for the pattern in @self.
411 /// The @match_options are combined with the match options specified
412 /// when the @self structure was created, letting you have more
413 /// flexibility in reusing #GRegex structures.
414 ///
415 /// Setting @start_position differs from just passing over a shortened
416 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
417 /// that begins with any kind of lookbehind assertion, such as "\b".
418 ///
419 /// Unless [`RegexCompileFlags::RAW`][crate::RegexCompileFlags::RAW] is specified in the options, @string must be valid UTF-8.
420 ///
421 /// A #GMatchInfo structure, used to get information on the match, is
422 /// stored in @match_info if not [`None`]. Note that if @match_info is
423 /// not [`None`] then it is created even if the function returns [`false`],
424 /// i.e. you must free it regardless if regular expression actually
425 /// matched.
426 ///
427 /// @string is not copied and is used in #GMatchInfo internally. If
428 /// you use any #GMatchInfo method (except g_match_info_free()) after
429 /// freeing or modifying @string then the behaviour is undefined.
430 ///
431 /// To retrieve all the non-overlapping matches of the pattern in
432 /// string you can use g_match_info_next().
433 ///
434 ///
435 ///
436 /// **⚠️ The following code is in C ⚠️**
437 ///
438 /// ```C
439 /// static void
440 /// print_uppercase_words (const gchar *string)
441 /// {
442 /// // Print all uppercase-only words.
443 /// GRegex *regex;
444 /// GMatchInfo *match_info;
445 /// GError *error = NULL;
446 ///
447 /// regex = g_regex_new ("[A-Z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, NULL);
448 /// g_regex_match_full (regex, string, -1, 0, 0, &match_info, &error);
449 /// while (g_match_info_matches (match_info))
450 /// {
451 /// gchar *word = g_match_info_fetch (match_info, 0);
452 /// g_print ("Found: %s\n", word);
453 /// g_free (word);
454 /// g_match_info_next (match_info, &error);
455 /// }
456 /// g_match_info_free (match_info);
457 /// g_regex_unref (regex);
458 /// if (error != NULL)
459 /// {
460 /// g_printerr ("Error while matching: %s\n", error->message);
461 /// g_error_free (error);
462 /// }
463 /// }
464 /// ```
465 /// ## `string`
466 /// the string to scan for matches
467 /// ## `start_position`
468 /// starting index of the string to match, in bytes
469 /// ## `match_options`
470 /// match options
471 ///
472 /// # Returns
473 ///
474 /// [`true`] is the string matched, [`false`] otherwise
475 ///
476 /// ## `match_info`
477 /// pointer to location where to store
478 /// the #GMatchInfo, or [`None`] if you do not need it
479 #[doc(alias = "g_regex_match_full")]
480 pub fn match_full<'input>(
481 &self,
482 string: &'input GStr,
483 start_position: i32,
484 match_options: RegexMatchFlags,
485 ) -> Result<MatchInfo<'input>, crate::Error> {
486 unsafe {
487 let mut match_info = ptr::null_mut();
488 let mut error = ptr::null_mut();
489 let is_ok = ffi::g_regex_match_full(
490 self.to_glib_none().0,
491 string.to_glib_none().0,
492 string.len() as _,
493 start_position,
494 match_options.into_glib(),
495 &mut match_info,
496 &mut error,
497 );
498 debug_assert_eq!(is_ok == crate::ffi::GFALSE, !error.is_null());
499 if error.is_null() {
500 Ok(from_glib_full(match_info))
501 } else {
502 Err(from_glib_full(error))
503 }
504 }
505 }
506
507 /// Replaces all occurrences of the pattern in @self with the
508 /// replacement text. @replacement is replaced literally, to
509 /// include backreferences use g_regex_replace().
510 ///
511 /// Setting @start_position differs from just passing over a
512 /// shortened string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the
513 /// case of a pattern that begins with any kind of lookbehind
514 /// assertion, such as "\b".
515 /// ## `string`
516 /// the string to perform matches against
517 /// ## `start_position`
518 /// starting index of the string to match, in bytes
519 /// ## `replacement`
520 /// text to replace each match with
521 /// ## `match_options`
522 /// options for the match
523 ///
524 /// # Returns
525 ///
526 /// a newly allocated string containing the replacements
527 #[doc(alias = "g_regex_replace_literal")]
528 pub fn replace_literal(
529 &self,
530 string: impl IntoGStr,
531 start_position: i32,
532 replacement: impl IntoGStr,
533 match_options: RegexMatchFlags,
534 ) -> Result<crate::GString, crate::Error> {
535 unsafe {
536 string.run_with_gstr(|string| {
537 replacement.run_with_gstr(|replacement| {
538 let mut error = ptr::null_mut();
539 let ret = ffi::g_regex_replace_literal(
540 self.to_glib_none().0,
541 string.to_glib_none().0,
542 string.len() as _,
543 start_position,
544 replacement.to_glib_none().0,
545 match_options.into_glib(),
546 &mut error,
547 );
548 if error.is_null() {
549 Ok(from_glib_full(ret))
550 } else {
551 Err(from_glib_full(error))
552 }
553 })
554 })
555 }
556 }
557
558 /// Breaks the string on the pattern, and returns an array of the tokens.
559 /// If the pattern contains capturing parentheses, then the text for each
560 /// of the substrings will also be returned. If the pattern does not match
561 /// anywhere in the string, then the whole string is returned as the first
562 /// token.
563 ///
564 /// As a special case, the result of splitting the empty string "" is an
565 /// empty vector, not a vector containing a single string. The reason for
566 /// this special case is that being able to represent an empty vector is
567 /// typically more useful than consistent handling of empty elements. If
568 /// you do need to represent empty elements, you'll need to check for the
569 /// empty string before calling this function.
570 ///
571 /// A pattern that can match empty strings splits @string into separate
572 /// characters wherever it matches the empty string between characters.
573 /// For example splitting "ab c" using as a separator "\s*", you will get
574 /// "a", "b" and "c".
575 /// ## `string`
576 /// the string to split with the pattern
577 /// ## `match_options`
578 /// match time option flags
579 ///
580 /// # Returns
581 ///
582 /// a [`None`]-terminated gchar ** array. Free
583 /// it using g_strfreev()
584 #[doc(alias = "g_regex_split")]
585 pub fn split(
586 &self,
587 string: impl IntoGStr,
588 match_options: RegexMatchFlags,
589 ) -> PtrSlice<GStringPtr> {
590 self.split_full(string, 0, match_options, 0)
591 .unwrap_or_default()
592 }
593
594 /// Breaks the string on the pattern, and returns an array of the tokens.
595 /// If the pattern contains capturing parentheses, then the text for each
596 /// of the substrings will also be returned. If the pattern does not match
597 /// anywhere in the string, then the whole string is returned as the first
598 /// token.
599 ///
600 /// As a special case, the result of splitting the empty string "" is an
601 /// empty vector, not a vector containing a single string. The reason for
602 /// this special case is that being able to represent an empty vector is
603 /// typically more useful than consistent handling of empty elements. If
604 /// you do need to represent empty elements, you'll need to check for the
605 /// empty string before calling this function.
606 ///
607 /// A pattern that can match empty strings splits @string into separate
608 /// characters wherever it matches the empty string between characters.
609 /// For example splitting "ab c" using as a separator "\s*", you will get
610 /// "a", "b" and "c".
611 ///
612 /// Setting @start_position differs from just passing over a shortened
613 /// string and setting [`RegexMatchFlags::NOTBOL`][crate::RegexMatchFlags::NOTBOL] in the case of a pattern
614 /// that begins with any kind of lookbehind assertion, such as "\b".
615 /// ## `string`
616 /// the string to split with the pattern
617 /// ## `start_position`
618 /// starting index of the string to match, in bytes
619 /// ## `match_options`
620 /// match time option flags
621 /// ## `max_tokens`
622 /// the maximum number of tokens to split @string into.
623 /// If this is less than 1, the string is split completely
624 ///
625 /// # Returns
626 ///
627 /// a [`None`]-terminated gchar ** array. Free
628 /// it using g_strfreev()
629 #[doc(alias = "g_regex_split_full")]
630 pub fn split_full(
631 &self,
632 string: impl IntoGStr,
633 start_position: i32,
634 match_options: RegexMatchFlags,
635 max_tokens: i32,
636 ) -> Result<PtrSlice<GStringPtr>, crate::Error> {
637 unsafe {
638 let mut error = ptr::null_mut();
639 string.run_with_gstr(|string| {
640 let ret = ffi::g_regex_split_full(
641 self.to_glib_none().0,
642 string.to_glib_none().0,
643 string.len() as _,
644 start_position,
645 match_options.into_glib(),
646 max_tokens,
647 &mut error,
648 );
649 if error.is_null() {
650 Ok(FromGlibPtrContainer::from_glib_full(ret))
651 } else {
652 Err(from_glib_full(error))
653 }
654 })
655 }
656 }
657
658 /// Breaks the string on the pattern, and returns an array of
659 /// the tokens. If the pattern contains capturing parentheses,
660 /// then the text for each of the substrings will also be returned.
661 /// If the pattern does not match anywhere in the string, then the
662 /// whole string is returned as the first token.
663 ///
664 /// This function is equivalent to g_regex_split() but it does
665 /// not require to compile the pattern with g_regex_new(), avoiding
666 /// some lines of code when you need just to do a split without
667 /// extracting substrings, capture counts, and so on.
668 ///
669 /// If this function is to be called on the same @pattern more than
670 /// once, it's more efficient to compile the pattern once with
671 /// g_regex_new() and then use g_regex_split().
672 ///
673 /// As a special case, the result of splitting the empty string ""
674 /// is an empty vector, not a vector containing a single string.
675 /// The reason for this special case is that being able to represent
676 /// an empty vector is typically more useful than consistent handling
677 /// of empty elements. If you do need to represent empty elements,
678 /// you'll need to check for the empty string before calling this
679 /// function.
680 ///
681 /// A pattern that can match empty strings splits @string into
682 /// separate characters wherever it matches the empty string between
683 /// characters. For example splitting "ab c" using as a separator
684 /// "\s*", you will get "a", "b" and "c".
685 /// ## `pattern`
686 /// the regular expression
687 /// ## `string`
688 /// the string to scan for matches
689 /// ## `compile_options`
690 /// compile options for the regular expression, or 0
691 /// ## `match_options`
692 /// match options, or 0
693 ///
694 /// # Returns
695 ///
696 /// a [`None`]-terminated array of strings. Free
697 /// it using g_strfreev()
698 #[doc(alias = "g_regex_split_simple")]
699 pub fn split_simple(
700 pattern: impl IntoGStr,
701 string: impl IntoGStr,
702 compile_options: RegexCompileFlags,
703 match_options: RegexMatchFlags,
704 ) -> PtrSlice<GStringPtr> {
705 pattern.run_with_gstr(|pattern| {
706 string.run_with_gstr(|string| unsafe {
707 FromGlibPtrContainer::from_glib_full(ffi::g_regex_split_simple(
708 pattern.to_glib_none().0,
709 string.to_glib_none().0,
710 compile_options.into_glib(),
711 match_options.into_glib(),
712 ))
713 })
714 })
715 }
716}
717
#[cfg(test)]
mod tests {
    use super::*;
    use crate::RegexCompileFlags;

    // Sentence both tests run the regex against.
    const QUOTE: &str = "This is a simple sample.";

    // Compiles the `s[ai]mple` pattern shared by the tests.
    fn sample_regex() -> Regex {
        Regex::new(
            "s[ai]mple",
            RegexCompileFlags::OPTIMIZE,
            RegexMatchFlags::DEFAULT,
        )
        .expect("Regex new")
        .expect("Null regex")
    }

    // Both "simple" and "sample" must be replaced by the literal text.
    #[test]
    fn test_replace_literal() {
        let replaced = sample_regex()
            .replace_literal(QUOTE, 0, "XXX", RegexMatchFlags::DEFAULT)
            .expect("regex replace");

        assert_eq!(replaced, "This is a XXX XXX.");
    }

    // Splitting on the two matches must yield the three surrounding pieces.
    #[test]
    fn test_split() {
        let tokens = sample_regex().split(QUOTE, RegexMatchFlags::DEFAULT);

        assert_eq!(tokens.len(), 3);
        assert_eq!(tokens[0], "This is a ");
        assert_eq!(tokens[1], " ");
        assert_eq!(tokens[2], ".");
    }
}