libgir/
xmlparser.rs

1use std::{
2    fmt,
3    fs::File,
4    io::{BufReader, Read},
5    path::{Path, PathBuf},
6    rc::Rc,
7    str,
8};
9
10use xml::{
11    self,
12    attribute::OwnedAttribute,
13    common::{Position, TextPosition},
14    name::OwnedName,
15    reader::{EventReader, XmlEvent},
16};
17
18/// NOTE: After parser returns an error its further behaviour is unspecified.
19pub struct XmlParser<'a> {
20    /// Inner XML parser doing actual work.
21    parser: EventReader<Box<dyn 'a + Read>>,
22    /// Next event to be returned.
23    ///
24    /// Takes priority over events returned from inner parser.
25    /// Used to support peaking one element ahead.
26    peek_event: Option<Result<XmlEvent, String>>,
27    /// Position on peek event if any.
28    peek_position: TextPosition,
29    /// Used to emits errors. Rc so that it can be cheaply shared with Element
30    /// type.
31    error_emitter: Rc<ErrorEmitter>,
32}
33
/// Turns bare messages into complete error strings.
struct ErrorEmitter {
    /// Location of the document being parsed, or `None` when no path is
    /// known for the input.
    path: Option<PathBuf>,
}
38
39impl ErrorEmitter {
40    pub fn emit(&self, message: &str, position: TextPosition) -> String {
41        let enriched = match self.path {
42            Some(ref path) => format!("{} at line {}: {}", path.display(), position, message),
43            None => format!("{position} {message}"),
44        };
45        format!("GirXml: {enriched}")
46    }
47
48    pub fn emit_error(&self, error: &xml::reader::Error) -> String {
49        // Error returned by EventReader already includes the position.
50        // That is why we have a separate implementation that only
51        // prepends the file path.
52        let enriched = match self.path {
53            Some(ref path) => format!("{}:{}", path.display(), error),
54            None => format!("{error}"),
55        };
56        format!("GirXml: {enriched}")
57    }
58}
59
/// Marker standing in for `XmlEvent::StartDocument`, which has no dedicated
/// type of its own.
pub struct Document;
62
63/// A wrapper for `XmlEvent::StartElement` which doesn't have its own type.
64pub struct Element {
65    name: OwnedName,
66    attributes: Vec<OwnedAttribute>,
67    position: TextPosition,
68    error_emitter: Rc<ErrorEmitter>,
69}
70
71impl Element {
72    /// Returns the element local name.
73    pub fn name(&self) -> &str {
74        &self.name.local_name
75    }
76
77    /// Value of attribute with given name or None if it is not found.
78    pub fn attr(&self, name: &str) -> Option<&str> {
79        for attr in &self.attributes {
80            if attr.name.local_name == name {
81                return Some(&attr.value);
82            }
83        }
84        None
85    }
86
87    /// Checks if elements has any attributes.
88    pub fn has_attrs(&self) -> bool {
89        !self.attributes.is_empty()
90    }
91
92    pub fn attr_bool(&self, name: &str, default: bool) -> bool {
93        for attr in &self.attributes {
94            if attr.name.local_name == name {
95                return attr.value == "1";
96            }
97        }
98        default
99    }
100
101    pub fn attr_from_str<T>(&self, name: &str) -> Result<Option<T>, String>
102    where
103        T: str::FromStr,
104        T::Err: fmt::Display,
105    {
106        if let Some(value_str) = self.attr(name) {
107            match T::from_str(value_str) {
108                Ok(value) => Ok(Some(value)),
109                Err(error) => {
110                    let message = format!(
111                        "Attribute `{}` on element <{}> has invalid value: {}",
112                        name,
113                        self.name(),
114                        error
115                    );
116                    Err(self.error_emitter.emit(&message, self.position))
117                }
118            }
119        } else {
120            Ok(None)
121        }
122    }
123
124    /// Returns element position.
125    pub fn position(&self) -> TextPosition {
126        self.position
127    }
128
129    /// Value of attribute with given name or an error when absent.
130    pub fn attr_required(&self, name: &str) -> Result<&str, String> {
131        for attr in &self.attributes {
132            if attr.name.local_name == name {
133                return Ok(&attr.value);
134            }
135        }
136        let message = format!(
137            "Attribute `{}` on element <{}> is required.",
138            name,
139            self.name()
140        );
141        Err(self.error_emitter.emit(&message, self.position))
142    }
143}
144
145impl XmlParser<'_> {
146    pub fn from_path(path: &Path) -> Result<XmlParser<'_>, String> {
147        match File::open(path) {
148            Err(e) => Err(format!("Can't open file \"{}\": {}", path.display(), e)),
149            Ok(file) => Ok(XmlParser {
150                parser: EventReader::new(Box::new(BufReader::new(file))),
151                peek_event: None,
152                peek_position: TextPosition::new(),
153                error_emitter: Rc::new(ErrorEmitter {
154                    path: Some(path.to_owned()),
155                }),
156            }),
157        }
158    }
159
160    #[cfg(test)]
161    pub fn new<'r, R: 'r + Read>(read: R) -> XmlParser<'r> {
162        XmlParser {
163            parser: EventReader::new(Box::new(read)),
164            peek_event: None,
165            peek_position: TextPosition::new(),
166            error_emitter: Rc::new(ErrorEmitter { path: None }),
167        }
168    }
169
170    /// Returns an error that combines current position and given error message.
171    pub fn fail(&self, message: &str) -> String {
172        self.error_emitter.emit(message, self.position())
173    }
174
175    /// Returns an error that combines given error message and position.
176    pub fn fail_with_position(&self, message: &str, position: TextPosition) -> String {
177        self.error_emitter.emit(message, position)
178    }
179
180    pub fn unexpected_element(&self, elem: &Element) -> String {
181        let message = format!("Unexpected element <{}>", elem.name());
182        self.error_emitter.emit(&message, elem.position())
183    }
184
185    fn unexpected_event(&self, event: &XmlEvent) -> String {
186        let message = format!("Unexpected event {event:?}");
187        self.error_emitter.emit(&message, self.position())
188    }
189
190    pub fn position(&self) -> TextPosition {
191        match self.peek_event {
192            None => self.parser.position(),
193            Some(_) => self.peek_position,
194        }
195    }
196
197    /// Returns next XML event without consuming it.
198    fn peek_event(&mut self) -> &Result<XmlEvent, String> {
199        if self.peek_event.is_none() {
200            self.peek_event = Some(self.next_event_impl());
201            self.peek_position = self.parser.position();
202        }
203        self.peek_event.as_ref().unwrap()
204    }
205
206    /// Consumes and returns next XML event.
207    fn next_event(&mut self) -> Result<XmlEvent, String> {
208        match self.peek_event.take() {
209            None => self.next_event_impl(),
210            Some(e) => e,
211        }
212    }
213
214    /// Returns next XML event directly from parser.
215    fn next_event_impl(&mut self) -> Result<XmlEvent, String> {
216        loop {
217            match self.parser.next() {
218                // Ignore whitespace and comments by default.
219                Ok(XmlEvent::Whitespace(..) | XmlEvent::Comment(..)) => continue,
220                Ok(event) => return Ok(event),
221                Err(e) => return Err(self.error_emitter.emit_error(&e)),
222            }
223        }
224    }
225
226    pub fn document<R, F>(&mut self, f: F) -> Result<R, String>
227    where
228        F: FnOnce(&mut XmlParser<'_>, Document) -> Result<R, String>,
229    {
230        let doc = self.start_document()?;
231        let result = f(self, doc)?;
232        self.end_document()?;
233        Ok(result)
234    }
235
236    fn start_document(&mut self) -> Result<Document, String> {
237        match self.next_event()? {
238            XmlEvent::StartDocument { .. } => Ok(Document),
239            e => Err(self.unexpected_event(&e)),
240        }
241    }
242
243    fn end_document(&mut self) -> Result<(), String> {
244        match self.next_event()? {
245            XmlEvent::EndDocument { .. } => Ok(()),
246            e => Err(self.unexpected_event(&e)),
247        }
248    }
249
250    pub fn elements<R, F>(&mut self, mut f: F) -> Result<Vec<R>, String>
251    where
252        F: FnMut(&mut XmlParser<'_>, &Element) -> Result<R, String>,
253    {
254        let mut results = Vec::new();
255        loop {
256            match *self.peek_event() {
257                Ok(XmlEvent::StartElement { .. }) => {
258                    let element = self.start_element()?;
259                    results.push(f(self, &element)?);
260                    self.end_element()?;
261                }
262                _ => return Ok(results),
263            }
264        }
265    }
266
267    pub fn element_with_name<R, F>(&mut self, expected_name: &str, f: F) -> Result<R, String>
268    where
269        F: FnOnce(&mut XmlParser<'_>, &Element) -> Result<R, String>,
270    {
271        let elem = self.start_element()?;
272        if expected_name != elem.name.local_name {
273            return Err(self.unexpected_element(&elem));
274        }
275        let result = f(self, &elem)?;
276        self.end_element()?;
277        Ok(result)
278    }
279
280    fn start_element(&mut self) -> Result<Element, String> {
281        match self.next_event() {
282            Ok(XmlEvent::StartElement {
283                name, attributes, ..
284            }) => Ok(Element {
285                name,
286                attributes,
287                position: self.position(),
288                error_emitter: self.error_emitter.clone(),
289            }),
290            Ok(e) => Err(self.unexpected_event(&e)),
291            Err(e) => Err(e),
292        }
293    }
294
295    fn end_element(&mut self) -> Result<(), String> {
296        match self.next_event() {
297            Ok(XmlEvent::EndElement { .. }) => Ok(()),
298            Ok(e) => Err(self.unexpected_event(&e)),
299            Err(e) => Err(e),
300        }
301    }
302
303    pub fn text(&mut self) -> Result<String, String> {
304        let mut result = String::new();
305        loop {
306            match *self.peek_event() {
307                Ok(XmlEvent::Characters(..)) => {
308                    if let Ok(XmlEvent::Characters(s)) = self.next_event() {
309                        result.push_str(&s);
310                    }
311                }
312                Err(_) => {
313                    self.next_event()?;
314                    unreachable!();
315                }
316                _ if result.is_empty() => {
317                    return Err(self.fail("Expected text content"));
318                }
319                _ => break,
320            }
321        }
322        Ok(result)
323    }
324
325    /// Ignore everything within current element.
326    pub fn ignore_element(&mut self) -> Result<(), String> {
327        let mut depth = 1;
328        loop {
329            match *self.peek_event() {
330                Ok(XmlEvent::StartElement { .. }) => {
331                    // Ignore warning about unused result, we know event is OK.
332                    drop(self.next_event());
333                    depth += 1;
334                }
335                Ok(XmlEvent::EndElement { .. }) => {
336                    depth -= 1;
337                    if depth > 0 {
338                        drop(self.next_event());
339                    } else {
340                        return Ok(());
341                    }
342                }
343                Ok(_) => drop(self.next_event()),
344                Err(_) => return self.next_event().map(|_| ()),
345            }
346        }
347    }
348}
349
#[cfg(test)]
mod tests {

    use super::*;

    /// Runs `f` with a parser that reads the in-memory document `xml`.
    fn with_parser<F, R>(xml: &[u8], f: F) -> Result<R, String>
    where
        F: FnOnce(XmlParser<'_>) -> Result<R, String>,
    {
        let parser = XmlParser::new(xml);
        f(parser)
    }

    /// The root element is accepted only under its own name.
    #[test]
    fn test_element_with_name() {
        fn parse_with_root_name(xml: &[u8], root: &str) -> Result<(), String> {
            with_parser(xml, |mut parser| {
                parser.document(|parser, _doc| {
                    parser.element_with_name(root, |_parser, _elem| Ok(()))
                })
            })
        }

        let xml = br#"<?xml version="1.0"?>
            <!-- a comment -->
            <a>
            </a>"#;

        let matching = parse_with_root_name(xml, "a");
        let mismatching = parse_with_root_name(xml, "b");

        assert!(matching.is_ok());
        assert!(mismatching.is_err());
    }

    /// Nested elements and text are skipped up to the enclosing end tag.
    #[test]
    fn test_ignore_element() {
        let xml = br#"<?xml version="1.0"?>
            <a>
                <b>
                    <c/>
                    <d/>
                </b>
                <b> some text content </b>
            </a>"#;

        let outcome = with_parser(xml, |mut parser| {
            parser.document(|parser, _doc| {
                parser.element_with_name("a", |parser, _elem| parser.ignore_element())
            })
        });

        outcome.unwrap();
    }

    /// Every child is visited and the results keep their order.
    #[test]
    fn test_elements() {
        let xml = br#"<?xml version="1.0"?>
            <root>
                <child name="a" />
                <child name="b" />
                <child name="c" />
            </root>"#;

        let names: Vec<String> = with_parser(xml, |mut parser| {
            parser.document(|parser, _doc| {
                parser.element_with_name("root", |parser, _root| {
                    parser.elements(|_parser, child| {
                        child.attr_required("name").map(str::to_owned)
                    })
                })
            })
        })
        .unwrap();
        let result: String = names.join(".");

        assert_eq!("a.b.c", result);
    }

    /// Text content of an element is returned verbatim.
    #[test]
    fn test_text() {
        let xml = br#"<?xml version="1.0"?>
            <x>hello world!</x>"#;

        let parsed = with_parser(xml, |mut parser| {
            parser.document(|parser, _doc| {
                parser.element_with_name("x", |parser, _elem| parser.text())
            })
        });
        let result: String = parsed.unwrap();

        assert_eq!("hello world!", &result);
    }

    /// Present attributes are found, absent ones are reported as errors.
    #[test]
    fn test_attr_required() {
        let xml = br#"<?xml version="1.0"?>
            <x a="1" b="2"></x>"#;

        let outcome = with_parser(xml, |mut parser| {
            parser.document(|parser, _doc| {
                parser.element_with_name("x", |_parser, elem| {
                    assert!(elem.attr_required("a").is_ok());
                    assert!(elem.attr_required("b").is_ok());
                    assert!(elem.attr_required("c").is_err());
                    assert!(elem.attr_required("d").is_err());
                    Ok(())
                })
            })
        });

        outcome.unwrap();
    }

    /// Valid values are parsed, invalid ones are reported as errors.
    #[test]
    fn test_attr_from_str() {
        let xml = br#"<?xml version="1.0"?>
            <x a="123" b="2a"></x>"#;

        let outcome = with_parser(xml, |mut parser| {
            parser.document(|parser, _doc| {
                parser.element_with_name("x", |_parser, elem| {
                    assert_eq!(elem.attr_from_str::<usize>("a").unwrap(), Some(123));
                    assert!(elem.attr_from_str::<usize>("b").is_err());
                    Ok(())
                })
            })
        });

        outcome.unwrap();
    }
}