1
//! The main XML parser.
2

            
3
use encoding_rs::Encoding;
4
use gio::{
5
    prelude::BufferedInputStreamExt, BufferedInputStream, Cancellable, ConverterInputStream,
6
    InputStream, ZlibCompressorFormat, ZlibDecompressor,
7
};
8
use glib::object::Cast;
9
use markup5ever::{
10
    expanded_name, local_name, namespace_url, ns, ExpandedName, LocalName, Namespace, QualName,
11
};
12
use std::cell::RefCell;
13
use std::collections::HashMap;
14
use std::rc::Rc;
15
use std::str;
16
use std::string::ToString;
17
use std::sync::Arc;
18
use xml5ever::buffer_queue::BufferQueue;
19
use xml5ever::tendril::format_tendril;
20
use xml5ever::tokenizer::{TagKind, Token, TokenSink, XmlTokenizer, XmlTokenizerOpts};
21

            
22
use crate::borrow_element_as;
23
use crate::css::{Origin, Stylesheet};
24
use crate::document::{Document, DocumentBuilder, LoadOptions};
25
use crate::error::{ImplementationLimit, LoadingError};
26
use crate::io::{self, IoError};
27
use crate::limits::{MAX_LOADED_ELEMENTS, MAX_XINCLUDE_DEPTH};
28
use crate::node::{Node, NodeBorrow};
29
use crate::rsvg_log;
30
use crate::session::Session;
31
use crate::style::StyleType;
32
use crate::url_resolver::AllowedUrl;
33

            
34
use xml2_load::Xml2Parser;
35

            
36
mod attributes;
37
mod xml2;
38
mod xml2_load;
39

            
40
use xml2::xmlEntityPtr;
41

            
42
pub use attributes::Attributes;
43

            
44
3087159
#[derive(Clone)]
45
enum Context {
46
    // Starting state
47
    Start,
48

            
49
    // Creating nodes for elements under the current node
50
    ElementCreation,
51

            
52
    // Inside <style>; accumulate text to include in a stylesheet
53
    Style,
54

            
55
    // An unsupported element inside a `<style>` element, to be ignored
56
    UnsupportedStyleChild,
57

            
58
    // Inside <xi:include>
59
12
    XInclude(XIncludeContext),
60

            
61
    // An unsupported element inside a <xi:include> context, to be ignored
62
    UnsupportedXIncludeChild,
63

            
64
    // Insie <xi::fallback>
65
4
    XIncludeFallback(XIncludeContext),
66

            
67
    // An XML parsing error was found.  We will no-op upon any further XML events.
68
181
    FatalError(LoadingError),
69
}
70

            
71
36
/// State carried while inside an `<xi:include>` element and its `<xi:fallback>`.
#[derive(Clone)]
struct XIncludeContext {
    /// `true` when the included resource could not be acquired, so the
    /// contents of an `<xi:fallback>` child should be used instead.
    need_fallback: bool,
}
75

            
76
extern "C" {
77
    // The original function takes an xmlNodePtr, but that is compatible
78
    // with xmlEntityPtr for the purposes of this function.
79
    fn xmlFreeNode(node: xmlEntityPtr);
80
}
81

            
82
/// This is to hold an xmlEntityPtr from libxml2; we just hold an opaque pointer
83
/// that is freed in impl Drop.
84
struct XmlEntity(xmlEntityPtr);
85

            
86
impl Drop for XmlEntity {
87
7
    fn drop(&mut self) {
88
        unsafe {
89
            // Even though we are freeing an xmlEntityPtr, historically the code has always
90
            // used xmlFreeNode() because that function actually does allow freeing entities.
91
            //
92
            // See https://gitlab.gnome.org/GNOME/libxml2/-/issues/731
93
            // for a possible memory leak on older versions of libxml2 when using
94
            // xmlFreeNode() instead of xmlFreeEntity() - the latter just became public
95
            // in librsvg-2.12.0.
96
7
            xmlFreeNode(self.0);
97
        }
98
7
    }
99
}
100

            
101
// Builds an `ExpandedName` in the XInclude namespace for the given local name.
//
// markup5ever ships no built-in namespace constant for XInclude, so the
// namespace and local-name atoms are constructed here on every expansion.
// The result borrows temporaries, so use it directly inside an expression
// (for example on the right-hand side of `==`).
macro_rules! xinclude_name {
    ($local:expr) => {
        ExpandedName {
            ns: &Namespace::from("http://www.w3.org/2001/XInclude"),
            local: &LocalName::from($local),
        }
    };
}
113

            
114
/// Holds the state used for XML processing
115
///
116
/// These methods are called when an XML event is parsed out of the XML stream: `start_element`,
117
/// `end_element`, `characters`.
118
///
119
/// When an element starts, we push a corresponding `Context` into the `context_stack`.  Within
120
/// that context, all XML events will be forwarded to it, and processed in one of the `XmlHandler`
121
/// trait objects. Normally the context refers to a `NodeCreationContext` implementation which is
122
/// what creates normal graphical elements.
123
struct XmlStateInner {
124
    document_builder: DocumentBuilder,
125
    num_loaded_elements: usize,
126
    xinclude_depth: usize,
127
    context_stack: Vec<Context>,
128
    current_node: Option<Node>,
129

            
130
    // Note that neither XmlStateInner nor Xmlstate implement Drop.
131
    //
132
    // An XmlState is finally consumed in XmlState::build_document(), and that
133
    // function is responsible for freeing all the XmlEntityPtr from this field.
134
    //
135
    // (The structs cannot impl Drop because build_document()
136
    // destructures and consumes them at the same time.)
137
    entities: HashMap<String, XmlEntity>,
138
}
139

            
140
pub struct XmlState {
141
    inner: RefCell<XmlStateInner>,
142

            
143
    session: Session,
144
    load_options: Arc<LoadOptions>,
145
}
146

            
147
/// Errors returned from `XmlState::acquire()`
///
/// These follow the terminology from <https://www.w3.org/TR/xinclude/#terminology>
#[derive(Debug)]
enum AcquireError {
    /// Resource could not be acquired (file not found), or I/O error.
    /// In this case, the `xi:fallback` can be used if present.
    ResourceError,

    /// Resource could not be parsed/decoded; carries a human-readable message.
    FatalError(String),
}
158

            
159
impl XmlStateInner {
160
3087171
    fn context(&self) -> Context {
161
        // We can unwrap since the stack is never empty
162
3087171
        self.context_stack.last().unwrap().clone()
163
3087171
    }
164
}
165

            
166
impl XmlState {
167
1110
    fn new(
168
        session: Session,
169
        document_builder: DocumentBuilder,
170
        load_options: Arc<LoadOptions>,
171
    ) -> XmlState {
172
1110
        XmlState {
173
1110
            inner: RefCell::new(XmlStateInner {
174
1138
                document_builder,
175
                num_loaded_elements: 0,
176
                xinclude_depth: 0,
177
1110
                context_stack: vec![Context::Start],
178
1110
                current_node: None,
179
1110
                entities: HashMap::new(),
180
            }),
181

            
182
1110
            session,
183
1110
            load_options,
184
        }
185
1110
    }
186

            
187
1164
    fn check_last_error(&self) -> Result<(), LoadingError> {
188
1164
        let inner = self.inner.borrow();
189

            
190
1164
        match inner.context() {
191
48
            Context::FatalError(e) => Err(e),
192
1116
            _ => Ok(()),
193
        }
194
1164
    }
195

            
196
1024764
    fn check_limits(&self) -> Result<(), ()> {
197
1024764
        if self.inner.borrow().num_loaded_elements > MAX_LOADED_ELEMENTS {
198
1
            self.error(LoadingError::LimitExceeded(
199
1
                ImplementationLimit::TooManyLoadedElements,
200
            ));
201
1
            Err(())
202
        } else {
203
1024763
            Ok(())
204
        }
205
1024764
    }
206

            
207
1024863
    pub fn start_element(&self, name: QualName, attrs: Attributes) -> Result<(), ()> {
208
1024863
        self.check_limits()?;
209

            
210
1024758
        let context = self.inner.borrow().context();
211

            
212
1024747
        if let Context::FatalError(_) = context {
213
7
            return Err(());
214
        }
215

            
216
1024744
        self.inner.borrow_mut().num_loaded_elements += 1;
217

            
218
1024751
        let new_context = match context {
219
1113
            Context::Start => self.element_creation_start_element(&name, attrs),
220
1023636
            Context::ElementCreation => self.element_creation_start_element(&name, attrs),
221

            
222
            Context::Style => self.inside_style_start_element(&name),
223
            Context::UnsupportedStyleChild => self.unsupported_style_start_element(&name),
224

            
225
2
            Context::XInclude(ref ctx) => self.inside_xinclude_start_element(ctx, &name),
226
            Context::UnsupportedXIncludeChild => self.unsupported_xinclude_start_element(&name),
227
            Context::XIncludeFallback(ref ctx) => {
228
                self.xinclude_fallback_start_element(ctx, &name, attrs)
229
            }
230

            
231
            Context::FatalError(_) => unreachable!(),
232
        };
233

            
234
1024694
        self.inner.borrow_mut().context_stack.push(new_context);
235

            
236
1024764
        Ok(())
237
1024698
    }
238

            
239
1024825
    pub fn end_element(&self, _name: QualName) {
240
1024825
        let context = self.inner.borrow().context();
241

            
242
1024754
        match context {
243
            Context::Start => panic!("end_element: XML handler stack is empty!?"),
244
1024617
            Context::ElementCreation => self.element_creation_end_element(),
245

            
246
42
            Context::Style => self.style_end_element(),
247
            Context::UnsupportedStyleChild => (),
248

            
249
            Context::XInclude(_) => (),
250
            Context::UnsupportedXIncludeChild => (),
251
            Context::XIncludeFallback(_) => (),
252

            
253
            Context::FatalError(_) => return,
254
        }
255

            
256
        // We can unwrap since start_element() always adds a context to the stack
257
1024652
        self.inner.borrow_mut().context_stack.pop().unwrap();
258
1024696
    }
259

            
260
1036624
    pub fn characters(&self, text: &str) {
261
1036624
        let context = self.inner.borrow().context();
262

            
263
1036557
        match context {
264
            Context::Start => {
265
                // This is character data before the first element, i.e. something like
266
                //  <?xml version="1.0" encoding="UTF-8"?><svg xmlns="http://www.w3.org/2000/svg"/>
267
                // ^ note the space here
268
                // libxml2 is not finished reading the file yet; it will emit an error
269
                // on its own when it finishes.  So, ignore this condition.
270
            }
271

            
272
1036491
            Context::ElementCreation => self.element_creation_characters(text),
273

            
274
61
            Context::Style => self.element_creation_characters(text),
275
            Context::UnsupportedStyleChild => (),
276

            
277
            Context::XInclude(_) => (),
278
            Context::UnsupportedXIncludeChild => (),
279
2
            Context::XIncludeFallback(ref ctx) => self.xinclude_fallback_characters(ctx, text),
280
            Context::FatalError(_) => (),
281
        }
282
1036512
    }
283

            
284
3
    pub fn processing_instruction(&self, target: &str, data: &str) {
285
17
        if target != "xml-stylesheet" {
286
            return;
287
        }
288

            
289
10
        if let Ok(pairs) = parse_xml_stylesheet_processing_instruction(data) {
290
10
            let mut alternate = None;
291
10
            let mut type_ = None;
292
10
            let mut href = None;
293

            
294
10
            for (att, value) in pairs {
295
4
                match att.as_str() {
296
4
                    "alternate" => alternate = Some(value),
297
4
                    "type" => type_ = Some(value),
298
2
                    "href" => href = Some(value),
299
                    _ => (),
300
                }
301
6
            }
302

            
303
2
            let mut inner = self.inner.borrow_mut();
304

            
305
2
            if type_.as_deref() != Some("text/css")
306
2
                || (alternate.is_some() && alternate.as_deref() != Some("no"))
307
            {
308
                rsvg_log!(
309
                    self.session,
310
                    "invalid parameters in XML processing instruction for stylesheet",
311
                );
312
                return;
313
            }
314

            
315
2
            if let Some(href) = href {
316
2
                if let Ok(aurl) = self.load_options.url_resolver.resolve_href(&href) {
317
2
                    if let Ok(stylesheet) =
318
1
                        Stylesheet::from_href(&aurl, Origin::Author, self.session.clone())
319
                    {
320
1
                        inner.document_builder.append_stylesheet(stylesheet);
321
1
                    } else {
322
                        // FIXME: https://www.w3.org/TR/xml-stylesheet/ does not seem to specify
323
                        // what to do if the stylesheet cannot be loaded, so here we ignore the error.
324
                        rsvg_log!(
325
                            self.session,
326
                            "could not create stylesheet from {} in XML processing instruction",
327
                            href
328
                        );
329
                    }
330
1
                } else {
331
                    rsvg_log!(
332
                        self.session,
333
                        "{} not allowed for xml-stylesheet in XML processing instruction",
334
                        href
335
                    );
336
                }
337
2
            } else {
338
                rsvg_log!(
339
                    self.session,
340
                    "xml-stylesheet processing instruction does not have href; ignoring"
341
                );
342
            }
343
2
        } else {
344
            self.error(LoadingError::XmlParseError(String::from(
345
                "invalid processing instruction data in xml-stylesheet",
346
            )));
347
        }
348
9
    }
349

            
350
7
    pub fn error(&self, e: LoadingError) {
351
7
        self.inner
352
            .borrow_mut()
353
            .context_stack
354
7
            .push(Context::FatalError(e));
355
7
    }
356

            
357
62
    /// Returns the raw entity pointer stored under `entity_name`, if any.
    ///
    /// The pointer is still owned by this state; callers must not free it.
    pub fn entity_lookup(&self, entity_name: &str) -> Option<xmlEntityPtr> {
        let inner = self.inner.borrow();
        let entity = inner.entities.get(entity_name)?;
        Some(entity.0)
    }
364

            
365
7
    pub fn entity_insert(&self, entity_name: &str, entity: xmlEntityPtr) {
366
7
        let mut inner = self.inner.borrow_mut();
367

            
368
7
        inner
369
            .entities
370
14
            .insert(entity_name.to_string(), XmlEntity(entity));
371
7
    }
372

            
373
1024740
    fn element_creation_start_element(&self, name: &QualName, attrs: Attributes) -> Context {
374
2049497
        if name.expanded() == xinclude_name!("include") {
375
50
            self.xinclude_start_element(name, attrs)
376
        } else {
377
1024742
            let mut inner = self.inner.borrow_mut();
378

            
379
1024815
            let parent = inner.current_node.clone();
380
1024815
            let node = inner.document_builder.append_element(name, attrs, parent);
381
1024677
            inner.current_node = Some(node);
382

            
383
1024677
            if name.expanded() == expanded_name!(svg "style") {
384
42
                Context::Style
385
            } else {
386
1024601
                Context::ElementCreation
387
            }
388
1024643
        }
389
1024522
    }
390

            
391
1024659
    fn element_creation_end_element(&self) {
392
1024659
        let mut inner = self.inner.borrow_mut();
393
1024659
        let node = inner.current_node.take().unwrap();
394
1024665
        inner.current_node = node.parent();
395
1024665
    }
396

            
397
1036553
    fn element_creation_characters(&self, text: &str) {
398
1036563
        let mut inner = self.inner.borrow_mut();
399

            
400
1036553
        let mut parent = inner.current_node.clone().unwrap();
401
1036713
        inner.document_builder.append_characters(text, &mut parent);
402
1036503
    }
403

            
404
42
    fn style_end_element(&self) {
405
42
        self.add_inline_stylesheet();
406
42
        self.element_creation_end_element()
407
42
    }
408

            
409
42
    fn add_inline_stylesheet(&self) {
410
42
        let mut inner = self.inner.borrow_mut();
411
42
        let current_node = inner.current_node.as_ref().unwrap();
412

            
413
84
        let style_type = borrow_element_as!(current_node, Style).style_type();
414

            
415
84
        if style_type == StyleType::TextCss {
416
42
            let stylesheet_text = current_node
417
                .children()
418
42
                .map(|child| {
419
                    // Note that here we assume that the only children of <style>
420
                    // are indeed text nodes.
421
42
                    let child_borrow = child.borrow_chars();
422
42
                    child_borrow.get_string()
423
42
                })
424
                .collect::<String>();
425

            
426
84
            if let Ok(stylesheet) = Stylesheet::from_data(
427
42
                &stylesheet_text,
428
42
                &self.load_options.url_resolver,
429
42
                Origin::Author,
430
42
                self.session.clone(),
431
42
            ) {
432
42
                inner.document_builder.append_stylesheet(stylesheet);
433
42
            } else {
434
                rsvg_log!(self.session, "invalid inline stylesheet");
435
            }
436
42
        }
437
42
    }
438

            
439
    /// Handles an element that starts inside `<style>`; such elements are
    /// treated as unsupported style children.
    fn inside_style_start_element(&self, name: &QualName) -> Context {
        self.unsupported_style_start_element(name)
    }
442

            
443
    fn unsupported_style_start_element(&self, _name: &QualName) -> Context {
444
        Context::UnsupportedStyleChild
445
    }
446

            
447
252
    fn xinclude_start_element(&self, _name: &QualName, attrs: Attributes) -> Context {
448
252
        let mut href = None;
449
252
        let mut parse = None;
450
252
        let mut encoding = None;
451

            
452
252
        let ln_parse = LocalName::from("parse");
453

            
454
252
        for (attr, value) in attrs.iter() {
455
105
            match attr.expanded() {
456
50
                expanded_name!("", "href") => href = Some(value),
457
51
                ref v
458
102
                    if *v
459
51
                        == ExpandedName {
460
51
                            ns: &ns!(),
461
                            local: &ln_parse,
462
51
                        } =>
463
                {
464
47
                    parse = Some(value)
465
47
                }
466
4
                expanded_name!("", "encoding") => encoding = Some(value),
467
                _ => (),
468
            }
469
101
        }
470

            
471
50
        let need_fallback = match self.acquire(href, parse, encoding) {
472
4
            Ok(()) => false,
473
2
            Err(AcquireError::ResourceError) => true,
474
44
            Err(AcquireError::FatalError(s)) => {
475
44
                return Context::FatalError(LoadingError::XmlParseError(s))
476
            }
477
50
        };
478

            
479
6
        Context::XInclude(XIncludeContext { need_fallback })
480
50
    }
481

            
482
2
    fn inside_xinclude_start_element(&self, ctx: &XIncludeContext, name: &QualName) -> Context {
483
2
        if name.expanded() == xinclude_name!("fallback") {
484
2
            Context::XIncludeFallback(ctx.clone())
485
        } else {
486
            // https://www.w3.org/TR/xinclude/#include_element
487
            //
488
            // "Other content (text, processing instructions,
489
            // comments, elements not in the XInclude namespace,
490
            // descendants of child elements) is not constrained by
491
            // this specification and is ignored by the XInclude
492
            // processor"
493

            
494
            self.unsupported_xinclude_start_element(name)
495
        }
496
2
    }
497

            
498
    fn xinclude_fallback_start_element(
499
        &self,
500
        ctx: &XIncludeContext,
501
        name: &QualName,
502
        attrs: Attributes,
503
    ) -> Context {
504
        if ctx.need_fallback {
505
            if name.expanded() == xinclude_name!("include") {
506
                self.xinclude_start_element(name, attrs)
507
            } else {
508
                self.element_creation_start_element(name, attrs)
509
            }
510
        } else {
511
            Context::UnsupportedXIncludeChild
512
        }
513
    }
514

            
515
2
    fn xinclude_fallback_characters(&self, ctx: &XIncludeContext, text: &str) {
516
2
        if ctx.need_fallback && self.inner.borrow().current_node.is_some() {
517
            // We test for is_some() because with a bad "SVG" file like this:
518
            //
519
            //    <xi:include href="blah"><xi:fallback>foo</xi:fallback></xi:include>
520
            //
521
            // at the point we get "foo" here, there is no current_node because
522
            // no nodes have been created before the xi:include.
523
2
            self.element_creation_characters(text);
524
        }
525
2
    }
526

            
527
50
    fn acquire(
528
        &self,
529
        href: Option<&str>,
530
        parse: Option<&str>,
531
        encoding: Option<&str>,
532
    ) -> Result<(), AcquireError> {
533
50
        if let Some(href) = href {
534
50
            let aurl = self
535
                .load_options
536
                .url_resolver
537
                .resolve_href(href)
538
2
                .map_err(|e| {
539
                    // FIXME: should AlloweUrlError::UrlParseError be a fatal error,
540
                    // not a resource error?
541
2
                    rsvg_log!(self.session, "could not acquire \"{}\": {}", href, e);
542
2
                    AcquireError::ResourceError
543
4
                })?;
544

            
545
            // https://www.w3.org/TR/xinclude/#include_element
546
            //
547
            // "When omitted, the value of "xml" is implied (even in
548
            // the absence of a default value declaration). Values
549
            // other than "xml" and "text" are a fatal error."
550
48
            match parse {
551
48
                None | Some("xml") => self.include_xml(&aurl),
552

            
553
2
                Some("text") => self.acquire_text(&aurl, encoding),
554

            
555
                Some(v) => Err(AcquireError::FatalError(format!(
556
                    "unknown 'parse' attribute value: \"{v}\""
557
                ))),
558
            }
559
48
        } else {
560
            // The href attribute is not present.  Per
561
            // https://www.w3.org/TR/xinclude/#include_element we
562
            // should use the xpointer attribute, but we do not
563
            // support that yet.  So, we'll just say, "OK" and not
564
            // actually include anything.
565
            Ok(())
566
        }
567
50
    }
568

            
569
46
    /// Includes the XML document at `aurl`, tracking the include nesting depth.
    ///
    /// The depth is raised for the duration of the nested parse and lowered
    /// again whether or not that parse succeeds.  Fails without parsing if the
    /// maximum depth has been reached.
    fn include_xml(&self, aurl: &AllowedUrl) -> Result<(), AcquireError> {
        self.increase_xinclude_depth(aurl)?;

        let outcome = self.acquire_xml(aurl);
        self.decrease_xinclude_depth();

        outcome
    }
578

            
579
46
    fn increase_xinclude_depth(&self, aurl: &AllowedUrl) -> Result<(), AcquireError> {
580
46
        let mut inner = self.inner.borrow_mut();
581

            
582
92
        if inner.xinclude_depth == MAX_XINCLUDE_DEPTH {
583
2
            Err(AcquireError::FatalError(format!(
584
                "exceeded maximum level of nested xinclude in {aurl}"
585
            )))
586
        } else {
587
44
            inner.xinclude_depth += 1;
588
44
            Ok(())
589
        }
590
46
    }
591

            
592
44
    fn decrease_xinclude_depth(&self) {
593
44
        let mut inner = self.inner.borrow_mut();
594
44
        inner.xinclude_depth -= 1;
595
44
    }
596

            
597
2
    fn acquire_text(&self, aurl: &AllowedUrl, encoding: Option<&str>) -> Result<(), AcquireError> {
598
2
        let binary = io::acquire_data(aurl, None).map_err(|e| {
599
            rsvg_log!(self.session, "could not acquire \"{}\": {}", aurl, e);
600
            AcquireError::ResourceError
601
        })?;
602

            
603
2
        let encoding = encoding.unwrap_or("utf-8");
604

            
605
2
        let encoder = Encoding::for_label_no_replacement(encoding.as_bytes()).ok_or_else(|| {
606
            AcquireError::FatalError(format!("unknown encoding \"{encoding}\" for \"{aurl}\""))
607
        })?;
608

            
609
2
        let utf8_data = encoder
610
2
            .decode_without_bom_handling_and_without_replacement(&binary.data)
611
            .ok_or_else(|| {
612
                AcquireError::FatalError(format!("could not convert contents of \"{aurl}\" from character encoding \"{encoding}\""))
613
            })?;
614

            
615
2
        self.element_creation_characters(&utf8_data);
616
2
        Ok(())
617
2
    }
618

            
619
44
    fn acquire_xml(&self, aurl: &AllowedUrl) -> Result<(), AcquireError> {
620
        // FIXME: distinguish between "file not found" and "invalid XML"
621

            
622
46
        let stream = io::acquire_stream(aurl, None).map_err(|e| match e {
623
2
            IoError::BadDataUrl => AcquireError::FatalError(String::from("malformed data: URL")),
624
            _ => AcquireError::ResourceError,
625
4
        })?;
626

            
627
        // FIXME: pass a cancellable
628
82
        self.parse_from_stream(&stream, None).map_err(|e| match e {
629
            LoadingError::Io(_) => AcquireError::ResourceError,
630
40
            LoadingError::XmlParseError(s) => AcquireError::FatalError(s),
631
            _ => AcquireError::FatalError(String::from("unknown error")),
632
40
        })
633
44
    }
634

            
635
    // Parses XML from a stream into an XmlState.
636
    //
637
    // This can be called "in the middle" of an XmlState's processing status,
638
    // for example, when including another XML file via xi:include.
639
1164
    fn parse_from_stream(
640
        &self,
641
        stream: &gio::InputStream,
642
        cancellable: Option<&gio::Cancellable>,
643
    ) -> Result<(), LoadingError> {
644
1164
        Xml2Parser::from_stream(self, self.load_options.unlimited_size, stream, cancellable)
645
1154
            .and_then(|parser| parser.parse())
646
1164
            .and_then(|_: ()| self.check_last_error())
647
1164
    }
648

            
649
    fn unsupported_xinclude_start_element(&self, _name: &QualName) -> Context {
650
        Context::UnsupportedXIncludeChild
651
    }
652

            
653
1125
    fn build_document(
654
        self,
655
        stream: &gio::InputStream,
656
        cancellable: Option<&gio::Cancellable>,
657
    ) -> Result<Document, LoadingError> {
658
1125
        self.parse_from_stream(stream, cancellable)?;
659

            
660
        // consume self, then consume inner, then consume document_builder by calling .build()
661

            
662
1114
        let XmlState { inner, .. } = self;
663
1114
        let inner = inner.into_inner();
664

            
665
        let XmlStateInner {
666
1114
            document_builder, ..
667
        } = inner;
668
1114
        document_builder.build()
669
1125
    }
670
}
671

            
672
/// Temporary holding space for data in an XML processing instruction
#[derive(Default)]
struct ProcessingInstructionData {
    /// Attribute `(name, value)` pairs, in the order they were found.
    attributes: Vec<(String, String)>,

    /// Set when the tokenizer reported a parse error.
    error: bool,
}
678

            
679
struct ProcessingInstructionSink(Rc<RefCell<ProcessingInstructionData>>);
680

            
681
impl TokenSink for ProcessingInstructionSink {
682
3
    fn process_token(&mut self, token: Token) {
683
3
        let mut data = self.0.borrow_mut();
684

            
685
3
        match token {
686
3
            Token::TagToken(tag) if tag.kind == TagKind::EmptyTag => {
687
9
                for a in &tag.attrs {
688
6
                    let name = a.name.local.as_ref().to_string();
689
6
                    let value = a.value.to_string();
690

            
691
6
                    data.attributes.push((name, value));
692
6
                }
693
3
            }
694

            
695
            Token::ParseError(_) => data.error = true,
696

            
697
            _ => (),
698
        }
699
3
    }
700
}
701

            
702
// https://www.w3.org/TR/xml-stylesheet/
703
//
704
// The syntax for the xml-stylesheet processing instruction we support
705
// is this:
706
//
707
//   <?xml-stylesheet href="uri" alternate="no" type="text/css"?>
708
//
709
// XML parsers just feed us the raw data after the target name
710
// ("xml-stylesheet"), so we'll create a mini-parser with a hackish
711
// element just to extract the data as attributes.
712
3
fn parse_xml_stylesheet_processing_instruction(data: &str) -> Result<Vec<(String, String)>, ()> {
713
6
    let pi_data = Rc::new(RefCell::new(ProcessingInstructionData {
714
3
        attributes: Vec::new(),
715
        error: false,
716
    }));
717

            
718
3
    let mut queue = BufferQueue::default();
719
3
    queue.push_back(format_tendril!("<rsvg-hack {} />", data));
720

            
721
3
    let sink = ProcessingInstructionSink(pi_data.clone());
722

            
723
3
    let mut tokenizer = XmlTokenizer::new(sink, XmlTokenizerOpts::default());
724
3
    tokenizer.run(&mut queue);
725

            
726
3
    let pi_data = pi_data.borrow();
727

            
728
6
    if pi_data.error {
729
        Err(())
730
    } else {
731
3
        Ok(pi_data.attributes.clone())
732
    }
733
3
}
734

            
735
1129
pub fn xml_load_from_possibly_compressed_stream(
736
    session: Session,
737
    document_builder: DocumentBuilder,
738
    load_options: Arc<LoadOptions>,
739
    stream: &gio::InputStream,
740
    cancellable: Option<&gio::Cancellable>,
741
) -> Result<Document, LoadingError> {
742
1129
    let state = XmlState::new(session, document_builder, load_options);
743

            
744
1129
    let stream = get_input_stream_for_loading(stream, cancellable)?;
745

            
746
1125
    state.build_document(&stream, cancellable)
747
1129
}
748

            
749
// Header of a gzip data stream
750
const GZ_MAGIC_0: u8 = 0x1f;
751
const GZ_MAGIC_1: u8 = 0x8b;
752

            
753
1133
fn get_input_stream_for_loading(
754
    stream: &InputStream,
755
    cancellable: Option<&Cancellable>,
756
) -> Result<InputStream, LoadingError> {
757
    // detect gzipped streams (svgz)
758

            
759
1133
    let buffered = BufferedInputStream::new(stream);
760
1133
    let num_read = buffered.fill(2, cancellable)?;
761
1126
    if num_read < 2 {
762
        // FIXME: this string was localized in the original; localize it
763
4
        return Err(LoadingError::XmlParseError(String::from(
764
            "Input file is too short",
765
        )));
766
    }
767

            
768
1126
    let buf = buffered.peek_buffer();
769
1124
    assert!(buf.len() >= 2);
770
2236
    if buf[0..2] == [GZ_MAGIC_0, GZ_MAGIC_1] {
771
5
        let decomp = ZlibDecompressor::new(ZlibCompressorFormat::Gzip);
772
5
        let converter = ConverterInputStream::new(&buffered, &decomp);
773
5
        Ok(converter.upcast::<InputStream>())
774
5
    } else {
775
1120
        Ok(buffered.upcast::<InputStream>())
776
    }
777
1132
}
778

            
779
#[cfg(test)]
mod tests {
    use super::*;

    /// The pseudo-attributes of a processing instruction come back as
    /// (name, value) pairs.
    #[test]
    fn parses_processing_instruction_data() {
        let mut pairs =
            parse_xml_stylesheet_processing_instruction("foo=\"bar\" baz=\"beep\"").unwrap();
        // Sort by name so the assertion does not depend on tokenizer order.
        pairs.sort_by(|lhs, rhs| lhs.0.cmp(&rhs.0));

        let expected = vec![
            ("baz".to_string(), "beep".to_string()),
            ("foo".to_string(), "bar".to_string()),
        ];

        assert_eq!(pairs, expected);
    }
}