1
//! Determine which URLs are allowed for loading.
2

            
3
use std::fmt;
4
use std::ops::Deref;
5
use url::Url;
6

            
7
use crate::error::AllowedUrlError;
8

            
9
/// Decides which URLs are allowed to be loaded.
10
///
11
/// Currently only contains the base URL.
12
///
13
/// The plan is to add:
14
/// base_only:    Only allow to load content from the same base URL. By default
15
//                this restriction is enabled and requires to provide base_url.
16
/// include_xml:  Allows to use xi:include with XML. Enabled by default.
17
/// include_text: Allows to use xi:include with text. Enabled by default.
18
/// local_only:   Only allow to load content from the local filesystem.
19
///               Enabled by default.
20
12
#[derive(Clone)]
21
pub struct UrlResolver {
22
    /// Base URL; all relative references will be resolved with respect to this.
23
6
    pub base_url: Option<Url>,
24
}
25

            
26
impl UrlResolver {
27
    /// Creates a `UrlResolver` with defaults, and sets the `base_url`.
28
1246
    pub fn new(base_url: Option<Url>) -> Self {
29
1246
        UrlResolver { base_url }
30
1246
    }
31

            
32
    /// Decides which URLs are allowed to be loaded based on the presence of a base URL.
33
    ///
34
    /// This function implements the policy described in "Security and locations of
35
    /// referenced files" in the [crate
36
    /// documentation](index.html#security-and-locations-of-referenced-files).
37
397
    pub fn resolve_href(&self, href: &str) -> Result<AllowedUrl, AllowedUrlError> {
38
794
        let url = Url::options()
39
397
            .base_url(self.base_url.as_ref())
40
            .parse(href)
41
108
            .map_err(AllowedUrlError::UrlParseError)?;
42

            
43
        // Allow loads of data: from any location
44
289
        if url.scheme() == "data" {
45
37
            return Ok(AllowedUrl(url));
46
        }
47

            
48
        // Queries are not allowed.
49
252
        if url.query().is_some() {
50
2
            return Err(AllowedUrlError::NoQueriesAllowed);
51
        }
52

            
53
        // Fragment identifiers are not allowed.  They should have been stripped
54
        // upstream, by NodeId.
55
250
        if url.fragment().is_some() {
56
12
            return Err(AllowedUrlError::NoFragmentIdentifierAllowed);
57
        }
58

            
59
        // All other sources require a base url
60
238
        if self.base_url.is_none() {
61
1
            return Err(AllowedUrlError::BaseRequired);
62
        }
63

            
64
237
        let base_url = self.base_url.as_ref().unwrap();
65

            
66
        // Deny loads from differing URI schemes
67
237
        if url.scheme() != base_url.scheme() {
68
11
            return Err(AllowedUrlError::DifferentUriSchemes);
69
        }
70

            
71
        // resource: is allowed to load anything from other resources
72
226
        if url.scheme() == "resource" {
73
            return Ok(AllowedUrl(url));
74
        }
75

            
76
        // Non-file: isn't allowed to load anything
77
226
        if url.scheme() != "file" {
78
1
            return Err(AllowedUrlError::DisallowedScheme);
79
        }
80

            
81
        // The rest of this function assumes file: URLs; guard against
82
        // incorrect refactoring.
83
225
        assert!(url.scheme() == "file");
84

            
85
        // If we have a base_uri of "file:///foo/bar.svg", and resolve an href of ".",
86
        // Url.parse() will give us "file:///foo/".  We don't want that, so check
87
        // if the last path segment is empty - it will not be empty for a normal file.
88

            
89
225
        if let Some(segments) = url.path_segments() {
90
225
            if segments
91
                .last()
92
                .expect("URL path segments always contain at last 1 element")
93
                .is_empty()
94
            {
95
2
                return Err(AllowedUrlError::NotSiblingOrChildOfBaseFile);
96
            }
97
        } else {
98
            unreachable!("the file: URL cannot have an empty path");
99
        }
100

            
101
        // We have two file: URIs.  Now canonicalize them (remove .. and symlinks, etc.)
102
        // and see if the directories match
103

            
104
223
        let url_path = url
105
            .to_file_path()
106
            .map_err(|_| AllowedUrlError::InvalidPath)?;
107
223
        let base_path = base_url
108
            .to_file_path()
109
            .map_err(|_| AllowedUrlError::InvalidPath)?;
110

            
111
223
        let base_parent = base_path.parent();
112
223
        if base_parent.is_none() {
113
1
            return Err(AllowedUrlError::BaseIsRoot);
114
        }
115

            
116
222
        let base_parent = base_parent.unwrap();
117

            
118
222
        let path_canon = url_path
119
            .canonicalize()
120
50
            .map_err(|_| AllowedUrlError::CanonicalizationError)?;
121
197
        let parent_canon = base_parent
122
            .canonicalize()
123
10
            .map_err(|_| AllowedUrlError::CanonicalizationError)?;
124

            
125
383
        if path_canon.starts_with(parent_canon) {
126
            // Finally, convert the canonicalized path back to a URL.
127
191
            let path_to_url = Url::from_file_path(path_canon).unwrap();
128
191
            Ok(AllowedUrl(path_to_url))
129
        } else {
130
1
            Err(AllowedUrlError::NotSiblingOrChildOfBaseFile)
131
        }
132
397
    }
133
}
134

            
135
/// Wrapper for URLs which are allowed to be loaded
136
///
137
/// SVG files can reference other files (PNG/JPEG images, other SVGs,
138
/// CSS files, etc.).  This object is constructed by checking whether
139
/// a specified `href` (a possibly-relative filename, for example)
140
/// should be allowed to be loaded, given the base URL of the SVG
141
/// being loaded.
142
960
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
143
480
pub struct AllowedUrl(Url);
144

            
145
impl Deref for AllowedUrl {
146
    type Target = Url;
147

            
148
101
    fn deref(&self) -> &Url {
149
        &self.0
150
101
    }
151
}
152

            
153
impl fmt::Display for AllowedUrl {
154
2
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155
2
        self.0.fmt(f)
156
2
    }
157
}
158

            
159
#[cfg(test)]
mod tests {
    use super::*;

    use std::path::PathBuf;

    #[test]
    fn disallows_relative_file_with_no_base_file() {
        let url_resolver = UrlResolver::new(None);
        assert!(matches!(
            url_resolver.resolve_href("foo.svg"),
            Err(AllowedUrlError::UrlParseError(
                url::ParseError::RelativeUrlWithoutBase
            ))
        ));
    }

    #[test]
    fn disallows_different_schemes() {
        let url_resolver = UrlResolver::new(Some(
            Url::parse("http://example.com/malicious.svg").unwrap(),
        ));
        assert!(matches!(
            url_resolver.resolve_href("file:///etc/passwd"),
            Err(AllowedUrlError::DifferentUriSchemes)
        ));
    }

    /// Builds a `file:` URI string for an absolute path `p`; on Windows the
    /// path is placed under the `c:` drive.
    fn make_file_uri(p: &str) -> String {
        if cfg!(windows) {
            format!("file:///c:{}", p)
        } else {
            format!("file://{}", p)
        }
    }

    #[test]
    fn disallows_base_is_root() {
        let url_resolver = UrlResolver::new(Some(Url::parse(&make_file_uri("/")).unwrap()));
        assert!(matches!(
            url_resolver.resolve_href("foo.svg"),
            Err(AllowedUrlError::BaseIsRoot)
        ));
    }

    #[test]
    fn disallows_non_file_scheme() {
        let url_resolver = UrlResolver::new(Some(Url::parse("http://foo.bar/baz.svg").unwrap()));
        assert!(matches!(
            url_resolver.resolve_href("foo.svg"),
            Err(AllowedUrlError::DisallowedScheme)
        ));
    }

    #[test]
    fn allows_data_url_with_no_base_file() {
        // An empty href cannot be parsed without a base URL, so this test
        // needs a real data: URL to exercise the "data: is always allowed" rule.
        let data_url = "data:image/jpeg;base64,xxyyzz==";

        let url_resolver = UrlResolver::new(None);
        assert_eq!(
            url_resolver.resolve_href(data_url).unwrap().as_str(),
            data_url,
        );
    }

    /// Canonicalizes a fixture path given relative to the source directory
    /// and turns it into a `file:` URL.
    fn url_from_test_fixtures(filename_relative_to_librsvg_srcdir: &str) -> Url {
        let path = PathBuf::from(filename_relative_to_librsvg_srcdir);
        let absolute = path
            .canonicalize()
            .expect("files from test fixtures are supposed to canonicalize");
        Url::from_file_path(absolute).unwrap()
    }

    #[test]
    fn allows_relative() {
        let base_url = url_from_test_fixtures("tests/fixtures/loading/bar.svg");
        let url_resolver = UrlResolver::new(Some(base_url));

        let resolved = url_resolver.resolve_href("foo.svg").unwrap();
        let resolved_str = resolved.as_str();
        assert!(resolved_str.ends_with("/loading/foo.svg"));
    }

    #[test]
    fn allows_sibling() {
        let url_resolver = UrlResolver::new(Some(url_from_test_fixtures(
            "tests/fixtures/loading/bar.svg",
        )));
        let resolved = url_resolver
            .resolve_href(url_from_test_fixtures("tests/fixtures/loading/foo.svg").as_str())
            .unwrap();

        let resolved_str = resolved.as_str();
        assert!(resolved_str.ends_with("/loading/foo.svg"));
    }

    #[test]
    fn allows_child_of_sibling() {
        let url_resolver = UrlResolver::new(Some(url_from_test_fixtures(
            "tests/fixtures/loading/bar.svg",
        )));
        let resolved = url_resolver
            .resolve_href(url_from_test_fixtures("tests/fixtures/loading/subdir/baz.svg").as_str())
            .unwrap();

        let resolved_str = resolved.as_str();
        assert!(resolved_str.ends_with("/loading/subdir/baz.svg"));
    }

    // Ignore on Windows since we test for /etc/passwd
    #[cfg(unix)]
    #[test]
    fn disallows_non_sibling() {
        let url_resolver = UrlResolver::new(Some(url_from_test_fixtures(
            "tests/fixtures/loading/bar.svg",
        )));
        assert!(matches!(
            url_resolver.resolve_href(&make_file_uri("/etc/passwd")),
            Err(AllowedUrlError::NotSiblingOrChildOfBaseFile)
        ));
    }

    #[test]
    fn disallows_queries() {
        let url_resolver = UrlResolver::new(Some(
            Url::parse(&make_file_uri("/example/bar.svg")).unwrap(),
        ));
        assert!(matches!(
            url_resolver.resolve_href(".?../../../../../../../../../../etc/passwd"),
            Err(AllowedUrlError::NoQueriesAllowed)
        ));
    }

    #[test]
    fn disallows_weird_relative_uris() {
        let url_resolver = UrlResolver::new(Some(
            Url::parse(&make_file_uri("/example/bar.svg")).unwrap(),
        ));

        assert!(url_resolver
            .resolve_href(".@../../../../../../../../../../etc/passwd")
            .is_err());
        assert!(url_resolver
            .resolve_href(".$../../../../../../../../../../etc/passwd")
            .is_err());
        assert!(url_resolver
            .resolve_href(".%../../../../../../../../../../etc/passwd")
            .is_err());
        assert!(url_resolver
            .resolve_href(".*../../../../../../../../../../etc/passwd")
            .is_err());
        assert!(url_resolver
            .resolve_href("~/../../../../../../../../../../etc/passwd")
            .is_err());
    }

    #[test]
    fn disallows_dot_sibling() {
        let url_resolver = UrlResolver::new(Some(
            Url::parse(&make_file_uri("/example/bar.svg")).unwrap(),
        ));

        assert!(matches!(
            url_resolver.resolve_href("."),
            Err(AllowedUrlError::NotSiblingOrChildOfBaseFile)
        ));
        assert!(matches!(
            url_resolver.resolve_href(".#../../../../../../../../../../etc/passwd"),
            Err(AllowedUrlError::NoFragmentIdentifierAllowed)
        ));
    }

    #[test]
    fn disallows_fragment() {
        // UrlResolver::resolve_href() explicitly disallows fragment identifiers.
        // This is because they should have been stripped before calling that function,
        // by NodeId or the Iri machinery.
        let url_resolver =
            UrlResolver::new(Some(Url::parse("https://example.com/foo.svg").unwrap()));

        assert!(matches!(
            url_resolver.resolve_href("bar.svg#fragment"),
            Err(AllowedUrlError::NoFragmentIdentifierAllowed)
        ));
    }

    #[cfg(windows)]
    #[test]
    fn invalid_url_from_test_suite() {
        // This is required for Url to panic.
        let resolver =
            UrlResolver::new(Some(Url::parse("file:///c:/foo.svg").expect("initial url")));
        // With this, it doesn't panic:
        //   let resolver = UrlResolver::new(None);

        // The following panics, when using a base URL
        //   match resolver.resolve_href("file://invalid.css") {
        // so, use a less problematic case, hopefully
        //
        // There is no assertion on the result: the test only checks that
        // resolving this href does not panic.
        match resolver.resolve_href("file://") {
            Ok(_) => println!("yay!"),
            Err(e) => println!("err: {}", e),
        }
    }
}