1
use chrono::{DateTime, Utc};
2
use float_cmp::approx_eq;
3
use lopdf::{self, Dictionary, Object};
4
use predicates::prelude::*;
5
use predicates::reflection::{Case, Child, PredicateReflection, Product};
6
use std::cmp;
7
use std::fmt;
8

            
9
/// Predicate that holds when a byte slice (`[u8]`) can be parsed as a PDF document.
///
/// The `with_*` builder methods turn it into a predicate that additionally checks
/// one detail of the parsed document.
#[derive(Debug)]
pub struct PdfPredicate {}
12

            
13
impl PdfPredicate {
14
3
    pub fn with_page_count(self, num_pages: usize) -> DetailPredicate<Self> {
15
3
        DetailPredicate::<Self> {
16
            p: self,
17
3
            d: Detail::PageCount(num_pages),
18
        }
19
3
    }
20

            
21
13
    pub fn with_page_size(
22
13
        self,
23
        idx: usize,
24
        width_in_points: f32,
25
        height_in_points: f32,
26
    ) -> DetailPredicate<Self> {
27
13
        DetailPredicate::<Self> {
28
            p: self,
29
13
            d: Detail::PageSize(
30
13
                Dimensions {
31
                    w: width_in_points,
32
                    h: height_in_points,
33
                    unit: 1.0,
34
                },
35
                idx,
36
            ),
37
        }
38
13
    }
39

            
40
1
    pub fn with_creation_date(self, when: DateTime<Utc>) -> DetailPredicate<Self> {
41
1
        DetailPredicate::<Self> {
42
            p: self,
43
1
            d: Detail::CreationDate(when),
44
        }
45
1
    }
46

            
47
3
    pub fn with_link(self, link: &str) -> DetailPredicate<Self> {
48
3
        DetailPredicate::<Self> {
49
            p: self,
50
3
            d: Detail::Link(link.to_string()),
51
        }
52
3
    }
53

            
54
2
    pub fn with_text(self, text: &str) -> DetailPredicate<Self> {
55
2
        DetailPredicate::<Self> {
56
            p: self,
57
2
            d: Detail::Text(text.to_string()),
58
        }
59
2
    }
60

            
61
4
    pub fn with_version(self, version: &str) -> DetailPredicate<Self> {
62
4
        DetailPredicate::<Self> {
63
            p: self,
64
4
            d: Detail::Version(version.to_string()),
65
        }
66
4
    }
67
}
68

            
69
impl Predicate<[u8]> for PdfPredicate {
70
    fn eval(&self, data: &[u8]) -> bool {
71
        lopdf::Document::load_mem(data).is_ok()
72
    }
73

            
74
1
    fn find_case<'a>(&'a self, _expected: bool, data: &[u8]) -> Option<Case<'a>> {
75
1
        match lopdf::Document::load_mem(data) {
76
1
            Ok(_) => None,
77
            Err(e) => Some(Case::new(Some(self), false).add_product(Product::new("Error", e))),
78
        }
79
1
    }
80
}
81

            
82
// Empty impl: `PdfPredicate` relies entirely on the trait's default methods.
impl PredicateReflection for PdfPredicate {}
83

            
84
impl fmt::Display for PdfPredicate {
85
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
86
        write!(f, "is a PDF")
87
    }
88
}
89

            
90
/// Extends a PdfPredicate by a check for page count, page size or creation date.
91
#[derive(Debug)]
92
pub struct DetailPredicate<PdfPredicate> {
93
    p: PdfPredicate,
94
    d: Detail,
95
}
96

            
97
#[derive(Debug)]
98
enum Detail {
99
    PageCount(usize),
100
    PageSize(Dimensions, usize),
101
    CreationDate(DateTime<Utc>),
102
    Link(String),
103
    Text(String),
104
    Version(String),
105
}
106

            
107
/// A PDF page's dimensions, taken from its `MediaBox`.
///
/// Note that `w` and `h` are given in `UserUnit`s; one `UserUnit` is by
/// default 1.0 point, i.e. 1/72 inch.
#[derive(Debug)]
struct Dimensions {
    /// Width, in `UserUnit`s.
    w: f32,
    /// Height, in `UserUnit`s.
    h: f32,
    /// Size of one `UserUnit`, in points (1/72 of an inch).
    unit: f32,
}
116

            
117
impl Dimensions {
118
13
    pub fn from_media_box(obj: &lopdf::Object, unit: Option<f32>) -> lopdf::Result<Dimensions> {
119
13
        let a = obj.as_array()?;
120
13
        Ok(Dimensions {
121
13
            w: a[2].as_float()?,
122
13
            h: a[3].as_float()?,
123
13
            unit: unit.unwrap_or(1.0),
124
        })
125
13
    }
126

            
127
26
    pub fn width_in_pt(&self) -> f32 {
128
26
        self.w * self.unit
129
26
    }
130

            
131
26
    pub fn height_in_pt(&self) -> f32 {
132
26
        self.h * self.unit
133
26
    }
134
}
135

            
136
impl fmt::Display for Dimensions {
137
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
138
        write!(f, "{} pt x {} pt", self.width_in_pt(), self.height_in_pt())
139
    }
140
}
141

            
142
impl cmp::PartialEq for Dimensions {
143
13
    fn eq(&self, other: &Self) -> bool {
144
13
        approx_eq!(
145
            f32,
146
            self.width_in_pt(),
147
            other.width_in_pt(),
148
            epsilon = 0.0001
149
13
        ) && approx_eq!(
150
            f32,
151
            self.height_in_pt(),
152
            other.height_in_pt(),
153
            epsilon = 0.0001
154
        )
155
13
    }
156
}
157

            
158
// Marker impl only; the comparison itself is the tolerance-based `PartialEq`
// implementation for `Dimensions` in this file.
impl cmp::Eq for Dimensions {}
159

            
160
trait Details {
161
    fn get_page_count(&self) -> usize;
162
    fn get_page_size(&self, idx: usize) -> Option<Dimensions>;
163
    fn get_creation_date(&self) -> Option<DateTime<Utc>>;
164
    fn get_from_trailer<'a>(&'a self, key: &[u8]) -> lopdf::Result<&'a lopdf::Object>;
165
    fn get_from_page<'a>(&'a self, idx: usize, key: &[u8]) -> lopdf::Result<&'a lopdf::Object>;
166
}
167

            
168
impl DetailPredicate<PdfPredicate> {
169
26
    fn eval_doc(&self, doc: &lopdf::Document) -> bool {
170
26
        match &self.d {
171
3
            Detail::PageCount(n) => doc.get_page_count() == *n,
172
26
            Detail::PageSize(d, idx) => doc.get_page_size(*idx).map_or(false, |dim| dim == *d),
173
2
            Detail::CreationDate(d) => doc.get_creation_date().map_or(false, |date| date == *d),
174
3
            Detail::Link(link) => document_has_link(doc, link),
175
2
            Detail::Text(text) => document_has_text(doc, text),
176
4
            Detail::Version(version) => document_has_version(doc, version),
177
        }
178
26
    }
179

            
180
26
    fn find_case_for_doc<'a>(&'a self, expected: bool, doc: &lopdf::Document) -> Option<Case<'a>> {
181
26
        if self.eval_doc(doc) == expected {
182
            let product = self.product_for_doc(doc);
183
            Some(Case::new(Some(self), false).add_product(product))
184
        } else {
185
26
            None
186
        }
187
26
    }
188

            
189
    fn product_for_doc(&self, doc: &lopdf::Document) -> Product {
190
        match &self.d {
191
            Detail::PageCount(_) => Product::new(
192
                "actual page count",
193
                format!("{} page(s)", doc.get_page_count()),
194
            ),
195
            Detail::PageSize(_, idx) => Product::new(
196
                "actual page size",
197
                match doc.get_page_size(*idx) {
198
                    Some(dim) => format!("{}", dim),
199
                    None => "None".to_string(),
200
                },
201
            ),
202
            Detail::CreationDate(_) => Product::new(
203
                "actual creation date",
204
                format!("{:?}", doc.get_creation_date()),
205
            ),
206
            Detail::Link(_) => Product::new(
207
                "actual link contents",
208
                "FIXME: who knows, but it's not what we expected".to_string(),
209
            ),
210
            Detail::Text(_) => {
211
                Product::new("actual text contents", doc.extract_text(&[1]).unwrap())
212
            }
213
            Detail::Version(_) => Product::new("actual version contents", doc.version.to_string()),
214
        }
215
    }
216
}
217

            
218
// Extensions to lopdf::Object; can be removed after lopdf 0.26
219
trait ObjExt {
220
    /// Get the object value as a float.
221
    /// Unlike as_f32() this will also cast an Integer to a Real.
222
    fn as_float(&self) -> lopdf::Result<f32>;
223
}
224

            
225
impl ObjExt for lopdf::Object {
226
    fn as_float(&self) -> lopdf::Result<f32> {
227
        match *self {
228
            lopdf::Object::Integer(ref value) => Ok(*value as f32),
229
            lopdf::Object::Real(ref value) => Ok(*value),
230
            _ => Err(lopdf::Error::Type),
231
        }
232
    }
233
}
234

            
235
impl Details for lopdf::Document {
236
3
    fn get_page_count(&self) -> usize {
237
3
        self.get_pages().len()
238
3
    }
239

            
240
13
    fn get_page_size(&self, idx: usize) -> Option<Dimensions> {
241
13
        match self.get_from_page(idx, b"MediaBox") {
242
13
            Ok(obj) => {
243
13
                let unit = self
244
                    .get_from_page(idx, b"UserUnit")
245
                    .and_then(ObjExt::as_float)
246
                    .ok();
247
13
                Dimensions::from_media_box(obj, unit).ok()
248
            }
249
            Err(_) => None,
250
        }
251
13
    }
252

            
253
1
    fn get_creation_date(&self) -> Option<DateTime<Utc>> {
254
1
        match self.get_from_trailer(b"CreationDate") {
255
2
            Ok(obj) => obj.as_datetime().map(|date| date.with_timezone(&Utc)),
256
            Err(_) => None,
257
        }
258
1
    }
259

            
260
1
    fn get_from_trailer<'a>(&'a self, key: &[u8]) -> lopdf::Result<&'a lopdf::Object> {
261
1
        let id = self.trailer.get(b"Info")?.as_reference()?;
262
1
        self.get_object(id)?.as_dict()?.get(key)
263
1
    }
264

            
265
26
    fn get_from_page<'a>(&'a self, idx: usize, key: &[u8]) -> lopdf::Result<&'a lopdf::Object> {
266
26
        let mut iter = self.page_iter();
267
44
        for _ in 0..idx {
268
18
            let _ = iter.next();
269
        }
270
26
        match iter.next() {
271
26
            Some(id) => self.get_object(id)?.as_dict()?.get(key),
272
            None => Err(lopdf::Error::ObjectNotFound),
273
        }
274
26
    }
275
}
276

            
277
impl Predicate<[u8]> for DetailPredicate<PdfPredicate> {
278
    fn eval(&self, data: &[u8]) -> bool {
279
        match lopdf::Document::load_mem(data) {
280
            Ok(doc) => self.eval_doc(&doc),
281
            _ => false,
282
        }
283
    }
284

            
285
26
    fn find_case<'a>(&'a self, expected: bool, data: &[u8]) -> Option<Case<'a>> {
286
26
        match lopdf::Document::load_mem(data) {
287
26
            Ok(doc) => self.find_case_for_doc(expected, &doc),
288
            Err(e) => Some(Case::new(Some(self), false).add_product(Product::new("Error", e))),
289
        }
290
26
    }
291
}
292

            
293
impl PredicateReflection for DetailPredicate<PdfPredicate> {
294
    fn children<'a>(&'a self) -> Box<dyn Iterator<Item = Child<'a>> + 'a> {
295
        let params = vec![Child::new("predicate", &self.p)];
296
        Box::new(params.into_iter())
297
    }
298
}
299

            
300
impl fmt::Display for DetailPredicate<PdfPredicate> {
301
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
302
        match &self.d {
303
            Detail::PageCount(n) => write!(f, "is a PDF with {} page(s)", n),
304
            Detail::PageSize(d, _) => write!(f, "is a PDF sized {}", d),
305
            Detail::CreationDate(d) => write!(f, "is a PDF created {:?}", d),
306
            Detail::Link(l) => write!(f, "is a PDF with a link to {}", l),
307
            Detail::Text(t) => write!(f, "is a PDF with \"{}\" in its text content", t),
308
            Detail::Version(v) => write!(f, "is a PDF with version {}", v),
309
        }
310
    }
311
}
312

            
313
// This is an extremely trivial test for a string being present in the document's
314
// text objects.
315
2
fn document_has_text(document: &lopdf::Document, needle: &str) -> bool {
316
2
    if let Ok(haystack) = text_from_first_page(document) {
317
2
        haystack.contains(needle)
318
2
    } else {
319
        false
320
    }
321
2
}
322

            
323
4
fn document_has_version(document: &lopdf::Document, version_to_search: &str) -> bool {
324
4
    document.version == version_to_search
325
4
}
326

            
327
// We do a super simple test that a PDF actually contains an Annotation object
328
// with a particular link.  We don't test that this annotation is actually linked
329
// from a page; that would be nicer.
330
3
fn document_has_link(document: &lopdf::Document, link_text: &str) -> bool {
331
3
    document
332
        .objects
333
        .values()
334
14
        .any(|obj| object_is_annotation_with_link(obj, link_text))
335
3
}
336

            
337
14
fn object_is_annotation_with_link(object: &Object, link_text: &str) -> bool {
338
14
    object
339
        .as_dict()
340
14
        .map(|dict| dict_is_annotation(dict) && dict_has_a_with_link(dict, link_text))
341
        .unwrap_or(false)
342
14
}
343

            
344
14
fn dict_is_annotation(dict: &Dictionary) -> bool {
345
14
    dict.get(b"Type")
346
14
        .and_then(|type_val| type_val.as_name_str())
347
14
        .map(|name| name == "Annot")
348
        .unwrap_or(false)
349
14
}
350

            
351
4
fn dict_has_a_with_link(dict: &Dictionary, link_text: &str) -> bool {
352
4
    dict.get(b"A")
353
4
        .and_then(|obj| obj.as_dict())
354
4
        .and_then(|dict| dict.get(b"URI"))
355
4
        .and_then(|obj| obj.as_str())
356
4
        .map(|string| string == link_text.as_bytes())
357
        .unwrap_or(false)
358
4
}
359

            
360
2
// Extracts the text of page number 1.  This is extremely simplistic: lopdf
// just concatenates all the text in the page into a single string.
fn text_from_first_page(doc: &lopdf::Document) -> lopdf::Result<String> {
    let first_page_only = [1];
    doc.extract_text(&first_page_only)
}