-
Notifications
You must be signed in to change notification settings - Fork 0
/
QPDFObjectHandle.hh
1641 lines (1470 loc) · 68.6 KB
/
QPDFObjectHandle.hh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright (c) 2005-2024 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.
#ifndef QPDFOBJECTHANDLE_HH
#define QPDFOBJECTHANDLE_HH
#include <qpdf/Constants.h>
#include <qpdf/DLL.h>
#include <qpdf/Types.h>
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include <qpdf/Buffer.hh>
#include <qpdf/InputSource.hh>
#include <qpdf/JSON.hh>
#include <qpdf/PointerHolder.hh> // unused -- remove in qpdf 12 (see #785)
#include <qpdf/QPDFObjGen.hh>
#include <qpdf/QPDFTokenizer.hh>
class Pipeline;
class QPDF;
class QPDF_Array;
class QPDF_Bool;
class QPDF_Dictionary;
class QPDF_InlineImage;
class QPDF_Integer;
class QPDF_Name;
class QPDF_Null;
class QPDF_Operator;
class QPDF_Real;
class QPDF_Reserved;
class QPDF_Stream;
class QPDF_String;
class QPDFObject;
class QPDFTokenizer;
class QPDFExc;
class Pl_QPDFTokenizer;
class QPDFMatrix;
class QPDFParser;
class QPDFObjectHandle
{
friend class QPDFParser;
public:
// This class is used by replaceStreamData. It provides an alternative way of associating
// stream data with a stream. See comments on replaceStreamData and newStream for additional
// details.
class QPDF_DLL_CLASS StreamDataProvider
{
public:
// supports_retry should be true only when the subclass implements the bool-returning,
// retry-aware provideStreamData overload described below; it defaults to false.
QPDF_DLL
StreamDataProvider(bool supports_retry = false);
// Virtual so that a provider can be destroyed through a pointer to this base class.
QPDF_DLL
virtual ~StreamDataProvider();
// The implementation of this function must write stream data to the given pipeline. The
// stream data must conform to whatever filters are explicitly associated with the stream.
// QPDFWriter may, in some cases, add compression, but if it does, it will update the
// filters as needed. Every call to provideStreamData for a given stream must write the same
// data. Note that, when writing linearized files, qpdf will call your provideStreamData
// twice, and if it generates different output, you risk generating invalid output or having
// qpdf throw an exception. The object ID and generation passed to this method are those
// that belong to the stream on behalf of which the provider is called. They may be ignored
// or used by the implementation for indexing or other purposes. This information is made
// available just to make it more convenient to use a single StreamDataProvider object to
// provide data for multiple streams.
// A few things to keep in mind:
//
// * Stream data providers must not modify any objects since they may be called after some
// parts of the file have already been written.
//
// * Since qpdf may call provideStreamData multiple times when writing linearized files, if
// the work done by your stream data provider is slow or computationally intensive, you
// might want to implement your own cache.
//
// * Once you have called replaceStreamData, the original stream data is no longer directly
// accessible from the stream, but this is easy to work around by copying the stream to
// a separate QPDF object. The qpdf library implements this very efficiently without
// actually making a copy of the stream data. You can find examples of this pattern in
// some of the examples, including pdf-custom-filter.cc and pdf-invert-images.cc.
// Prior to qpdf 10.0.0, it was not possible to handle errors the way pipeStreamData does or
// to pass back success. Starting in qpdf 10.0.0, those capabilities have been added by
// allowing an alternative provideStreamData to be implemented. You must implement at least
// one of the versions of provideStreamData below. If you implement the version that
// supports retry and returns a value, you should pass true as the value of supports_retry
// in the base class constructor. This will cause the library to call that version of the
// method, which should also return a boolean indicating whether it ran without errors.
//
// Basic form: writes the data to pipeline and reports nothing back.
QPDF_DLL
virtual void provideStreamData(QPDFObjGen const& og, Pipeline* pipeline);
// Retry-aware form: returns a boolean indicating whether it ran without errors.
// NOTE(review): the exact meaning of suppress_warnings and will_retry is not described in
// this header -- confirm against the implementation before relying on them.
QPDF_DLL
virtual bool provideStreamData(
QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry);
// The same two forms, taking the object ID and generation as separate ints instead of a
// QPDFObjGen. NOTE(review): presumably kept for compatibility with older subclasses --
// confirm.
QPDF_DLL virtual void provideStreamData(int objid, int generation, Pipeline* pipeline);
QPDF_DLL virtual bool provideStreamData(
int objid, int generation, Pipeline* pipeline, bool suppress_warnings, bool will_retry);
// NOTE(review): presumably returns the supports_retry value given to the constructor; the
// definition is not in this header -- confirm.
QPDF_DLL
bool supportsRetry();
private:
// Retry capability flag; see the constructor's supports_retry argument.
bool supports_retry;
};
// The TokenFilter class provides a way to filter content streams in a lexically aware fashion.
// TokenFilters can be attached to streams using the addTokenFilter or addContentTokenFilter
// methods or can be applied on the spot by filterPageContents. You may also use
// Pl_QPDFTokenizer directly if you need full control.
//
// The handleToken method is called for each token, including the eof token, and then handleEOF
// is called at the very end. Handlers may call write (or writeToken) to pass data downstream.
// Please see examples/pdf-filter-tokens.cc and examples/pdf-count-strings.cc for examples of
// using TokenFilters.
//
// Please note that when you call token.getValue() on a token of type tt_string or tt_name, you
// get the canonical, "parsed" representation of the token. For a string, this means that there
// are no delimiters, and for a name, it means that all escaping (# followed by two hex digits)
// has been resolved. qpdf's internal representation of a name includes the leading slash. As
// such, you can't write the value of token.getValue() directly to output that is supposed to be
// valid PDF syntax. If you want to do that, you need to call writeToken() instead, or you can
// retrieve the token as it appeared in the input with token.getRawValue(). To construct a new
// string or name token from a canonical representation, use
// QPDFTokenizer::Token(QPDFTokenizer::tt_string, "parsed-str") or
// QPDFTokenizer::Token(QPDFTokenizer::tt_name,
// "/Canonical-Name"). Tokens created this way won't have a PDF-syntax raw value, but you can
// still write them with writeToken(). Example:
// writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/text/plain"))
// would write `/text#2fplain`, and
// writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, "a\\(b")) would write `(a\(b)`.
class QPDF_DLL_CLASS TokenFilter
{
  public:
    QPDF_DLL
    TokenFilter() = default;
    QPDF_DLL
    virtual ~TokenFilter() = default;

    // Called for each token, including the eof token. Subclasses must implement this; they may
    // call write() or writeToken() to pass data downstream.
    virtual void handleToken(QPDFTokenizer::Token const&) = 0;

    // Called at the very end, after the last call to handleToken.
    QPDF_DLL
    virtual void handleEOF();

    // Gives Pl_QPDFTokenizer (and nothing else) a way to reach the private setPipeline method
    // without making the whole pipeline class a friend of TokenFilter.
    class PipelineAccessor
    {
        friend class Pl_QPDFTokenizer;

      private:
        static void
        setPipeline(TokenFilter* f, Pipeline* p)
        {
            f->setPipeline(p);
        }
    };

  protected:
    // Helpers for subclasses to pass raw bytes, a string, or a token downstream.
    QPDF_DLL
    void write(char const* data, size_t len);
    QPDF_DLL
    void write(std::string const& str);
    QPDF_DLL
    void writeToken(QPDFTokenizer::Token const&);

  private:
    QPDF_DLL_PRIVATE
    void setPipeline(Pipeline*);

    // Downstream pipeline, installed through PipelineAccessor. Explicitly initialized to nullptr
    // because the defaulted constructor would otherwise leave the pointer indeterminate, making
    // any read of it before setPipeline() is called undefined behavior.
    Pipeline* pipeline{nullptr};
};
// This class is used by parse to decrypt strings when reading an object that contains encrypted
// strings.
class StringDecrypter
{
public:
// Virtual so that a decrypter can be destroyed through a pointer to this interface.
QPDF_DLL
virtual ~StringDecrypter() = default;
// Pure virtual: every subclass must implement this. val is taken by non-const reference, so an
// implementation can modify it in place. NOTE(review): presumably val holds the encrypted
// bytes on entry and the decrypted bytes on return -- confirm against the callers.
virtual void decryptString(std::string& val) = 0;
};
// This class is used by parsePageContents. Callers must instantiate a subclass of this with
// handlers defined to accept QPDFObjectHandles that are parsed from the stream.
class QPDF_DLL_CLASS ParserCallbacks
{
public:
// Virtual so that callbacks can be destroyed through a pointer to this base class.
QPDF_DLL
virtual ~ParserCallbacks() = default;
// One of the handleObject methods must be overridden.
QPDF_DLL
virtual void handleObject(QPDFObjectHandle);
// Same as above, with an additional offset and length. NOTE(review): presumably the position
// and size of the object within the content data -- confirm against the implementation.
QPDF_DLL
virtual void handleObject(QPDFObjectHandle, size_t offset, size_t length);
// Pure virtual: every subclass must implement this. NOTE(review): presumably called once when
// the end of the content is reached -- confirm.
virtual void handleEOF() = 0;
// Override this if you want to know the full size of the contents, possibly after
// concatenation of multiple streams. This is called before the first call to handleObject.
QPDF_DLL
virtual void contentSize(size_t);
protected:
// Implementors may call this method during parsing to terminate parsing early. This method
// throws an exception that is caught by parsePageContents, so its effect is immediate.
QPDF_DLL
void terminateParsing();
};
// Convenience value type for rectangles: four public double coordinates.
class Rectangle
{
  public:
    // A default-constructed rectangle has every coordinate set to zero.
    Rectangle() :
        Rectangle(0.0, 0.0, 0.0, 0.0)
    {
    }

    // Store the four coordinates exactly as given.
    Rectangle(double llx, double lly, double urx, double ury) :
        llx{llx},
        lly{lly},
        urx{urx},
        ury{ury}
    {
    }

    double llx;
    double lly;
    double urx;
    double ury;
};
// Convenience value type for transformation matrices: six public double components. See also
// QPDFMatrix. QPDFMatrix cannot replace this class because QPDFMatrix's default constructor
// creates the identity transform matrix, whereas this one is all zeroes.
class Matrix
{
  public:
    // A default-constructed matrix has every component set to zero (not the identity).
    Matrix() :
        Matrix(0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
    {
    }

    // Store the six components exactly as given.
    Matrix(double a, double b, double c, double d, double e, double f) :
        a{a},
        b{b},
        c{c},
        d{d},
        e{e},
        f{f}
    {
    }

    double a;
    double b;
    double c;
    double d;
    double e;
    double f;
};
// Default constructor. NOTE(review): presumably yields a handle for which isInitialized()
// returns false -- confirm against the implementation.
QPDF_DLL
QPDFObjectHandle() = default;
// Copy construction and copy assignment use the compiler-generated versions.
QPDF_DLL
QPDFObjectHandle(QPDFObjectHandle const&) = default;
QPDF_DLL
QPDFObjectHandle& operator=(QPDFObjectHandle const&) = default;
#ifdef QPDF_FUTURE
// Move construction and move assignment are declared only when QPDF_FUTURE is defined.
QPDF_DLL
QPDFObjectHandle(QPDFObjectHandle&&) = default;
QPDF_DLL
QPDFObjectHandle& operator=(QPDFObjectHandle&&) = default;
#endif
// Declared inline; the definition is not in this part of the file. NOTE(review): presumably
// reports whether this handle refers to an underlying object -- confirm.
QPDF_DLL
inline bool isInitialized() const;
// This method returns true if the QPDFObjectHandle objects point to exactly the same underlying
// object, meaning that changes to one are reflected in the other, or "if you paint one, the
// other one changes color." This does not perform a structural comparison of the contents of
// the objects.
QPDF_DLL
bool isSameObjectAs(QPDFObjectHandle const&) const;
// Return type code and type name of underlying object. These are useful for doing rapid type
// tests (like switch statements) or for testing and debugging.
QPDF_DLL
qpdf_object_type_e getTypeCode();
QPDF_DLL
char const* getTypeName();
// Exactly one of these will return true for any initialized object. Operator and InlineImage
// are only allowed in content streams.
QPDF_DLL
bool isBool();
QPDF_DLL
bool isNull();
QPDF_DLL
bool isInteger();
QPDF_DLL
bool isReal();
QPDF_DLL
bool isName();
QPDF_DLL
bool isString();
QPDF_DLL
bool isOperator();
QPDF_DLL
bool isInlineImage();
QPDF_DLL
bool isArray();
QPDF_DLL
bool isDictionary();
QPDF_DLL
bool isStream();
QPDF_DLL
bool isReserved();
// True for objects that are direct nulls. Does not attempt to resolve objects. This is intended
// for internal use, but it can be used as an efficient way to check for nulls that are not
// indirect objects.
QPDF_DLL
bool isDirectNull() const;
// True for indirect objects. An indirect object also answers true to the query for its specific type.
QPDF_DLL
inline bool isIndirect() const;
// This returns true for indirect objects from a QPDF that has been destroyed. Trying unparse
// to unparse such an object will throw a logic_error.
QPDF_DLL
bool isDestroyed();
// True for everything except array, dictionary, stream, operator, and inline image.
QPDF_DLL
bool isScalar();
// True if the object is a name object representing the provided name.
QPDF_DLL
bool isNameAndEquals(std::string const& name);
// True if the object is a dictionary of the specified type and subtype, if any.
QPDF_DLL
bool isDictionaryOfType(std::string const& type, std::string const& subtype = "");
// True if the object is a stream of the specified type and subtype, if any.
QPDF_DLL
bool isStreamOfType(std::string const& type, std::string const& subtype = "");
// Public factory methods
// Wrap an object in an array if it is not already an array. This is a helper for cases in which
// something in a PDF may either be a single item or an array of items, which is a common idiom.
QPDF_DLL
QPDFObjectHandle wrapInArray();
// Construct an object of any type from a string representation of the object. Throws QPDFExc
// with an empty filename and an offset into the string if there is an error. Any indirect
// object syntax (obj gen R) will cause a logic_error exception to be thrown. If
// object_description is provided, it will appear in the message of any QPDFExc exception thrown
// for invalid syntax. See also the global `operator ""_qpdf` defined below.
QPDF_DLL
static QPDFObjectHandle
parse(std::string const& object_str, std::string const& object_description = "");
// Construct an object of any type from a string representation of the object. Indirect object
// syntax (obj gen R) is allowed and will create indirect references within the passed-in
// context. If object_description is provided, it will appear in the message of any QPDFExc
// exception thrown for invalid syntax. Note that you can't parse an indirect object reference
// all by itself as parse will stop at the end of the first complete object, which will just be
// the first number and will report that there is trailing data at the end of the string.
QPDF_DLL
static QPDFObjectHandle
parse(QPDF* context, std::string const& object_str, std::string const& object_description = "");
// Construct an object as above by reading from the given InputSource at its current position
// and using the tokenizer you supply. Indirect objects and encrypted strings are permitted.
// This method was intended to be called by QPDF for parsing objects that are read from the
// object's input stream.
QPDF_DLL
static QPDFObjectHandle parse(
std::shared_ptr<InputSource> input,
std::string const& object_description,
QPDFTokenizer&,
bool& empty,
StringDecrypter* decrypter,
QPDF* context);
// Return the offset where the object was found when parsed. A negative value means that the
// object was created without parsing. If the object is in a stream, the offset is from the
// beginning of the stream. Otherwise, the offset is from the beginning of the file.
QPDF_DLL
qpdf_offset_t getParsedOffset();
// Older method: stream_or_array should be the value of /Contents from a page object. It's more
// convenient to just call QPDFPageObjectHelper::parsePageContents on the page object, and error
// messages will also be more useful because the page object information will be known.
QPDF_DLL
static void parseContentStream(QPDFObjectHandle stream_or_array, ParserCallbacks* callbacks);
// When called on a stream or stream array that is some page's content streams, do the same as
// pipePageContents. This method is a lower level way to do what
// QPDFPageObjectHelper::pipePageContents does, but it allows you to perform this operation on a
// contents object that is disconnected from a page object. The description argument should
// describe the containing page and is used in error messages. The all_description argument is
// initialized to something that could be used to describe the result of the pipeline. It is the
// description amended with the identifiers of the underlying objects. Please note that if there
// is an array of content streams, p->finish() is called after each stream. If you pass a
// pipeline that doesn't allow write() to be called after finish(), you can wrap it in an
// instance of Pl_Concatenate and then call manualFinish() on the Pl_Concatenate pipeline at the
// end.
QPDF_DLL
void
pipeContentStreams(Pipeline* p, std::string const& description, std::string& all_description);
// As of qpdf 8, it is possible to add custom token filters to a stream. The tokenized stream
// data is passed through the token filter after all original filters but before content stream
// normalization if requested. This is a low-level interface to add it to a stream. You will
// usually want to call QPDFPageObjectHelper::addContentTokenFilter instead, which can be
// applied to a page object, and which will automatically handle the case of pages whose
// contents are split across multiple streams.
QPDF_DLL
void addTokenFilter(std::shared_ptr<TokenFilter> token_filter);
// Legacy helpers for parsing content streams. These methods are not going away, but newer code
// should call the corresponding methods in QPDFPageObjectHelper instead. The specification and
// behavior of these methods are the same as the identically named methods in that class, but
// newer functionality will be added there.
QPDF_DLL
void parsePageContents(ParserCallbacks* callbacks);
QPDF_DLL
void filterPageContents(TokenFilter* filter, Pipeline* next = nullptr);
// See comments for QPDFPageObjectHelper::pipeContents.
QPDF_DLL
void pipePageContents(Pipeline* p);
QPDF_DLL
void addContentTokenFilter(std::shared_ptr<TokenFilter> token_filter);
// End legacy content stream helpers
// Called on a stream to filter the stream as if it were page contents. This can be used to
// apply a TokenFilter to a form XObject, whose data is in the same format as a content stream.
QPDF_DLL
void filterAsContents(TokenFilter* filter, Pipeline* next = nullptr);
// Called on a stream to parse the stream as page contents. This can be used to parse a form
// XObject.
QPDF_DLL
void parseAsContents(ParserCallbacks* callbacks);
// Type-specific factories
QPDF_DLL
static QPDFObjectHandle newNull();
QPDF_DLL
static QPDFObjectHandle newBool(bool value);
QPDF_DLL
static QPDFObjectHandle newInteger(long long value);
QPDF_DLL
static QPDFObjectHandle newReal(std::string const& value);
QPDF_DLL
static QPDFObjectHandle
newReal(double value, int decimal_places = 0, bool trim_trailing_zeroes = true);
// Note about name objects: qpdf's internal representation of a PDF name is a sequence of bytes,
// excluding the NUL character, and starting with a slash. Name objects as represented in the
// PDF specification can contain characters escaped with #, but such escaping is not of concern
// when calling QPDFObjectHandle methods not directly relating to parsing. For example,
// newName("/text/plain").getName() and parse("/text#2fplain").getName() both return
// "/text/plain", while newName("/text/plain").unparse() and parse("/text#2fplain").unparse()
// both return "/text#2fplain". When working with the qpdf API for creating, retrieving, and
// modifying objects, you want to work with the internal, canonical representation. For names
// containing alphanumeric characters, dashes, and underscores, there is no difference between
// the two representations. For a lengthy discussion, see
// https://github.com/qpdf/qpdf/discussions/625.
QPDF_DLL
static QPDFObjectHandle newName(std::string const& name);
QPDF_DLL
static QPDFObjectHandle newString(std::string const& str);
// Create a string encoded from the given utf8-encoded string appropriately encoded to appear in
// PDF files outside of content streams, such as in document metadata, form field values, page
// labels, outlines, and similar locations. We try ASCII first, then PDFDocEncoding, then UTF-16
// as needed to successfully encode all the characters.
QPDF_DLL
static QPDFObjectHandle newUnicodeString(std::string const& utf8_str);
QPDF_DLL
static QPDFObjectHandle newOperator(std::string const&);
QPDF_DLL
static QPDFObjectHandle newInlineImage(std::string const&);
QPDF_DLL
static QPDFObjectHandle newArray();
QPDF_DLL
static QPDFObjectHandle newArray(std::vector<QPDFObjectHandle> const& items);
QPDF_DLL
static QPDFObjectHandle newArray(Rectangle const&);
QPDF_DLL
static QPDFObjectHandle newArray(Matrix const&);
QPDF_DLL
static QPDFObjectHandle newArray(QPDFMatrix const&);
QPDF_DLL
static QPDFObjectHandle newDictionary();
QPDF_DLL
static QPDFObjectHandle newDictionary(std::map<std::string, QPDFObjectHandle> const& items);
// Create an array from a rectangle. Equivalent to the rectangle form of newArray.
QPDF_DLL
static QPDFObjectHandle newFromRectangle(Rectangle const&);
// Create an array from a matrix. Equivalent to the matrix form of newArray.
QPDF_DLL
static QPDFObjectHandle newFromMatrix(Matrix const&);
QPDF_DLL
static QPDFObjectHandle newFromMatrix(QPDFMatrix const&);
// Note: new stream creation methods were added to the QPDF class starting with
// version 11.2.0. The ones in this class are here for backward compatibility.
// Create a new stream and associate it with the given qpdf object. A subsequent call must be
// made to replaceStreamData() to provide data for the stream. The stream's dictionary may be
// retrieved by calling getDict(), and the resulting dictionary may be modified. Alternatively,
// you can create a new dictionary and call replaceDict to install it. From QPDF 11.2, you can
// call QPDF::newStream() instead.
QPDF_DLL
static QPDFObjectHandle newStream(QPDF* qpdf);
// Create a new stream and associate it with the given qpdf object. Use the given buffer as the
// stream data. The stream dictionary's /Length key will automatically be set to the size of the
// data buffer. If additional keys are required, the stream's dictionary may be retrieved by
// calling getDict(), and the resulting dictionary may be modified. This method is just a
// convenient wrapper around the newStream() and replaceStreamData(). It is a convenience
// method for streams that require no parameters beyond the stream length. Note that you don't
// have to deal with compression yourself if you use QPDFWriter. By default, QPDFWriter will
// automatically compress uncompressed stream data. Example programs are provided that
// illustrate this. From QPDF 11.2, you can call QPDF::newStream()
// instead.
QPDF_DLL
static QPDFObjectHandle newStream(QPDF* qpdf, std::shared_ptr<Buffer> data);
// Create new stream with data from string. This method will create a copy of the data rather
// than using the user-provided buffer as in the std::shared_ptr<Buffer> version of newStream.
// From QPDF 11.2, you can call QPDF::newStream() instead.
QPDF_DLL
static QPDFObjectHandle newStream(QPDF* qpdf, std::string const& data);
// A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is
// going to be added to the QPDF object. Normally you don't have to use this type since you can
// just call QPDF::makeIndirectObject. However, in some cases, if you have to create objects
// with circular references, you may need to create a reserved object so that you can have a
// reference to it and then replace the object later. Reserved objects have the special
// property that they can't be resolved to direct objects. This makes it possible to replace a
// reserved object with a new object while preserving existing references to them. When you are
// ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this
// purpose rather than the more general QPDF::replaceObject. It is an error to try to write a
// QPDF with QPDFWriter if it has any reserved objects in it. From QPDF 11.4, you can call
// QPDF::newReserved() instead.
QPDF_DLL
static QPDFObjectHandle newReserved(QPDF* qpdf);
// Provide an owning qpdf and object description. The library does this automatically with
// objects that are read from the input PDF and with objects that are created programmatically
// and inserted into the QPDF as a new indirect object. Most end user code will not need to call
// this. If an object has an owning qpdf and object description, it enables qpdf to give
// warnings with proper context in some cases where it would otherwise raise exceptions. It is
// okay to add objects without an owning_qpdf to objects that have one, but it is an error to
// have a QPDF contain objects with owning_qpdf set to something else. To add objects from
// another qpdf, use copyForeignObject instead.
QPDF_DLL
void setObjectDescription(QPDF* owning_qpdf, std::string const& object_description);
QPDF_DLL
bool hasObjectDescription();
// Accessor methods
//
// (Note: this comment is referenced in qpdf-c.h and the manual.)
//
// In PDF files, objects have specific types, but there is nothing that prevents PDF files from
// containing objects of types that aren't expected by the specification.
//
// There are two flavors of accessor methods:
//
// * getSomethingValue() returns the value and issues a type warning if the type is incorrect.
//
// * getValueAsSomething() returns false if the value is the wrong type. Otherwise, it returns
// true and initializes a reference of the appropriate type. These methods never issue type
// warnings.
//
// The getSomethingValue() accessors and some of the other methods expect objects of a
// particular type. Prior to qpdf 8, calling an accessor on an object of the wrong type, such as
// trying to get a dictionary key from an array, trying to get the string value of a number,
// etc., would throw an exception, but since qpdf 8, qpdf issues a warning and recovers using
// the following behavior:
//
// * Requesting a value of the wrong type (int value from string, array item from a scalar or
// dictionary, etc.) will return a zero-like value for that type: false for boolean, 0 for
// number, the empty string for string, or the null object for an object handle.
//
// * Accessing an array item that is out of bounds will return a null object.
//
// * Attempts to mutate an object of the wrong type (e.g., attempting to add a dictionary key to
// a scalar or array) will be ignored.
//
// When any of these fallback behaviors are used, qpdf issues a warning. Starting in qpdf 10.5,
// these warnings have the error code qpdf_e_object. Prior to 10.5, they had the error code
// qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with a QPDF object (as is the case
// for all objects whose origin was a PDF file), the warning is issued using the normal warning
// mechanism (as described in QPDF.hh), making it possible to suppress or otherwise detect them.
// If the QPDFObjectHandle is not associated with a QPDF object (meaning it was created
// programmatically), an exception will be thrown.
//
// The way to avoid getting any type warnings or exceptions, even when working with malformed
// PDF files, is to always check the type of a QPDFObjectHandle before accessing it (for
// example, make sure that isString() returns true before calling getStringValue()) and to
// always be sure that any array indices are in bounds.
//
// For additional discussion and rationale for this behavior, see the section in the QPDF manual
// entitled "Object Accessor Methods".
// Methods for bool objects
QPDF_DLL
bool getBoolValue();
QPDF_DLL
bool getValueAsBool(bool&);
// Methods for integer objects. Note: if an integer value is too big (too far away from zero in
// either direction) to fit in the requested return type, the maximum or minimum value for that
// return type may be returned. For example, on a system with 32-bit int, a numeric object with
// a value of 2^40 (or anything too big for 32 bits) will be returned as INT_MAX.
QPDF_DLL
long long getIntValue();
QPDF_DLL
bool getValueAsInt(long long&);
QPDF_DLL
int getIntValueAsInt();
QPDF_DLL
bool getValueAsInt(int&);
QPDF_DLL
unsigned long long getUIntValue();
QPDF_DLL
bool getValueAsUInt(unsigned long long&);
QPDF_DLL
unsigned int getUIntValueAsUInt();
QPDF_DLL
bool getValueAsUInt(unsigned int&);
// Methods for real objects
QPDF_DLL
std::string getRealValue();
QPDF_DLL
bool getValueAsReal(std::string&);
// Methods that work for both integer and real objects
QPDF_DLL
bool isNumber();
QPDF_DLL
double getNumericValue();
QPDF_DLL
bool getValueAsNumber(double&);
// Methods for name objects. The returned name value is in qpdf's canonical form with all
// escaping resolved. See comments for newName() for details.
QPDF_DLL
std::string getName();
QPDF_DLL
bool getValueAsName(std::string&);
// Methods for string objects
QPDF_DLL
std::string getStringValue();
QPDF_DLL
bool getValueAsString(std::string&);
// If a string starts with the UTF-16 marker, it is converted from UTF-16 to UTF-8. Otherwise,
// it is treated as a string encoded with PDF Doc Encoding. PDF Doc Encoding is identical to
// ISO-8859-1 except in the range from 0200 through 0240, where there is a mapping of characters
// to Unicode. QPDF versions prior to version 8.0.0 erroneously left characters in that range
// unmapped.
// NOTE(review): the range bounds above appear to be octal (i.e. 128 through 160 decimal) --
// confirm against the PDF Doc Encoding table in the PDF specification.
QPDF_DLL
std::string getUTF8Value();
QPDF_DLL
bool getValueAsUTF8(std::string&);
// Methods for content stream objects. Both operator values and inline image values are
// exchanged as std::string.
QPDF_DLL
std::string getOperatorValue();
QPDF_DLL
bool getValueAsOperator(std::string&);
QPDF_DLL
std::string getInlineImageValue();
QPDF_DLL
bool getValueAsInlineImage(std::string&);
// Methods for array objects; see also "Methods for name and array objects" (isOrHasName).
//
// Return an object that enables iteration over members. You can do
//
// for (auto iter: obj.aitems())
// {
//     // iter is an array element
// }
class QPDFArrayItems;
QPDF_DLL
QPDFArrayItems aitems();
// Number of items in the array, expressed as an int.
QPDF_DLL
int getArrayNItems();
// Return the array item at index n. As noted in the accessor discussion in this header, callers
// should make sure array indices are in bounds.
QPDF_DLL
QPDFObjectHandle getArrayItem(int n);
// Note: QPDF arrays internally optimize memory for arrays containing lots of nulls. Calling
// getArrayAsVector may cause a lot of memory to be allocated for very large arrays with lots of
// nulls.
QPDF_DLL
std::vector<QPDFObjectHandle> getArrayAsVector();
// NOTE(review): presumably true exactly when getArrayAsRectangle() would succeed, i.e. when the
// object is an array of four numeric values -- confirm against the implementation.
QPDF_DLL
bool isRectangle();
// If the array is an array of four numeric values, return as a rectangle. Otherwise, return the
// rectangle [0, 0, 0, 0]
QPDF_DLL
Rectangle getArrayAsRectangle();
// NOTE(review): presumably true exactly when getArrayAsMatrix() would succeed, i.e. when the
// object is an array of six numeric values -- confirm against the implementation.
QPDF_DLL
bool isMatrix();
// If the array is an array of six numeric values, return as a matrix. Otherwise, return the
// matrix [1, 0, 0, 1, 0, 0]
QPDF_DLL
Matrix getArrayAsMatrix();
// Methods for dictionary objects. In all dictionary methods, keys are specified/represented as
// canonical name strings starting with a leading slash and not containing any PDF syntax
// escaping. See comments for getName() for details.
//
// Return an object that enables iteration over members. You can do
//
// for (auto iter: obj.ditems())
// {
//     // iter.first is the key
//     // iter.second is the value
// }
class QPDFDictItems;
QPDF_DLL
QPDFDictItems ditems();
// Return true if key is present. Keys with null values are treated as if they are not present.
// This is as per the PDF spec.
QPDF_DLL
bool hasKey(std::string const&);
// Return the value for the key. If the key is not present, null is returned.
QPDF_DLL
QPDFObjectHandle getKey(std::string const&);
// If the object is null, return null. Otherwise, call getKey(). This makes it easier to access
// lower-level dictionaries, as in
//
//     auto font = page.getKeyIfDict("/Resources").getKeyIfDict("/Font");
QPDF_DLL
QPDFObjectHandle getKeyIfDict(std::string const&);
// Return all keys. Keys with null values are treated as if they are not present. This is as
// per the PDF spec.
QPDF_DLL
std::set<std::string> getKeys();
// Return dictionary as a map. Entries with null values are included, in contrast to hasKey()
// and getKeys(), which treat keys with null values as not present.
QPDF_DLL
std::map<std::string, QPDFObjectHandle> getDictAsMap();
// Methods for name and array objects. The name value is in qpdf's canonical form with all
// escaping resolved. See comments for newName() for details.
// NOTE(review): judging by its name, this presumably reports whether the object is the given
// name or is an array containing it -- confirm against the implementation.
QPDF_DLL
bool isOrHasName(std::string const&);
// Make all resources in a resource dictionary indirect. This just goes through all entries of
// top-level subdictionaries and converts any direct objects to indirect objects. This can be
// useful to call before mergeResources if it is going to be called multiple times to prevent
// resources from being copied multiple times.
// NOTE(review): owning_qpdf is presumably the QPDF in which the new indirect objects are
// created -- confirm.
QPDF_DLL
void makeResourcesIndirect(QPDF& owning_qpdf);
// Merge resource dictionaries. If the "conflicts" parameter is provided, conflicts in
// dictionary subitems are resolved, and "conflicts" is initialized to a map such that
// conflicts[resource_type][old_key] == [new_key]
//
// See also makeResourcesIndirect, which can be useful to call before calling this.
//
// This method does nothing if both this object and the other object are not dictionaries.
// Otherwise, it has the following behavior, where "object" refers to the object whose method is
// invoked, and "other" refers to the argument:
//
// * For each key in "other" whose value is an array:
//   * If "object" does not have that entry, shallow copy it.
//   * Otherwise, if "object" has an array in the same place, append to that array any objects
//     in "other"'s array that are not already present.
// * For each key in "other" whose value is a dictionary:
//   * If "object" does not have that entry, shallow copy it.
//   * Otherwise, for each key in the subdictionary:
//     * If key is not present in "object"'s entry, shallow copy it if direct or just add it if
//       indirect.
//     * Otherwise, if conflicts are being detected:
//       * If there is a key (oldkey) already in the dictionary that points to the same indirect
//         destination as key, indicate that key was replaced by oldkey. This would happen if
//         these two resource dictionaries have previously been merged.
//       * Otherwise pick a new key (newkey) that is unique within the resource dictionary,
//         store that in the resource dictionary with key's destination as its destination, and
//         indicate that key was replaced by newkey.
//
// The primary purpose of this method is to facilitate merging of resource dictionaries that are
// supposed to have the same scope as each other. For example, this can be used to merge a form
// XObject's /Resources dictionary with a form field's /DR or to merge two /DR dictionaries. The
// "conflicts" parameter may be previously initialized. This method adds to whatever is already
// there, which can be useful when merging with multiple things.
QPDF_DLL
void mergeResources(
QPDFObjectHandle other,
std::map<std::string, std::map<std::string, std::string>>* conflicts = nullptr);
// Get all resource names from a resource dictionary. If this object is a dictionary, this
// method returns a set of all the keys in all top-level subdictionaries. For resource
// dictionaries, this is the collection of names that may be referenced in the content stream.
QPDF_DLL
std::set<std::string> getResourceNames();
// Find a unique name within a resource dictionary starting with a given prefix. This method
// works by appending a number to the given prefix. It searches starting with min_suffix and
// sets min_suffix to the selected value upon return. This can be used to increase efficiency if
// adding multiple items with the same prefix. (Why doesn't it set min_suffix to the next
// number? Well, maybe you aren't going to actually use the name it returns.) If you are calling
// this multiple times on the same resource dictionary, you can initialize resource_names by
// calling getResourceNames(), incrementally update it as you add resources, and keep passing it
// in so that getUniqueResourceName doesn't have to traverse the resource dictionary each time
// it's called.
//
// min_suffix is an in/out parameter: it is read as the first suffix to try and is overwritten
// with the suffix that was selected. resource_names is optional and defaults to nullptr.
QPDF_DLL
std::string getUniqueResourceName(
std::string const& prefix,
int& min_suffix,
std::set<std::string>* resource_names = nullptr);
// A QPDFObjectHandle has an owning QPDF if it is associated with ("owned by") a specific QPDF
// object. Indirect objects always have an owning QPDF. Direct objects that are read from the
// input source will also have an owning QPDF. Programmatically created objects will only have
// one if setObjectDescription was called.
//
// When the QPDF object that owns an object is destroyed, the object is changed into a null, and
// its owner is cleared. Therefore you should not retain the value of an owning QPDF beyond the
// life of the QPDF. If in doubt, ask for it each time you need it.
//
// getOwningQPDF returns a pointer to the owning QPDF if the object has one. Otherwise, it
// returns a null pointer. Use this when you are able to handle the case of an object that
// doesn't have an owning QPDF.
QPDF_DLL
QPDF* getOwningQPDF() const;
// getQPDF, new in qpdf 11, returns a reference to the owning QPDF. If there is none, it throws
// a runtime_error. Use this when you know the object has to have an owning QPDF, such as when
// it's a known indirect object. Since streams are always indirect objects, this method can be
// used safely for streams. If error_msg is specified, it will be used as the contents of the
// runtime_error if there is no owner.
QPDF_DLL
QPDF& getQPDF(std::string const& error_msg = "") const;
// Create a shallow copy of an object as a direct object, but do not traverse across indirect
// object boundaries. That means that, for dictionaries and arrays, any keys or items that were
// indirect objects will still be indirect objects that point to the same place. In the
// strictest sense, this is not a shallow copy because it recursively descends arrays and
// dictionaries; it just doesn't cross over indirect objects. See also unsafeShallowCopy(). You
// can't copy a stream this way; see copyStream() instead.
QPDF_DLL
QPDFObjectHandle shallowCopy();
// Create a true shallow copy of an array or dictionary, just copying the immediate items
// (array) or keys (dictionary). This is "unsafe" because, if you *modify* any of the items in
// the copy, you are modifying the original, which is almost never what you want. However, if
// your intention is merely to *replace* top-level items or keys and not to modify lower-level
// items in the copy, this method is much faster than shallowCopy().
QPDF_DLL
QPDFObjectHandle unsafeShallowCopy();
// Create a copy of this stream. The new stream and the old stream are independent: after the
// copy, the dictionary or data of either the original or the copy can be modified without
// affecting the other. This uses StreamDataProvider internally, so no unnecessary copies of the
// stream's data are made. If the source stream's data is already being provided by a
// StreamDataProvider, the new stream will use the same one, so you have to make sure your
// StreamDataProvider can handle that case. But if you're already using a StreamDataProvider,
// you probably don't need to call this method.
QPDF_DLL
QPDFObjectHandle copyStream();
// Mutator methods.
// Since qpdf 11: for mutators that may add or remove an item, there are additional versions
// whose names contain "AndGet" that return the added or removed item. For example:
//
// auto new_dict = dict.replaceKeyAndGetNew(
// "/New", QPDFObjectHandle::newDictionary());
//
// auto old_value = dict.replaceKeyAndGetOld(
// "/New", "(something)"_qpdf);
// Recursively copy this object, making it direct. An exception is thrown if a loop is detected.
// With allow_streams true, keep indirect object references to streams. Otherwise, throw an
// exception if any sub-object is a stream. Note that, when allow_streams is true and a stream
// is found, the resulting object is still associated with the containing qpdf. When
// allow_streams is false, the object will no longer be connected to the original QPDF object
// after this call completes successfully. allow_streams defaults to false.
QPDF_DLL
void makeDirect(bool allow_streams = false);
// Mutator methods for array objects
// NOTE(review): setArrayItem's unnamed int parameter is presumably the index of the item to
// set, and the QPDFObjectHandle the new value -- confirm against the implementation.
QPDF_DLL
void setArrayItem(int, QPDFObjectHandle const&);
// NOTE(review): presumably sets the array's contents to the given items -- confirm.
QPDF_DLL
void setArrayFromVector(std::vector<QPDFObjectHandle> const& items);
// Insert an item before the item at the given position ("at") so that it has that position
// after insertion. If "at" is equal to the size of the array, insert the item at the end.
QPDF_DLL
void insertItem(int at, QPDFObjectHandle const& item);
// Like insertItem, but return the item that was inserted.
QPDF_DLL
QPDFObjectHandle insertItemAndGetNew(int at, QPDFObjectHandle const& item);
// Append an item to the end of an array.
QPDF_DLL
void appendItem(QPDFObjectHandle const& item);
// Like appendItem, but return the newly added item.
QPDF_DLL
QPDFObjectHandle appendItemAndGetNew(QPDFObjectHandle const& item);
// Remove the item at that position, reducing the size of the array by one.
QPDF_DLL
void eraseItem(int at);
// Erase an item and return the item that was removed.
QPDF_DLL
QPDFObjectHandle eraseItemAndGetOld(int at);
// Mutator methods for dictionary objects
//
// Replace value of key, adding it if it does not exist. If value is null, remove the key.
QPDF_DLL
void replaceKey(std::string const& key, QPDFObjectHandle const& value);
// Replace value of key and return the value (that is, the new value, as the "AndGetNew" suffix
// indicates).
QPDF_DLL
QPDFObjectHandle replaceKeyAndGetNew(std::string const& key, QPDFObjectHandle const& value);
// Replace value of key and return the old value, or null if the key was previously not present.
QPDF_DLL
QPDFObjectHandle replaceKeyAndGetOld(std::string const& key, QPDFObjectHandle const& value);
// Remove key, doing nothing if key does not exist.
QPDF_DLL
void removeKey(std::string const& key);
// Remove key and return the old value. If the key was not previously present, return a null
// object.
QPDF_DLL
QPDFObjectHandle removeKeyAndGetOld(std::string const& key);
// Deprecated: use replaceKey(), which per the deprecation message does the same thing.
// ABI: Remove in qpdf 12
[[deprecated("use replaceKey -- it does the same thing")]] QPDF_DLL void
replaceOrRemoveKey(std::string const& key, QPDFObjectHandle const&);
// Methods for stream objects
// NOTE(review): getDict() presumably returns the stream's dictionary -- confirm against the
// implementation.
QPDF_DLL
QPDFObjectHandle getDict();
// By default, or if true passed, QPDFWriter will attempt to filter a stream based on decode
// level, whether compression is enabled, and its ability to filter. Passing false will prevent
// QPDFWriter from attempting to filter the stream even if it can. This includes both decoding
// and compressing. This makes it possible for you to prevent QPDFWriter from uncompressing and
// recompressing a stream that it knows how to operate on for any application-specific reason,
// such as that you have already optimized its filtering. Note that this doesn't affect any
// other ways to get the stream's data, such as pipeStreamData or getStreamData.
QPDF_DLL
void setFilterOnWrite(bool);
// NOTE(review): presumably returns the setting controlled by setFilterOnWrite() -- confirm.
QPDF_DLL
bool getFilterOnWrite();
// If addTokenFilter has been called for this stream, then the original data should be
// considered to be modified. This means we should avoid optimizations such as not filtering a
// stream that is already compressed.
// NOTE(review): isDataModified() presumably reports that condition -- confirm.
QPDF_DLL
bool isDataModified();
// Returns filtered (uncompressed) stream data. Throws an exception if the stream is filtered
// and we can't decode it. The decode level defaults to qpdf_dl_generalized.
QPDF_DLL
std::shared_ptr<Buffer> getStreamData(qpdf_stream_decode_level_e level = qpdf_dl_generalized);
// Returns unfiltered (raw) stream data.
QPDF_DLL
std::shared_ptr<Buffer> getRawStreamData();
// Write stream data through the given pipeline. A null pipeline value may be used if all you
// want to do is determine whether a stream is filterable and would be filtered based on the