-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathZipCompress.cpp
2829 lines (2410 loc) · 110 KB
/
ZipCompress.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#include <windows.h>
#include <stdio.h>
#include <tchar.h>
#include "ZipCompress.h"
// THIS FILE is almost entirely based upon code by info-zip.
// It has been modified by Lucian Wischik. The modifications
// were a complete rewrite of the bit of code that generates the
// layout of the zipfile, and support for zipping to/from memory
// or handles or pipes or pagefile or diskfiles, encryption, unicode.
// The original code may be found at http://www.info-zip.org
// The original copyright text follows.
//
//
//
// This is version 1999-Oct-05 of the Info-ZIP copyright and license.
// The definitive version of this document should be available at
// ftp://ftp.cdrom.com/pub/infozip/license.html indefinitely.
//
// Copyright (c) 1990-1999 Info-ZIP. All rights reserved.
//
// For the purposes of this copyright and license, "Info-ZIP" is defined as
// the following set of individuals:
//
// Mark Adler, John Bush, Karl Davis, Harald Denker, Jean-Michel Dubois,
// Jean-loup Gailly, Hunter Goatley, Ian Gorman, Chris Herborth, Dirk Haase,
// Greg Hartwig, Robert Heath, Jonathan Hudson, Paul Kienitz, David Kirschbaum,
// Johnny Lee, Onno van der Linden, Igor Mandrichenko, Steve P. Miller,
// Sergio Monesi, Keith Owens, George Petrov, Greg Roelofs, Kai Uwe Rommel,
// Steve Salisbury, Dave Smith, Christian Spieler, Antoine Verheijen,
// Paul von Behren, Rich Wales, Mike White
//
// This software is provided "as is," without warranty of any kind, express
// or implied. In no event shall Info-ZIP or its contributors be held liable
// for any direct, indirect, incidental, special or consequential damages
// arising out of the use of or inability to use this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. Redistributions of source code must retain the above copyright notice,
// definition, disclaimer, and this list of conditions.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, definition, disclaimer, and this list of conditions in
// documentation and/or other materials provided with the distribution.
//
// 3. Altered versions--including, but not limited to, ports to new operating
// systems, existing ports with new graphical interfaces, and dynamic,
// shared, or static library versions--must be plainly marked as such
// and must not be misrepresented as being the original source. Such
// altered versions also must not be misrepresented as being Info-ZIP
// releases--including, but not limited to, labeling of the altered
// versions with the names "Info-ZIP" (or any variation thereof, including,
// but not limited to, different capitalizations), "Pocket UnZip," "WiZ"
// or "MacZip" without the explicit permission of Info-ZIP. Such altered
// versions are further prohibited from misrepresentative use of the
// Zip-Bugs or Info-ZIP e-mail addresses or of the Info-ZIP URL(s).
//
// 4. Info-ZIP retains the right to use the names "Info-ZIP," "Zip," "UnZip,"
// "WiZ," "Pocket UnZip," "Pocket Zip," and "MacZip" for its own source and
// binary releases.
//
// Short integer aliases used throughout the compressor.
typedef unsigned char uch; // unsigned 8-bit value
typedef unsigned short ush; // unsigned 16-bit value
typedef unsigned long ulg; // unsigned 32-bit value (NOTE(review): relies on long being 32 bits, as on Windows -- confirm before porting)
typedef size_t extent; // file size
typedef unsigned Pos; // index in the character window; must be at least 32 bits. Also the element type of the prev[] and head[] tables.
typedef unsigned IPos; // same representation as Pos. NOTE(review): presumably the alias intended for parameter passing only -- no use of it is visible in this part of the file.
#ifndef EOF
#define EOF (-1)
#endif
// Error return values. The values 0..4 and 12..18 follow the conventions
// of PKZIP. The values 4..10 are all assigned to "insufficient memory"
// by PKZIP, so the codes 5..10 are used here for other purposes.
// Negative replacement lists are parenthesized (like EOF above) so that the
// macro always expands to a single operand.
#define ZE_MISS (-1) // used by procname(), zipbare()
#define ZE_OK 0 // success
#define ZE_EOF 2 // unexpected end of zip file
#define ZE_FORM 3 // zip file structure error
#define ZE_MEM 4 // out of memory
#define ZE_LOGIC 5 // internal logic error
#define ZE_BIG 6 // entry too large to split
#define ZE_NOTE 7 // invalid comment format
#define ZE_TEST 8 // zip test (-T) failed or out of memory
#define ZE_ABORT 9 // user interrupt or termination
#define ZE_TEMP 10 // error using a temp file
#define ZE_READ 11 // read or seek error
#define ZE_NONE 12 // nothing to do
#define ZE_NAME 13 // missing or empty zip file
#define ZE_WRITE 14 // error writing to a file
#define ZE_CREAT 15 // couldn't open to write
#define ZE_PARMS 16 // bad command line
#define ZE_OPEN 18 // could not open a specified file to read
#define ZE_MAXERR 18 // the highest error number
// Internal file attribute: the kind of data a file holds
#define UNKNOWN (-1)
#define BINARY 0
#define ASCII 1
// Compression methods. BEST is parenthesized, like UNKNOWN, so that the
// negative value always expands to a single operand.
#define BEST (-1) // Use best method (deflation or store)
#define STORE 0 // Store method
#define DEFLATE 8 // Deflation method
// Starting value for a CRC computation
#define CRCVAL_INITIAL 0L
// MSDOS file or directory attributes
#define MSDOS_HIDDEN_ATTR 0x02
#define MSDOS_DIR_ATTR 0x10
// Lengths of headers after signatures in bytes
#define LOCHEAD 26
#define CENHEAD 42
#define ENDHEAD 18
// Definitions for extra field handling.
// EB_* describe the generic extra-field block header; EB_UT_* describe the
// "UT" block, which holds a flags byte followed by 4-byte time values.
#define EB_HEADSIZE 4 /* length of an extra field block header */
#define EB_LEN 2 /* offset of data length field in header */
#define EB_UT_MINLEN 1 /* minimal UT field contains Flags byte */
#define EB_UT_FLAGS 0 /* byte offset of Flags field */
#define EB_UT_TIME1 1 /* byte offset of 1st time value */
#define EB_UT_FL_MTIME (1 << 0) /* mtime present */
#define EB_UT_FL_ATIME (1 << 1) /* atime present */
#define EB_UT_FL_CTIME (1 << 2) /* ctime present */
/* data length of a UT block carrying n time values: flags byte + 4 bytes each */
#define EB_UT_LEN(n) (EB_UT_MINLEN + 4 * (n))
/* whole-block sizes including the header: three time values (17 bytes) and one time value (9 bytes).
   NOTE(review): presumably L = local header and C = central header -- confirm against the code that writes them */
#define EB_L_UT_SIZE (EB_HEADSIZE + EB_UT_LEN(3))
#define EB_C_UT_SIZE (EB_HEADSIZE + EB_UT_LEN(1))
// Macros for writing machine integers to little-endian format.
// PUTSH emits the low byte of a and then bits 8 and up (truncated to a char);
// PUTLG emits the low 16 bits and then the high 16 bits, each via PUTSH.
// Both macros rely on two names being in scope at the point of use:
// 'wfunc' (called as wfunc(param, buffer, 1)) and 'param'.
// The second macro argument f is accepted but never used by the expansion.
// The argument a is expanded more than once, so it must not have side effects.
// The expansions are brace blocks, not expressions; PUTLG itself places two
// PUTSH expansions back to back with no semicolon between them.
#define PUTSH(a,f) {char _putsh_c=(char)((a)&0xff); wfunc(param,&_putsh_c,1); _putsh_c=(char)((a)>>8); wfunc(param,&_putsh_c,1);}
#define PUTLG(a,f) {PUTSH((a) & 0xffff,(f)) PUTSH((a) >> 16,(f))}
// -- Structure of a ZIP file --
// Signatures for zip file information headers.
// Each value has 0x50 0x4b ('P' 'K') as its two lowest bytes, so a signature
// written low byte first begins with "PK".
#define LOCSIG 0x04034b50L
#define CENSIG 0x02014b50L
#define ENDSIG 0x06054b50L
#define EXTLOCSIG 0x08074b50L
// In the rest of this section each explanatory comment FOLLOWS the #define it describes.
#define MIN_MATCH 3
#define MAX_MATCH 258
// The minimum and maximum match lengths
#define WSIZE (0x8000)
// Maximum window size = 32K. If you are really short of memory, compile
// with a smaller WSIZE but this reduces the compression ratio for files
// of size > WSIZE. WSIZE must be a power of two in the current implementation.
//
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
// Minimum amount of lookahead, except at the end of the input file.
// See deflate.c for comments about the MIN_MATCH+1.
//
#define MAX_DIST (WSIZE-MIN_LOOKAHEAD)
// In order to simplify the code, particularly on 16 bit machines, match
// distances are limited to MAX_DIST instead of WSIZE.
//
#define ZIP_HANDLE 1
#define ZIP_FILENAME 2
#define ZIP_MEMORY 3
#define ZIP_FOLDER 4
// NOTE(review): the ZIP_* values presumably tag the kind of source or destination
// (handle, file name, memory block, folder); their use is not visible in this part of the file.
// ===========================================================================
// Constants
// (each explanatory comment FOLLOWS the #define it describes)
//
#define MAX_BITS 15
// All codes must not exceed MAX_BITS bits
#define MAX_BL_BITS 7
// Bit length codes must not exceed MAX_BL_BITS bits
#define LENGTH_CODES 29
// number of length codes, not counting the special END_BLOCK code
#define LITERALS 256
// number of literal bytes 0..255
#define END_BLOCK 256
// end of block literal code
#define L_CODES (LITERALS+1+LENGTH_CODES)
// number of Literal or Length codes, including the END_BLOCK code (= 286)
#define D_CODES 30
// number of distance codes
#define BL_CODES 19
// number of codes used to transfer the bit lengths
#define STORED_BLOCK 0
#define STATIC_TREES 1
#define DYN_TREES 2
// The three kinds of block type
#define LIT_BUFSIZE 0x8000
#define DIST_BUFSIZE LIT_BUFSIZE
// Sizes of match buffers for literals/lengths and distances. There are
// 4 reasons for limiting LIT_BUFSIZE to 64K:
// - frequencies can be kept in 16 bit counters
// - if compression is not successful for the first block, all input data is
// still in the window so we can still emit a stored block even when input
// comes from standard input. (This can also be done for all blocks if
// LIT_BUFSIZE is not greater than 32K.)
// - if compression is not successful for a file smaller than 64K, we can
// even emit a stored file instead of a stored block (saving 5 bytes).
// - creating new Huffman trees less frequently may not provide fast
// adaptation to changes in the input data statistics. (Take for
// example a binary file with poorly compressible code followed by
// a highly compressible string table.) Smaller buffer sizes give
// fast adaptation but have of course the overhead of transmitting trees
// more frequently.
// - I can't count above 4
// The current code is general and allows DIST_BUFSIZE < LIT_BUFSIZE (to save
// memory at the expense of compression). Some optimizations would be possible
// if we rely on DIST_BUFSIZE == LIT_BUFSIZE.
// (As configured here both buffers hold 0x8000 = 32K entries.)
//
#define REP_3_6 16
// repeat previous bit length 3-6 times (2 bits of repeat count)
#define REPZ_3_10 17
// repeat a zero length 3-10 times (3 bits of repeat count)
#define REPZ_11_138 18
// repeat a zero length 11-138 times (7 bits of repeat count)
#define HEAP_SIZE (2*L_CODES+1)
// maximum heap size (= 573)
// ===========================================================================
// Local data used by the "bit string" routines.
//
#define Buf_size (8 * 2*sizeof(char))
// Number of bits used within bi_buf. (bi_buf may be implemented on
// more than 16 bits on some systems.)
// With sizeof(char) == 1 this evaluates to 16; note the type is size_t.
//
// PUTSHORT(state,w): output a 16 bit value to the bit stream, lower (oldest) byte first.
// PUTBYTE(state,b): output a single byte.
// Both first call state.flush_outbuf(state.param, out_buf, &out_offset) when
// fewer than two (PUTSHORT) or one (PUTBYTE) bytes of room remain, then store
// through out_buf[out_offset++].
// NOTE(review): the callback receives &out_offset, presumably so that it can
// reset the offset after draining the buffer -- the macros do not reset it themselves.
// 'state' is substituted textually and expanded several times; pass a plain
// object name. The expansions are brace blocks, not expressions.
#define PUTSHORT(state,w) \
{ if (state.bs.out_offset >= state.bs.out_size-1) \
state.flush_outbuf(state.param,state.bs.out_buf, &state.bs.out_offset); \
state.bs.out_buf[state.bs.out_offset++] = (char) ((w) & 0xff); \
state.bs.out_buf[state.bs.out_offset++] = (char) ((ush)(w) >> 8); \
}
#define PUTBYTE(state,b) \
{ if (state.bs.out_offset >= state.bs.out_size) \
state.flush_outbuf(state.param,state.bs.out_buf, &state.bs.out_offset); \
state.bs.out_buf[state.bs.out_offset++] = (char) (b); \
}
// DEFLATE.CPP HEADER
// (each explanatory comment FOLLOWS the #define it describes)
#define HASH_BITS 15
// For portability to 16 bit machines, do not use values above 15.
#define HASH_SIZE ((unsigned)(1<<HASH_BITS))
// Number of hash table entries (32768). The whole replacement list is
// parenthesized so that the cast expression behaves as a single operand
// wherever the macro is used (e.g. as the operand of sizeof).
#define HASH_MASK (HASH_SIZE-1)
#define WMASK (WSIZE-1)
// HASH_SIZE and WSIZE must be powers of two
#define NIL 0
// Tail of hash chains
#define FAST 4
#define SLOW 2
// speed options for the general purpose bit flag
#define TOO_FAR 4096
// Matches of length 3 are discarded if their distance exceeds TOO_FAR
#define EQUAL 0
// result of memcmp for equal strings
// ===========================================================================
// Local data used by the "longest match" routines.
#define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH)
// Number of bits by which ins_h and del_h must be shifted at each
// input step. It must be such that after MIN_MATCH steps, the oldest
// byte no longer takes part in the hash key, that is:
// H_SHIFT * MIN_MATCH >= HASH_BITS
#define max_insert_length max_lazy_match
// Alias: the same member is read under either name.
// Insert new strings in the hash table only if the match length
// is not greater than this length. This saves time but degrades compression.
// max_insert_length is used only for compression levels <= 3.
// Number of extra bits that follow each length code.
const int extra_lbits[LENGTH_CODES] = {
  0,0,0,0, 0,0,0,0,
  1,1,1,1, 2,2,2,2,
  3,3,3,3, 4,4,4,4,
  5,5,5,5, 0
};

// Number of extra bits that follow each distance code.
const int extra_dbits[D_CODES] = {
  0,0,0,0,
  1,1, 2,2, 3,3, 4,4, 5,5, 6,6, 7,7,
  8,8, 9,9, 10,10, 11,11, 12,12, 13,13
};

// Number of extra bits that follow each bit length code
// (only the three repeat codes 16, 17 and 18 carry any).
const int extra_blbits[BL_CODES] = {
  0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  2, 3, 7
};

// Order in which the lengths of the bit length codes are sent: decreasing
// probability, so that the lengths of unused bit length codes need not be
// transmitted.
const uch bl_order[BL_CODES] = {
  16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
};
// Tuning parameters of the match search for one compression level.
// The table below initialises these fields positionally, so the field order matters.
typedef struct config {
ush good_length; // reduce lazy search above this match length
ush max_lazy; // do not perform lazy search above this match length
ush nice_length; // quit search above this match length
ush max_chain; // NOTE(review): presumably the hash chain search limit (cf. max_chain_length in TDeflateState) -- confirm where the table is read
} config;
// Values for max_lazy_match, good_match, nice_match and max_chain_length,
// depending on the desired pack level (0..9). The values given below have
// been tuned to exclude worst case performance for pathological files.
// Better values may be found for specific files.
// The row index is the pack level.
//
const config configuration_table[10] = {
// good lazy nice chain
{0, 0, 0, 0}, // 0 store only
{4, 4, 8, 4}, // 1 maximum speed, no lazy matches
{4, 5, 16, 8}, // 2
{4, 6, 32, 32}, // 3
{4, 4, 16, 16}, // 4 lazy matches
{8, 16, 32, 32}, // 5
{8, 16, 128, 128}, // 6
{8, 32, 128, 256}, // 7
{32, 128, 258, 1024}, // 8
{32, 258, 258, 4096}};// 9 maximum compression
// Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
// For deflate_fast() (levels <= 3) good is ignored and lazy has a different meaning.
// Data structure describing a single value and its code string.
// Each union overlays two 16-bit fields that share storage: writing one
// member of a pair overwrites the other.
typedef struct ct_data {
union {
ush freq; // frequency count
ush code; // bit string
} fc;
union {
ush dad; // father node in Huffman tree
ush len; // length of bit string
} dl;
} ct_data;
// Descriptor tying a dynamic tree to its static counterpart and limits.
// TTreeState's constructor fills these with positional aggregate
// initialisers, so the field order below must not change.
typedef struct tree_desc {
ct_data *dyn_tree; // the dynamic tree
ct_data *static_tree; // corresponding static tree or NULL
const int *extra_bits; // extra bits for each code or NULL
int extra_base; // base index for extra_bits
int elems; // max number of elements in the tree
int max_length; // max bit length for the codes
int max_code; // largest code with non zero frequency
} tree_desc;
// All state used by the Huffman tree routines: the dynamic and static trees,
// the heap used to build them, the lookup tables filled by ct_init(), and the
// buffers that collect literals/lengths and distances for the current block.
// The large arrays are embedded directly in the object.
class TTreeState
{ public:
TTreeState();
ct_data dyn_ltree[HEAP_SIZE]; // literal and length tree
ct_data dyn_dtree[2*D_CODES+1]; // distance tree
ct_data static_ltree[L_CODES+2]; // the static literal tree...
// ... Since the bit lengths are imposed, there is no need for the L_CODES
// extra codes used during heap construction. However the codes 286 and 287
// are needed to build a canonical tree (see ct_init below).
ct_data static_dtree[D_CODES]; // the static distance tree...
// ... (Actually a trivial tree since all codes use 5 bits.)
// ct_init() also reads static_dtree[0].dl.len to decide whether the static tables were already built.
ct_data bl_tree[2*BL_CODES+1]; // Huffman tree for the bit lengths
tree_desc l_desc; // descriptor for dyn_ltree / static_ltree
tree_desc d_desc; // descriptor for dyn_dtree / static_dtree
tree_desc bl_desc; // descriptor for bl_tree (no static tree)
ush bl_count[MAX_BITS+1]; // number of codes at each bit length for an optimal tree
int heap[2*L_CODES+1]; // heap used to build the Huffman trees
int heap_len; // number of elements in the heap
int heap_max; // element of largest frequency
// The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
// The same heap array is used to build all trees.
uch depth[2*L_CODES+1];
// Depth of each subtree used as tie breaker for trees of equal frequency
uch length_code[MAX_MATCH-MIN_MATCH+1];
// length code for each normalized match length (0 == MIN_MATCH)
uch dist_code[512];
// distance codes. The first 256 values correspond to the distances
// 3 .. 258, the last 256 values correspond to the top 8 bits of
// the 15 bit distances.
int base_length[LENGTH_CODES];
// First normalized length for each code (0 = MIN_MATCH)
int base_dist[D_CODES];
// First normalized distance for each code (0 = distance of 1)
uch far l_buf[LIT_BUFSIZE]; // buffer for literals/lengths
ush far d_buf[DIST_BUFSIZE]; // buffer for distances
uch flag_buf[(LIT_BUFSIZE/8)];
// flag_buf is a bit array distinguishing literals from lengths in
// l_buf, and thus indicating the presence or absence of a distance.
unsigned last_lit; // running index in l_buf
unsigned last_dist; // running index in d_buf
unsigned last_flags; // running index in flag_buf
uch flags; // current flags not yet saved in flag_buf
uch flag_bit; // current bit used in flags
// bits are filled in flags starting at bit 0 (least significant).
// Note: these flags are overkill in the current code since we don't
// take advantage of DIST_BUFSIZE == LIT_BUFSIZE.
ulg opt_len; // bit length of current block with optimal trees
ulg static_len; // bit length of current block with static trees
ulg cmpr_bytelen; // total byte length of compressed file
ulg cmpr_len_bits; // number of bits past 'cmpr_bytelen'
ulg input_len; // total byte length of input file
// input_len is for debugging only since we can get it by other means.
ush *file_type; // pointer to UNKNOWN, BINARY or ASCII; set by ct_init() from its attr argument (not owned here)
// int *file_method; // pointer to DEFLATE or STORE
};
// Wires the three tree descriptors to the arrays embedded in this object and
// resets the running buffer indices.
// The descriptors are built with positional aggregate initialisers in the
// field order of tree_desc:
//   dyn_tree, static_tree, extra_bits, extra_base, elems, max_length, max_code.
TTreeState::TTreeState()
{
  tree_desc a = {dyn_ltree, static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS, 0};
  l_desc = a;
  tree_desc b = {dyn_dtree, static_dtree, extra_dbits, 0, D_CODES, MAX_BITS, 0};
  d_desc = b;
  tree_desc c = {bl_tree, NULL, extra_blbits, 0, BL_CODES, MAX_BL_BITS, 0};
  bl_desc = c;
  last_lit=0;
  last_dist=0;
  last_flags=0;
  // ct_init() treats a non-zero static_dtree[0].dl.len as "static tables
  // already built" and returns early. Give that flag a defined value so the
  // first ct_init() on a freshly constructed object never reads an
  // indeterminate value and always builds the tables.
  static_dtree[0].dl.len = 0;
}
// State of the bit-level output stage: a small bit accumulator plus the byte
// buffer that PUTSHORT/PUTBYTE append to. No constructor is declared, so the
// members hold no defined value until they are assigned.
class TBitState
{ public:
int flush_flg;
// NOTE(review): purpose not documented and not visible in this part of the file.
unsigned bi_buf;
// Output buffer. bits are inserted starting at the bottom (least significant
// bits). The width of bi_buf must be at least 16 bits.
int bi_valid;
// Number of valid bits in bi_buf. All bits above the last valid bit
// are always zero.
char *out_buf;
// Current output buffer.
unsigned out_offset;
// Current offset in output buffer.
// On 16 bit machines, the buffer is limited to 64K.
unsigned out_size;
// Size of current output buffer
ulg bits_sent; // bit length of the compressed data; apparently only needed for debugging
};
// State of the string-matching (deflate) stage: the sliding window, the hash
// chains over it, and the tuning limits for the match search.
// The constructor only clears window_size; every other member holds no
// defined value until it is assigned.
class TDeflateState
{ public:
TDeflateState() {window_size=0;}
uch window[2L*WSIZE];
// Sliding window. Input bytes are read into the second half of the window,
// and move to the first half later to keep a dictionary of at least WSIZE
// bytes. With this organization, matches are limited to a distance of
// WSIZE-MAX_MATCH bytes, but this ensures that IO is always
// performed with a length multiple of the block size. Also, it limits
// the window size to 64K, which is quite useful on MSDOS.
// To do: limit the window size to WSIZE+CBSZ if SMALL_MEM (the code would
// be less efficient since the data would have to be copied WSIZE/CBSZ times)
Pos prev[WSIZE];
// Link to older string with same hash index. To limit the size of this
// array to 64K, this link is maintained only for the last 32K strings.
// An index in this array is thus a window index modulo 32K.
Pos head[HASH_SIZE];
// Heads of the hash chains or NIL. If your compiler thinks that
// HASH_SIZE is a dynamic value, recompile with -DDYN_ALLOC.
ulg window_size;
// window size, 2*WSIZE except for MMAP or BIG_MEM, where it is the
// input file length plus MIN_LOOKAHEAD.
long block_start;
// window position at the beginning of the current output block. Gets
// negative when the window is moved backwards.
int sliding;
// Set to false when the input file is already in memory
unsigned ins_h; // hash index of string to be inserted
unsigned int prev_length;
// Length of the best match at previous step. Matches not greater than this
// are discarded. This is used in the lazy match evaluation.
unsigned strstart; // start of string to insert
unsigned match_start; // start of matching string
int eofile; // flag set at end of input file
unsigned lookahead; // number of valid bytes ahead in window
unsigned max_chain_length;
// To speed up deflation, hash chains are never searched beyond this length.
// A higher limit improves compression ratio but degrades the speed.
unsigned int max_lazy_match;
// Attempt to find a better match only when the current match is strictly
// smaller than this value. This mechanism is used only for compression
// levels >= 4.
// (The macro max_insert_length is an alias for this same member.)
unsigned good_match;
// Use a faster search when the previous match is longer than this
int nice_match; // Stop searching when current match exceeds this
};
// 64-bit time value (__int64 is a compiler-specific 64-bit integer type).
typedef __int64 lutime_t; // define it ourselves since we don't include time.h
// The three timestamps kept for a file.
typedef struct iztimes {
lutime_t atime,mtime,ctime;
} iztimes; // access, modify, create times
// Description of one entry of a zip file; entries can be chained through nxt.
// The short field names come from the original Info-ZIP sources; see the
// central header layout in zipfile.c for the meaning of vem..off.
typedef struct zlist {
ush vem, ver, flg, how; // See central header in zipfile.c for what vem..off are
ulg tim, crc, siz, len;
extent nam, ext, cext, com; // offset of ext must be >= LOCHEAD
ush dsk, att, lflg; // offset of lflg must be >= LOCHEAD
ulg atx, off;
char name[MAX_PATH]; // File name in zip file
char *extra; // Extra field (set only if ext != 0)
char *cextra; // Extra in central (set only if cext != 0)
char *comment; // Comment (set only if com != 0)
char iname[MAX_PATH]; // Internal file name after cleanup
char zname[MAX_PATH]; // External version of internal name
int mark; // Marker for files to operate on
int trash; // Marker for files to delete
int dosflag; // Set to force MSDOS file attributes
struct zlist far *nxt; // Pointer to next header in list
} TZipFileInfo;
struct TState;
// Callback types stored in TState.
// READFUNC receives the whole state plus a destination buffer and its size.
// FLUSHFUNC receives the opaque param, the output buffer and a pointer to the
// current offset; the PUTSHORT/PUTBYTE macros call it when the buffer is full.
// WRITEFUNC receives the opaque param, a buffer and a byte count.
// NOTE(review): the meaning of the unsigned return values is not visible in this part of the file.
typedef unsigned (*READFUNC)(TState &state, char *buf,unsigned size);
typedef unsigned (*FLUSHFUNC)(void *param, const char *buf, unsigned *size);
typedef unsigned (*WRITEFUNC)(void *param, const char *buf, unsigned size);
// Complete compressor state: the caller-supplied callbacks and settings plus
// the tree, bit-output and deflate sub-states. The sub-states embed large
// arrays, so a TState object is big.
struct TState
{ void *param; // opaque value handed back to flush_outbuf
int level; bool seekable; // NOTE(review): presumably the pack level 0..9 and whether the output can be rewound -- confirm with the caller
READFUNC readfunc; FLUSHFUNC flush_outbuf;
TTreeState ts; TBitState bs; TDeflateState ds;
const char *err; // message of the most recent failed Assert(); this struct does not initialise it
};
// Records a failed internal check: when cond is false, msg is stored in
// state.err. Execution continues either way, and a true cond leaves
// state.err untouched.
void Assert(TState &state,bool cond, const char *msg)
{
  if (!cond) {
    state.err = msg;
  }
}
// Diagnostic hooks kept from the original sources. Both accept a
// printf-style argument list and deliberately do nothing with it.
void __cdecl Trace(const char *x, ...)
{
  va_list args;
  va_start(args, x);
  (void)args; // referenced only so the variable counts as used
  va_end(args);
}
// Conditional variant; the condition is ignored as well.
void __cdecl Tracec(bool, const char *x, ...)
{
  va_list args;
  va_start(args, x);
  (void)args; // referenced only so the variable counts as used
  va_end(args);
}
// ===========================================================================
// Local (static) routines in this file.
// Forward declarations only. Note that, despite the heading, none of these is
// declared with the 'static' keyword.
//
void init_block (TState &); // reset per-block frequencies, bit totals and buffer indices
void pqdownheap (TState &,ct_data *tree, int k); // restore the heap property downwards from heap[k]
void gen_bitlen (TState &,tree_desc *desc); // compute optimal bit lengths for a tree
void gen_codes (TState &state,ct_data *tree, int max_code);
void build_tree (TState &,tree_desc *desc);
void scan_tree (TState &,ct_data *tree, int max_code);
void send_tree (TState &state,ct_data *tree, int max_code);
int build_bl_tree (TState &);
void send_all_trees (TState &state,int lcodes, int dcodes, int blcodes);
void compress_block (TState &state,ct_data *ltree, ct_data *dtree);
void set_file_type (TState &);
void send_bits (TState &state, int value, int length); // used by the send_code macro to emit a code and its length
unsigned bi_reverse (unsigned code, int len); // ct_init() uses it to build the 5-bit static distance codes
void bi_windup (TState &state);
void copy_block (TState &state,char *buf, unsigned len, int header);
#define send_code(state, c, tree) send_bits(state, tree[c].fc.code, tree[c].dl.len)
// Send a code of the given tree. c and tree must not have side effects
// (both are expanded twice).
// alternatively...
//#define send_code(state, c, tree)
// { if (state.verbose>1) fprintf(stderr,"\ncd %3d ",(c));
// send_bits(state, tree[c].fc.code, tree[c].dl.len); }
#define d_code(dist) ((dist) < 256 ? state.ts.dist_code[dist] : state.ts.dist_code[256+((dist)>>7)])
// Mapping from a distance to a distance code. dist is the distance - 1 and
// must not have side effects. dist_code[256] and dist_code[257] are never used.
// The expansion refers to a variable named 'state' that must be in scope
// where the macro is used; it is not a macro parameter.
/* Larger of a and b (a is chosen when they compare equal). Every use of an
 * argument is parenthesized so that operators inside an argument cannot
 * regroup with the comparison or the conditional.
 */
#define Max(a,b) ((a) >= (b) ? (a) : (b))
/* the arguments must not have side effects (each is expanded twice) */
/* ===========================================================================
 * Prepare the tree state for a new file.
 *
 * Every call stores attr as the location of the internal file attribute
 * (ascii/binary) and clears the compressed-size and input-size counters.
 * The length/distance lookup tables and the static trees are built, and the
 * first block initialised, only while static_dtree[0].dl.len is still zero;
 * once it is non-zero the function returns right after the per-file reset.
 */
void ct_init(TState &state, ush *attr)
{
  /* {last symbol, bit length} of each run in the static literal tree */
  static const int static_run_last[4] = {143, 255, 279, 287};
  static const int static_run_bits[4] = {8, 9, 7, 8};

  int i;         /* inner repeat counter / generic index */
  int lc;        /* length code being expanded */
  int dc;        /* distance code being expanded */
  int next_len;  /* next normalized length to map */
  int next_dist; /* next distance slot to map */
  int sym;       /* symbol index in the static literal tree */
  int run;       /* index into the static_run_* tables */
  int repeat;    /* how many table slots the current code covers */

  /* Per-file bookkeeping, done on every call. */
  state.ts.file_type = attr;
  //state.ts.file_method = method;
  state.ts.cmpr_len_bits = 0L;
  state.ts.cmpr_bytelen = 0L;
  state.ts.input_len = 0L;

  /* A non-zero length here means the tables below already exist. */
  if (state.ts.static_dtree[0].dl.len != 0) return; /* ct_init already called */

  /* Mapping from normalized length (0..255) to length code (0..28). */
  next_len = 0;
  for (lc = 0; lc < LENGTH_CODES-1; lc++) {
    state.ts.base_length[lc] = next_len;
    repeat = 1 << extra_lbits[lc];
    for (i = 0; i < repeat; i++) {
      state.ts.length_code[next_len] = (uch)lc;
      next_len++;
    }
  }
  Assert(state,next_len == 256, "ct_init: length != 256");
  /* Length 255 (match length 258) could be sent as code 284 + 5 bits or as
   * code 285; lc now equals LENGTH_CODES-1, so the final slot is overwritten
   * to select the shorter encoding.
   */
  state.ts.length_code[next_len-1] = (uch)lc;

  /* Mapping from distance (0..32K) to distance code (0..29): the first 16
   * codes are indexed directly.
   */
  next_dist = 0;
  for (dc = 0; dc < 16; dc++) {
    state.ts.base_dist[dc] = next_dist;
    repeat = 1 << extra_dbits[dc];
    for (i = 0; i < repeat; i++) {
      state.ts.dist_code[next_dist] = (uch)dc;
      next_dist++;
    }
  }
  Assert(state,next_dist == 256, "ct_init: dist != 256");

  /* The remaining codes are indexed by distance / 128 in the upper half. */
  next_dist >>= 7;
  for (dc = 16; dc < D_CODES; dc++) {
    state.ts.base_dist[dc] = next_dist << 7;
    repeat = 1 << (extra_dbits[dc]-7);
    for (i = 0; i < repeat; i++) {
      state.ts.dist_code[256 + next_dist] = (uch)dc;
      next_dist++;
    }
  }
  Assert(state,next_dist == 256, "ct_init: 256+dist != 512");

  /* Static literal tree: assign the imposed bit lengths run by run while
   * counting how many codes use each length.
   */
  for (i = 0; i <= MAX_BITS; i++) state.ts.bl_count[i] = 0;
  sym = 0;
  for (run = 0; run < 4; run++) {
    for ( ; sym <= static_run_last[run]; sym++) {
      state.ts.static_ltree[sym].dl.len = (ush)static_run_bits[run];
      state.ts.bl_count[static_run_bits[run]]++;
    }
  }
  /* Codes 286 and 287 do not exist, but they take part in the construction
   * so that the result is a canonical Huffman tree (longest code all ones).
   */
  gen_codes(state, state.ts.static_ltree, L_CODES+1);

  /* Static distance tree: every code is 5 bits long. */
  for (i = 0; i < D_CODES; i++) {
    ct_data &node = state.ts.static_dtree[i];
    node.dl.len = 5;
    node.fc.code = (ush)bi_reverse(i, 5);
  }

  /* Initialize the first block of the first file. */
  init_block(state);
}
/* ===========================================================================
 * Start a new block: clear the frequency counts of the three dynamic trees,
 * count the END_BLOCK symbol once, and reset the bit-length totals, the
 * buffer indices and the pending flag byte.
 */
void init_block(TState &state)
{
  TTreeState &ts = state.ts;
  int i;

  /* Clear the frequency counts. */
  for (i = 0; i < L_CODES; i++) {
    ts.dyn_ltree[i].fc.freq = 0;
  }
  for (i = 0; i < D_CODES; i++) {
    ts.dyn_dtree[i].fc.freq = 0;
  }
  for (i = 0; i < BL_CODES; i++) {
    ts.bl_tree[i].fc.freq = 0;
  }

  /* Every block contains exactly one end-of-block symbol. */
  ts.dyn_ltree[END_BLOCK].fc.freq = 1;

  ts.static_len = 0L;
  ts.opt_len = 0L;

  ts.last_flags = 0;
  ts.last_dist = 0;
  ts.last_lit = 0;

  ts.flags = 0;
  ts.flag_bit = 1;
}
#define SMALLEST 1
/* Index within the heap array of least frequent node in the Huffman tree */
/* ===========================================================================
 * Remove the smallest element from the heap and recreate the heap with
 * one less element. Updates heap and heap_len.
 * 'top' receives the removed element and must be an assignable variable.
 * The last heap element is moved to the root and sifted down with pqdownheap.
 * The expansion refers to a variable named 'state' that must be in scope at
 * the point of use, and it is a brace block, not an expression.
 */
#define pqremove(tree, top) \
{\
top = state.ts.heap[SMALLEST]; \
state.ts.heap[SMALLEST] = state.ts.heap[state.ts.heap_len--]; \
pqdownheap(state,tree, SMALLEST); \
}
/* ===========================================================================
 * Compares two subtrees, using the tree depth as tie breaker when
 * the subtrees have equal frequency. This minimizes the worst case length.
 * n and m are node indices into 'tree'. The result is true when n has the
 * lower frequency, or the same frequency and a depth not greater than m's.
 * The expansion refers to a variable named 'state' that must be in scope at
 * the point of use; n and m are expanded more than once.
 */
#define smaller(tree, n, m) \
(tree[n].fc.freq < tree[m].fc.freq || \
(tree[n].fc.freq == tree[m].fc.freq && state.ts.depth[n] <= state.ts.depth[m]))
/* ===========================================================================
 * Sift the node stored at heap[k] down until the heap property holds again:
 * at each level it is compared with the smaller of its two sons and swapped
 * with that son unless the node is already the smaller (or equal) one.
 *   tree - tree whose frequencies (and the depth table) order the heap
 *   k    - heap index of the node to move down
 */
void pqdownheap(TState &state,ct_data *tree, int k)
{
  int *const heap = state.ts.heap;
  const int sinking = heap[k]; /* node being moved down */
  int child;                   /* heap index of the son under consideration */
  int son;                     /* node stored at heap[child] */

  for (child = k << 1; child <= state.ts.heap_len; child <<= 1) {
    /* Prefer the right son when it is the smaller of the two. */
    if (child < state.ts.heap_len && smaller(tree, heap[child+1], heap[child])) {
      child++;
    }

    /* Stop once the sinking node is smaller than both sons. */
    son = heap[child];
    if (smaller(tree, sinking, son)) break;

    /* Move the son up and continue from its old position. */
    heap[k] = son;
    k = child;
  }
  heap[k] = sinking;
}
/* ===========================================================================
* Compute the optimal bit lengths for a tree and update the total bit length
* for the current block.
* IN assertion: the fields freq and dad are set, heap[heap_max] and
* above are the tree nodes sorted by increasing frequency.
* OUT assertions: the field len is set to the optimal bit length, the
* array bl_count contains the frequencies for each bit length.
* The length opt_len is updated; static_len is also updated if stree is
* not null.
*/
void gen_bitlen(TState &state,tree_desc *desc)
{
    /* Three passes:
     *   1. walk heap[heap_max+1 .. HEAP_SIZE-1], giving each node its
     *      father's length + 1, clamped to max_length (counting clamps);
     *   2. if anything was clamped, rebalance bl_count[];
     *   3. reassign leaf lengths from bl_count[], walking the heap backwards.
     */
    ct_data   *tree       = desc->dyn_tree;
    ct_data   *stree      = desc->static_tree;
    const int *extra      = desc->extra_bits;
    const int  base       = desc->extra_base;
    const int  max_code   = desc->max_code;
    const int  max_length = desc->max_length;
    int h;                  /* heap index; pass 3 continues from where pass 1 stopped */
    int overflow = 0;       /* number of nodes whose length had to be clamped */

    for (int len = 0; len <= MAX_BITS; len++) {
        state.ts.bl_count[len] = 0;
    }

    /* Pass 1: tentative lengths (may overflow for the bit length tree) */
    tree[state.ts.heap[state.ts.heap_max]].dl.len = 0;   /* root of the heap */

    for (h = state.ts.heap_max + 1; h < HEAP_SIZE; h++) {
        const int node = state.ts.heap[h];
        int len = tree[tree[node].dl.dad].dl.len + 1;

        if (len > max_length) {
            len = max_length;
            overflow++;
        }
        /* dl.len shares storage with dl.dad, which is not needed any more */
        tree[node].dl.len = (ush)len;

        if (node <= max_code) {             /* only leaves are counted */
            state.ts.bl_count[len]++;

            const int xbits = (node >= base) ? extra[node - base] : 0;
            const ush f = tree[node].fc.freq;

            state.ts.opt_len += (ulg)f * (len + xbits);
            if (stree) {
                state.ts.static_len += (ulg)f * (stree[node].dl.len + xbits);
            }
        }
    }

    if (overflow == 0) {
        return;
    }

    Trace("\nbit length overflow\n");

    /* Pass 2: repeatedly take the deepest non-empty level below max_length,
     * push one of its leaves a level down and pair it with one overflow item.
     */
    do {
        int len = max_length - 1;
        while (state.ts.bl_count[len] == 0) {
            len--;
        }
        state.ts.bl_count[len]--;               /* one leaf moves down the tree */
        state.ts.bl_count[len + 1] += (ush)2;   /* ...joined by one overflow item */
        state.ts.bl_count[max_length]--;
        /* The overflow item's brother moves up one step as well, which
         * leaves bl_count[max_length] unchanged.
         */
        overflow -= 2;
    } while (overflow > 0);

    /* Pass 3: h equals HEAP_SIZE here. Hand out lengths from longest to
     * shortest while stepping back through the heap, skipping internal nodes.
     */
    for (int len = max_length; len != 0; len--) {
        int remaining = state.ts.bl_count[len];

        while (remaining != 0) {
            const int leaf = state.ts.heap[--h];
            if (leaf > max_code) {
                continue;                       /* internal node: not counted */
            }
            if (tree[leaf].dl.len != (ush)len) {
                Trace("code %d bits %d->%d\n", leaf, tree[leaf].dl.len, len);
                state.ts.opt_len += ((long)len-(long)tree[leaf].dl.len)*(long)tree[leaf].fc.freq;
                tree[leaf].dl.len = (ush)len;
            }
            remaining--;
        }
    }
}
/* ===========================================================================
* Generate the codes for a given tree and bit counts (which need not be
* optimal).
* IN assertion: the array bl_count contains the bit length statistics for
* the given tree and the field len is set for all tree elements.
* OUT assertion: the field code is set for all tree elements of non
* zero code length.
*/
void gen_codes (TState &state, ct_data *tree, int max_code)
{
    /* Derive the first code value of every bit length from bl_count[], then
     * give each element 0..max_code with a non-zero length the next unused
     * code of that length, stored bit-reversed.
     */
    ush next_code[MAX_BITS+1];  /* next code value to hand out, per length */
    ush running = 0;            /* first code of the length being computed */

    /* Slot 0 is deliberately left unset: zero-length elements are skipped */
    for (int len = 1; len <= MAX_BITS; len++) {
        running = (ush)((running + state.ts.bl_count[len-1]) << 1);
        next_code[len] = running;
    }

    /* With consistent counts the last code of the longest length is all ones */
    Assert(state,running + state.ts.bl_count[MAX_BITS]-1 == (1<< ((ush) MAX_BITS)) - 1,
           "inconsistent bit counts");
    Trace("\ngen_codes: max_code %d ", max_code);

    for (int sym = 0; sym <= max_code; sym++) {
        const int len = tree[sym].dl.len;
        if (len != 0) {
            const ush assigned = next_code[len];
            next_code[len]++;
            tree[sym].fc.code = (ush)bi_reverse(assigned, len);
        }
    }
}
/* ===========================================================================
* Construct one Huffman tree and assign the code bit strings and lengths.
* Update the total bit length for the current block.
* IN assertion: the field freq is set for all tree elements.
* OUT assertions: the fields len and code are set to the optimal bit length
* and corresponding code. The length opt_len is updated; static_len is
* also updated if stree is not null. The field max_code is set.
*/
void build_tree(TState &state,tree_desc *desc)
{
    /* Steps: seed the heap with every element of non-zero frequency, pad it
     * to at least two entries, heapify, merge the two least frequent nodes
     * until one remains, then call gen_bitlen() and gen_codes().
     */
    ct_data *tree  = desc->dyn_tree;
    ct_data *stree = desc->static_tree;
    const int elems = desc->elems;
    int max_code = -1;          /* largest code with non zero frequency */
    int next_internal = elems;  /* index of the next internal node to create */

    /* heap[0] is unused; the sons of heap[i] are heap[2*i] and heap[2*i+1] */
    state.ts.heap_len = 0;
    state.ts.heap_max = HEAP_SIZE;

    for (int sym = 0; sym < elems; sym++) {
        if (tree[sym].fc.freq == 0) {
            tree[sym].dl.len = 0;
            continue;
        }
        max_code = sym;
        state.ts.heap[++state.ts.heap_len] = sym;
        state.ts.depth[sym] = 0;
    }

    /* Force at least two codes of non zero frequency so later stages need
     * no special cases. The forced code is 0 or 1, hence has no extra bits.
     */
    while (state.ts.heap_len < 2) {
        int forced = 0;
        if (max_code < 2) {
            forced = ++max_code;
        }
        state.ts.heap[++state.ts.heap_len] = forced;
        tree[forced].fc.freq = 1;
        state.ts.depth[forced] = 0;
        state.ts.opt_len--;
        if (stree) {
            state.ts.static_len -= stree[forced].dl.len;
        }
    }
    desc->max_code = max_code;

    /* Slots heap_len/2+1 .. heap_len are leaves of the heap already;
     * sift down the remaining slots from the last father to the root.
     */
    for (int slot = state.ts.heap_len / 2; slot >= 1; slot--) {
        pqdownheap(state, tree, slot);
    }

    /* Repeatedly combine the two least frequent nodes under a new father */
    do {
        int least;
        int second;

        pqremove(tree, least);                  /* node of least frequency */
        second = state.ts.heap[SMALLEST];       /* node of next least frequency */

        /* Park both at the top end of the array, kept sorted by frequency */
        state.ts.heap[--state.ts.heap_max] = least;
        state.ts.heap[--state.ts.heap_max] = second;

        tree[next_internal].fc.freq = (ush)(tree[least].fc.freq + tree[second].fc.freq);
        state.ts.depth[next_internal] = (uch) (Max(state.ts.depth[least], state.ts.depth[second]) + 1);
        tree[second].dl.dad = (ush)next_internal;
        tree[least].dl.dad = (ush)next_internal;

        /* The father replaces `second` at the root and is sifted into place */
        state.ts.heap[SMALLEST] = next_internal++;
        pqdownheap(state, tree, SMALLEST);
    } while (state.ts.heap_len >= 2);

    /* The single remaining heap entry is the root of the Huffman tree */
    const int root = state.ts.heap[SMALLEST];
    state.ts.heap[--state.ts.heap_max] = root;

    /* freq and dad are set: compute bit lengths, then the codes themselves */
    gen_bitlen(state, desc);
    gen_codes(state, tree, max_code);
}
/* ===========================================================================
* Scan a literal or distance tree to determine the frequencies of the codes