forked from vsespb/mt-aws-glacier
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMetaData.pm
175 lines (143 loc) · 5.07 KB
/
MetaData.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# mt-aws-glacier - Amazon Glacier sync client
# Copyright (C) 2012-2013 Victor Efimov
# http://mt-aws.com (also http://vs-dev.com) [email protected]
# License: GPLv3
#
# This file is part of "mt-aws-glacier"
#
# mt-aws-glacier is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# mt-aws-glacier is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
package MetaData;
use strict;
use warnings;
use utf8;
use Encode;
use MIME::Base64;
use JSON::XS;
use POSIX;
use Time::Local;
=pod
MT-AWS-GLACIER metadata format (x-amz-archive-description field).
Version 'mt1'
x-amz-archive-description = 'mt1' <space> base64url(json_utf8({'filename': FILENAME, 'mtime': iso8601(MTIME)}))
Input data:
FILENAME (character string)
Is a relative filename (no leading slash). Filename is taken from file system and treated as a character sequence
with known encoding.
MTIME (time)
is file last modification time with 1 second resolution. Can be below Y1970.
Internal representation is epoch time, so it can be any valid epoch time, including negative values. On some systems it is
a 32-bit signed value, on others a 64-bit signed value, for some filesystems a 34-bit signed value, etc.
Function definitions:
base64url() input - byte sequence, output - byte sequence
Is Base64 URL algorithm: http://en.wikipedia.org/wiki/Base64#URL_applications
basically it's base64 but with '=' padding removed, characters '+', '/' replaced with '-', '_' resp. and no new lines.
json_utf8() - input - Hash, output - byte sequence
JSON string in UTF-8 representation. Can contain unescaped UTF-8 characters. Will not contain linefeeds. Hash objects are unordered.
iso8601() - input - time, output - character string
ISO8601 time in the following format: YYYYMMDDTHHMMSSZ. Only the UTC timezone is used. Leap seconds are not supported.
When encoding with iso8601(), mt-aws-glacier will not store leap seconds. When decoding from iso8601, leap seconds are not supported (yet).
{'filename': FILENAME, 'mtime': iso8601(MTIME)}
Hash with two keys: 'filename' and 'mtime'. Correspond to JSON 'Object'.
Note that, according to this spec, the same (FILENAME, MTIME) values can produce different x-amz-archive-description values, as a JSON hash is unordered.
=cut
# Shared JSON::XS coder used by _encode_json() and _decode_json().
# It always works on UTF-8 byte strings (->utf8).
my $meta_coder = JSON::XS->new->utf8;
# Extra abuse protection (nesting depth 1, at most 1024 bytes of input) is only
# requested when JSON::XS is at least version 1.4; otherwise the length check in
# meta_decode() is the only limit.
$meta_coder->max_depth(1)->max_size(1024) if $JSON::XS::VERSION >= 1.4;
# Decode an x-amz-archive-description value stored in the 'mt1' metadata format.
#
# Input:   $str - the raw description: marker, whitespace, base64url payload.
# Returns: whatever _decode_json() yields for the decoded payload, i.e.
#          (filename, mtime) on success. Returns (undef, undef) when $str is
#          undef or empty, the marker is not 'mt1', the payload is missing, or
#          the payload is longer than 1024 characters.
sub meta_decode
{
    my ($str) = @_;

    # Guard first: split() and the string comparisons below would otherwise
    # emit "uninitialized value" warnings for undef input.
    return (undef, undef) unless defined $str;

    my ($marker, $b64) = split(' ', $str);

    # An empty or whitespace-only string produces no marker at all.
    return (undef, undef) unless defined($marker) && $marker eq 'mt1';

    # A bare 'mt1' produces no payload; oversized payloads are rejected
    # before any decoding work is done.
    return (undef, undef) unless defined($b64) && length($b64) <= 1024;

    return _decode_json(_decode_utf8(_decode_b64($b64)));
}
# Convert a base64url string (alphabet '-' and '_', no '=' padding) back into
# the bytes it encodes. Returns undef if decoding raised an exception.
sub _decode_b64
{
    my ($b64url) = @_;
    my $bytes = eval {
        # Map the URL-safe alphabet back to the standard one on a private copy.
        (my $standard = $b64url) =~ tr{-_}{+/};
        # Restore the '=' padding that base64url strips.
        my $remainder = length($standard) % 4;
        if ($remainder) {
            $standard .= '=' x (4 - $remainder);
        }
        MIME::Base64::decode_base64($standard);
    };
    return $bytes if $@ eq '';
    return undef;
}
# Strictly decode a UTF-8 byte string into a character string.
# Returns undef for undef input or when the bytes are not valid UTF-8
# (decode() is told to die on errors and the exception is trapped here).
sub _decode_utf8
{
    my ($octets) = @_;
    return undef if !defined $octets;
    my $check = Encode::DIE_ON_ERR | Encode::LEAVE_SRC;
    my $chars = eval { decode("UTF-8", $octets, $check) };
    return undef if $@ ne '';
    return $chars;
}
# Decode the JSON payload of an 'mt1' record and extract its two fields.
#
# Input:   $str - JSON text (undef is accepted and treated as a failure).
# Returns: (filename, mtime), where mtime is the value returned by
#          _parse_iso8601() for the record's 'mtime' string.
#          Returns (undef, undef) on every failure: undef input, malformed
#          JSON, a top-level value that is not an object, missing or
#          non-string fields, or an unparsable timestamp.
sub _decode_json
{
    my ($str) = @_;
    return (undef, undef) unless defined $str;

    my $h = eval {
        $meta_coder->decode($str)
    };
    return (undef, undef) if $@ ne '';

    # The payload is untrusted: a top-level array (or scalar) is valid JSON,
    # but dereferencing it as a hash would die under 'use strict'.
    return (undef, undef) unless ref($h) eq 'HASH';

    my ($filename, $mtime_str) = @{$h}{qw(filename mtime)};
    return (undef, undef) unless defined($filename) && defined($mtime_str);

    # Both fields must be plain strings; JSON booleans and nested structures
    # arrive as references and are rejected.
    return (undef, undef) if ref($filename) || ref($mtime_str);

    my $mtime = _parse_iso8601($mtime_str);

    # Test definedness, not truth: epoch 0 (19700101T000000Z) is a valid mtime.
    return (undef, undef) unless defined $mtime;

    return ($filename, $mtime);
}
# Build the x-amz-archive-description value ('mt1' format) for a file.
#
# Input:   $relfilename - relative file name, $mtime - modification time (epoch).
# Returns: the string "mt1 <base64url payload>", or undef when either argument
#          is undef or the resulting string is longer than 1024 characters.
#
# NOTE(review): the coder used by _encode_json() is created with ->utf8, so its
# output may already be UTF-8 bytes before _encode_utf8() is applied - confirm
# before changing the pipeline; its order is mirrored by meta_decode() and
# determines the stored format.
sub meta_encode
{
    my ($relfilename, $mtime) = @_;
    return undef if !defined($relfilename) || !defined($mtime);

    my $utf8_json   = _encode_utf8(_encode_json($relfilename, $mtime));
    my $description = "mt1 " . _encode_b64($utf8_json);

    return undef if length($description) > 1024;
    return $description;
}
# Encode bytes as base64url: standard base64 on a single line, with '+' and '/'
# replaced by '-' and '_' and the trailing '=' padding removed.
sub _encode_b64
{
    my ($bytes) = @_;
    # The empty second argument suppresses line breaks in the output.
    my $encoded = MIME::Base64::encode_base64($bytes, '');
    # '=' only ever appears as trailing padding, so deleting every '=' while
    # translating the alphabet is the same as stripping the padding.
    $encoded =~ tr{+/=}{-_}d;
    return $encoded;
}
# Encode a character string as strict UTF-8 bytes.
# encode() is told to die on unencodable input; that exception is not trapped
# here, so it propagates to the caller.
sub _encode_utf8
{
    my ($chars) = @_;
    my $check  = Encode::DIE_ON_ERR | Encode::LEAVE_SRC;
    my $octets = encode("UTF-8", $chars, $check);
    return $octets;
}
# Serialize the metadata hash {mtime, filename} with the shared JSON coder.
# $mtime (epoch seconds) is rendered in UTC as YYYYMMDDTHHMMSSZ.
sub _encode_json
{
    my ($relfilename, $mtime) = @_;
    my $stamp = strftime("%Y%m%dT%H%M%SZ", gmtime($mtime));
    return $meta_coder->encode({
        mtime    => $stamp,
        filename => $relfilename,
    });
}
# Parse a (subset of) ISO8601 UTC timestamp into epoch seconds.
# Implemented here to avoid non-core dependencies.
#
# Accepted: YYYYMMDDTHHMMSSZ, optionally with '-' / ':' separators, embedded
#           whitespace, and a fractional-seconds suffix (which is ignored).
#           'T' and 'Z' are matched case-insensitively; 'Z' is mandatory.
# Returns:  epoch seconds (may be 0 or negative), or undef when the input is
#           undef, does not match, has a year below 1000, or is rejected by
#           timegm() (e.g. month 13, day 32).
sub _parse_iso8601
{
    my ($str) = @_;
    return undef unless defined $str;

    # [0-9] instead of \d: the input is a decoded character string, and \d
    # would also accept non-ASCII Unicode digits that timegm() cannot use.
    return undef unless $str =~ m{
        \A \s*
        ([0-9]{4}) [\-\s]* ([0-9]{2}) [\-\s]* ([0-9]{2})
        \s* T \s*
        ([0-9]{2}) [:\s]* ([0-9]{2}) [:\s]* ([0-9]{2})
        [,.0-9]{0,10}
        \s* Z \s* \z
    }xi;
    my ($year, $month, $day, $hour, $min, $sec) = ($1, $2, $3, $4, $5, $6);

    $sec = 59 if $sec == 60; # TODO: dirty workaround to avoid dealing with leap seconds

    # timegm() does not take years below 1000 literally (it treats them as
    # offsets from 1900 or as two-digit shorthand), so refuse them rather
    # than return a silently wrong date.
    return undef if $year < 1000;

    my $res = eval { timegm($sec, $min, $hour, $day, $month - 1, $year) };

    # Out-of-range fields make timegm() die; report that as undef.
    return ($@ eq '') ? $res : undef;
}
1;
__END__