-
Notifications
You must be signed in to change notification settings - Fork 21
/
preprocess.py
456 lines (366 loc) · 15.1 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=missing-docstring
"""Preprocessing methods.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import numpy as np
import tensorflow.compat.v1 as tf
import inception_preprocessing as inception_pp
import utils
# Path of the `color_palette.npy` file.
COLOR_PALETTE_PATH = "/cns/vz-d/home/dune/representation/color_palette.npy"
def crop(image, is_training, crop_size):
  """Crops `image` to `crop_size`, randomly in training and centrally otherwise.

  Args:
    image: Image tensor; its first two dimensions are treated as height and
      width and its last one as channels.
    is_training: If true a random crop is taken, otherwise a central crop.
    crop_size: Sequence whose first two entries are the crop height and width.

  Returns:
    The cropped image.
  """
  target_h, target_w = crop_size[0], crop_size[1]
  channels = image.shape[-1]
  if not is_training:
    # Central crop for now. (See Table 5 in Appendix of
    # https://arxiv.org/pdf/1703.07737.pdf for why)
    offset_y = (tf.shape(image)[0] - target_h) // 2
    offset_x = (tf.shape(image)[1] - target_w) // 2
    return tf.image.crop_to_bounding_box(
        image, offset_y, offset_x, target_h, target_w)
  return tf.random_crop(image, [target_h, target_w, channels])
def get_inception_preprocess(is_training, im_size):
  """Returns a step applying `inception_pp.preprocess_image` to data["image"].

  Args:
    is_training: Forwarded to `inception_pp.preprocess_image`.
    im_size: Sequence whose first two entries are passed as the target height
      and width.

  Returns:
    A function that updates `data["image"]` in place and returns `data`.
  """

  def _inception_preprocess(data):
    processed = inception_pp.preprocess_image(
        data["image"],
        im_size[0],
        im_size[1],
        is_training,
        add_image_summaries=False)
    data["image"] = processed
    return data

  return _inception_preprocess
def get_resize_small(smaller_size):
  """Returns a step resizing the smaller image side to `smaller_size`.

  The aspect ratio is kept. Both a single image (HWC) and a batch of images
  (BHWC) are handled; an input of any other static rank is returned unchanged.

  Args:
    smaller_size: Target length of the smaller side.

  Returns:
    A function that updates `data["image"]` in place and returns `data`.
  """

  def _resize_small_pp(data):
    img = data["image"]
    # Height and width sit at positions -3 and -2 for both HWC and BHWC.
    height, width = tf.shape(img)[-3], tf.shape(img)[-2]

    # Scale both sides by the factor that maps the smaller one to the target.
    scale = tf.to_float(smaller_size) / tf.to_float(tf.minimum(height, width))
    height = tf.to_int32(tf.round(tf.to_float(height) * scale))
    width = tf.to_int32(tf.round(tf.to_float(width) * scale))
    new_size = [height, width]

    # NOTE: use align_corners=False for AREA resize, but True for Bilinear.
    # See also https://github.com/tensorflow/tensorflow/issues/6720
    rank = len(img.get_shape().as_list())
    if rank == 3:
      # A single image: add a batch dimension for the resize, then drop it.
      data["image"] = tf.image.resize_area(img[None], new_size)[0]
    elif rank == 4:
      # A batch of images can be resized directly.
      data["image"] = tf.image.resize_area(img, new_size)
    return data

  return _resize_small_pp
def get_multi_crop(crop_size):
  """Get multiple crops for test.

  Args:
    crop_size: Sequence whose first two entries are the crop height and width.

  Returns:
    A function that replaces `data["image"]` by a stack of five crops of size
    `crop_size`, in the order: center, top-left, top-right, bottom-left,
    bottom-right. The image's first two dimensions are used as height and
    width.
  """

  def _crop(image, offset, size):
    # `offset` is (y, x) and `size` is (height, width).
    return tf.image.crop_to_bounding_box(image, offset[0], offset[1], size[0],
                                         size[1])

  def _multi_crop_pp(data):
    image = data["image"]
    h, w = crop_size[0], crop_size[1]

    # (y, x) offsets of the four corner crops and of the central crop.
    tl = (0, 0)
    tr = (0, tf.shape(image)[1] - w)
    bl = (tf.shape(image)[0] - h, 0)
    br = (tf.shape(image)[0] - h, tf.shape(image)[1] - w)
    center = ((tf.shape(image)[0] - h) // 2, (tf.shape(image)[1] - w) // 2)

    data["image"] = tf.stack([
        _crop(image, center, crop_size),
        _crop(image, tl, crop_size),
        _crop(image, tr, crop_size),
        _crop(image, bl, crop_size),
        _crop(image, br, crop_size),
    ])
    return data

  return _multi_crop_pp
def get_crop(is_training, crop_size):
  """Returns a random (or central at test-time) crop of `crop_size`.

  Args:
    is_training: Forwarded to `crop`; selects random vs. central cropping.
    crop_size: Forwarded to `crop` as the size of the crop.

  Returns:
    A function that updates `data["image"]` in place, by running `crop`
    through `utils.tf_apply_to_image_or_images`, and returns `data`.
  """

  def _crop_pp(data):
    def _crop_one(image):
      return crop(image, is_training=is_training, crop_size=crop_size)

    cropped = utils.tf_apply_to_image_or_images(_crop_one, data["image"])
    data["image"] = cropped
    return data

  return _crop_pp
def inception_crop(image, **kw):
  """Perform an "inception crop", without resize.

  Args:
    image: The image to crop.
    **kw: Extra keyword arguments forwarded to
      `tf.image.sample_distorted_bounding_box`.

  Returns:
    The slice of `image` selected by the sampled bounding box, with its last
    static dimension set to that of `image`.
  """
  # No bounding boxes are given, so the whole image is used as the box.
  sampled = tf.image.sample_distorted_bounding_box(
      tf.shape(image),
      tf.zeros([0, 0, 4], tf.float32),
      use_image_if_no_bounding_boxes=True,
      **kw)
  offset, extent = sampled[0], sampled[1]
  cropped = tf.slice(image, offset, extent)
  # Unfortunately, the above operation loses the depth-dimension. So we need
  # to restore it the manual way.
  cropped.set_shape([None, None, image.shape[-1]])
  return cropped
def get_inception_crop(is_training, **kw):
  """Returns a step applying `inception_crop` to data["image"] in training.

  In eval mode the image is left untouched and a warning is logged.

  Args:
    is_training: Whether the step is built for training.
    **kw: Forwarded to `inception_crop`. kw of interest are:
      aspect_ratio_range, area_range.

  Returns:
    A function that takes and returns the `data` dict.
  """

  # Note that image is not resized yet here.
  def _inception_crop_pp(data):
    if not is_training:
      # TODO(lbeyer): Maybe do 87.5%-crop in test-mode by default?
      tf.logging.warn("inception_crop pre-processing keeps the full image in "
                      "eval mode for now. Contact lbeyer@ with your use-case "
                      "and propose a reasonable default behaviour.")
      return data
    data["image"] = inception_crop(data["image"], **kw)
    return data

  return _inception_crop_pp
def get_random_flip_lr(is_training):
  """Returns a step that randomly flips data["image"] left/right in training.

  Args:
    is_training: When false, the returned step passes `data` through unchanged.

  Returns:
    A function that takes and returns the `data` dict.
  """

  def _random_flip_lr_pp(data):
    if not is_training:
      return data
    flipped = utils.tf_apply_to_image_or_images(
        tf.image.random_flip_left_right, data["image"])
    data["image"] = flipped
    return data

  return _random_flip_lr_pp
def get_resize_preprocess(fn_args, is_training):
  """Returns a step resizing data["image"], optionally with a random method.

  The string "randomize_method" may be present anywhere in `fn_args`. If it
  is, randomization of the resize method is enabled during training, but not
  during test. That's so that a call can look like
  `resize(256, randomize_method)` or `resize(randomize_method, 256, 128)` and
  they all work as expected.

  Args:
    fn_args: List of string arguments. Everything except "randomize_method" is
      parsed into the target size by `utils.str2intlist(..., 2)`. The list
      itself is not modified.
    is_training: Whether the step is built for training.

  Returns:
    A function that updates `data["image"]` in place and returns `data`.
  """
  # Work on a filtered copy so the caller's argument list is left intact.
  randomize_resize_method = (
      is_training if "randomize_method" in fn_args else False)
  size_args = [arg for arg in fn_args if arg != "randomize_method"]
  im_size = utils.str2intlist(size_args, 2)

  def _resize(image, method, align_corners):
    """Returns a thunk resizing `image` with `method`, as needed by tf.case."""

    def _process():
      # The resized_images are of type float32 and might fall outside of range
      # [0, 255].
      resized = tf.cast(
          tf.image.resize_images(
              image, im_size, method, align_corners=align_corners),
          dtype=tf.float32)
      return resized

    return _process

  def _resize_pp(data):
    im = data["image"]

    if randomize_resize_method:
      # Candidate (method, align_corners) pairs.
      # NOTE: use align_corners=False for AREA resize, but True for the
      # others. See https://github.com/tensorflow/tensorflow/issues/6720
      candidates = [
          (tf.image.ResizeMethod.BILINEAR, True),
          (tf.image.ResizeMethod.NEAREST_NEIGHBOR, True),
          (tf.image.ResizeMethod.BICUBIC, True),
          (tf.image.ResizeMethod.AREA, False),
      ]
      # Pick a random resizing method. The upper bound of an integer
      # `random_uniform` is exclusive, so it has to be the number of
      # candidates for the last one (AREA) to be reachable.
      r = tf.random_uniform([], 0, len(candidates), dtype=tf.int32)
      # A list of pairs keeps the order explicit and avoids using tensors as
      # dictionary keys.
      im = tf.case([
          (tf.equal(r, tf.cast(index, r.dtype)),
           _resize(im, method, align_corners))
          for index, (method, align_corners) in enumerate(candidates)
      ])
    else:
      im = tf.image.resize_images(im, im_size)

    data["image"] = im
    return data

  return _resize_pp
def get_rotate_preprocess(create_labels=True):
  """Returns a function that does 90deg rotations and sets according labels.

  Args:
    create_labels: If true, `data["label"]` is set to the labels [0, 1, 2, 3],
      produced through the same `utils.tf_apply_to_image_or_images` call
      structure as the images.

  Returns:
    A function that replaces `data["image"]` by a stack of four versions of
    each image and returns `data`.
  """

  def _four_rots(img):
    # We use our own instead of tf.image.rot90 because that one broke
    # internally shortly before deadline...
    flip_then_transpose = tf.transpose(tf.reverse_v2(img, [1]), [1, 0, 2])
    flip_both_axes = tf.reverse_v2(img, [0, 1])
    transpose_then_flip = tf.reverse_v2(tf.transpose(img, [1, 0, 2]), [1])
    return tf.stack(
        [img, flip_then_transpose, flip_both_axes, transpose_then_flip])

  def _rotation_labels(unused_img):
    return tf.constant([0, 1, 2, 3])

  def _rotate_pp(data):
    # Create labels in the same structure as images!
    if create_labels:
      data["label"] = utils.tf_apply_to_image_or_images(
          _rotation_labels, data["image"], dtype=tf.int32)
    rotated = utils.tf_apply_to_image_or_images(_four_rots, data["image"])
    data["image"] = rotated
    return data

  return _rotate_pp
def get_copy_label_preprocess(new_name):
  """Returns a function that copies `data["label"]` to `data[new_name]`.

  Args:
    new_name: Key under which the label is stored in addition to "label".

  Returns:
    A function that updates `data` in place and returns it.
  """

  def _copy_label_pp(data):
    label = data["label"]
    data[new_name] = label
    return data

  return _copy_label_pp
def get_value_range_preprocess(vmin=-1, vmax=1, dtype=tf.float32):
  """Returns a function that sends [0,255] image to [vmin,vmax].

  Args:
    vmin: Value that an input of 0 is mapped to.
    vmax: Value that an input of 255 is mapped to.
    dtype: Type the image is cast to before rescaling.

  Returns:
    A function that updates `data["image"]` in place and returns `data`.
  """

  def _value_range_pp(data):
    # Bring the image to [0, 1] first, then stretch and shift it.
    unit = tf.cast(data["image"], dtype) / tf.constant(255.0, dtype)
    data["image"] = vmin + unit * (vmax - vmin)
    return data

  return _value_range_pp
def get_hsvnoise_preprocess(sv_pow=(-2.0, 2.0), sv_mul=(-0.5, 0.5),
                            sv_add=(-0.1, 0.1), h_add=(-0.1, 0.1)):
  """Returns a function that randomises HSV similarly to the Exemplar paper.

  Requires the input to still be in [0-255] range.
  Transforms the input to HSV, applies
  `2**rnd(mul) * X**(2**rnd(pow)) + rnd(add)` to the S and V channels
  independently, and `H + rnd(add)` (wrapped around) to the H channel, clips
  the result to [0, 1], then converts back to RGB in float [0-255].

  Args:
    sv_pow: The min/max powers of two to which to take S/V.
    sv_mul: The min/max powers of two with which to scale S/V.
    sv_add: The min/max shift of S/V.
    h_add: The min/max shift of hue.

  Returns:
    A function applying random HSV augmentation to its input.
  """

  def _uniform(shape, bounds):
    # `bounds` is unpacked into the min/max arguments of tf.random.uniform.
    return tf.random.uniform(shape, *bounds)

  def _hsvnoise(rgb):
    hsv = tf.image.rgb_to_hsv(rgb / 255.0)  # Needs [0 1] input.
    hue, sat_val = hsv[..., :1], hsv[..., 1:]

    # One scalar shift for the hue; the hue is cyclic, hence the modulo.
    hue = tf.floormod(1. + hue + _uniform((), h_add), 1.)

    # One random value each for S and for V.
    exponent = 2.0**_uniform((2,), sv_pow)
    scale = 2.0**_uniform((2,), sv_mul)
    shift = _uniform((2,), sv_add)
    sat_val = sat_val**exponent * scale + shift

    noisy_hsv = tf.clip_by_value(tf.concat([hue, sat_val], axis=-1), 0, 1)
    return tf.image.hsv_to_rgb(noisy_hsv) * 255.0

  def _hsvnoise_pp(data):
    noisy = utils.tf_apply_to_image_or_images(_hsvnoise, data["image"])
    data["image"] = noisy
    return data

  return _hsvnoise_pp
def get_standardize_preprocess():
  """Returns a step applying `tf.image.per_image_standardization`.

  Returns:
    A function that updates `data["image"]` in place and returns `data`.
  """

  def _standardize_pp(data):
    standardized = tf.image.per_image_standardization(data["image"])
    data["image"] = standardized
    return data

  return _standardize_pp
def get_inception_crop_patches(resize_size, num_patches):
  """Returns a step turning each image into `num_patches` inception crops.

  Args:
    resize_size: Size every crop is resized to with `tf.image.resize`.
    num_patches: Number of crops taken per image.

  Returns:
    A function that replaces `data["image"]` by the stacked, resized crops and
    returns `data`.
  """

  def _inception_crop_patches(img):
    patches = []
    for _ in range(num_patches):
      patches.append(tf.image.resize(inception_crop(img), resize_size))
    return tf.stack(patches)

  def _inception_crop_patches_pp(data):
    # The output becomes float32 because of the tf.image.resize.
    stacked = utils.tf_apply_to_image_or_images(
        _inception_crop_patches, data["image"], dtype=tf.float32)
    data["image"] = stacked
    return data

  return _inception_crop_patches_pp
def get_inception_preprocess_patches(is_training, resize_size, num_patches):
  """Returns a step stacking `num_patches` inception-preprocessed patches.

  Args:
    is_training: Forwarded to `inception_pp.preprocess_image`.
    resize_size: Sequence whose first two entries are passed as the target
      height and width.
    num_patches: How many times `data["image"]` is preprocessed.

  Returns:
    A function that replaces `data["image"]` by the stack of patches and
    returns `data`.
  """

  def _one_patch(image):
    return inception_pp.preprocess_image(
        image,
        resize_size[0],
        resize_size[1],
        is_training,
        add_image_summaries=False)

  def _inception_preprocess_patches(data):
    # Every patch is produced from the same source image.
    patches = [_one_patch(data["image"]) for _ in range(num_patches)]
    data["image"] = tf.stack(patches)
    return data

  return _inception_preprocess_patches
def get_to_gray_preprocess(grayscale_probability):
  """Returns a step that converts images to grayscale with some probability.

  Args:
    grayscale_probability: Passed to `utils.tf_apply_with_probability` as the
      probability argument for the grayscale conversion.

  Returns:
    A function that updates `data["image"]` in place and returns `data`.
  """

  def _to_gray(image):
    # Transform to grayscale by taking the mean of RGB, then repeat that mean
    # three times along the channel axis.
    channel_mean = tf.reduce_mean(image, axis=2, keepdims=True)
    return tf.tile(channel_mean, [1, 1, 3])

  def _maybe_to_gray(img):
    return utils.tf_apply_with_probability(
        grayscale_probability, _to_gray, img)

  def _to_gray_pp(data):
    result = utils.tf_apply_to_image_or_images(_maybe_to_gray, data["image"])
    data["image"] = result
    return data

  return _to_gray_pp
def get_preprocess_fn(pp_pipeline, is_training):
  """Returns preprocessing function.

  The minilanguage is as follows:

      fn1|fn2(arg, arg2,...)|fn3(key1=val1, key2=val2, ...)|...

  And describes the successive application of the various `fn`s to the input,
  where each function can optionally have one or more arguments, which are
  either positional or key/value, as dictated by the `fn`.

  The pipeline string is parsed and its steps are built right away, so a
  malformed call or an unknown function name is reported by this function
  rather than on the first use of the function it returns.

  Args:
    pp_pipeline: A string describing the pre-processing pipeline.
    is_training: Whether this should be run in train or eval mode.

  Returns:
    preprocessing function

  Raises:
    ValueError: if preprocessing function name is unknown, or if a call in the
      pipeline is malformed (missing closing parenthesis, or a key/value
      argument list with an entry lacking "=").
  """

  def parse_fn(fn_call):
    """Parses the fn(arg1,arg2,...) and fn(a=1,b=2) structures.

    Args:
      fn_call: string, the function call as a string.

    Returns:
      The function name, and either a list (possibly empty) or a dict,
      depending on the syntax of the function call.

    Raises:
      ValueError: if the call is malformed.
    """
    fn_name, paren, rest = fn_call.partition("(")
    fn_name = fn_name.strip()
    if not paren:
      return fn_name, []

    rest = rest.strip()
    if not rest.endswith(")"):
      raise ValueError("Malformed preprocessing call %r" % fn_call)
    arg_str = rest[:-1]
    if not arg_str.strip():
      # `fn()` means "no arguments", not a single empty-string argument.
      return fn_name, []

    parts = [part.strip() for part in arg_str.split(",")]
    if "=" not in arg_str:
      return fn_name, parts

    fn_kwargs = {}
    for part in parts:
      key, sep, value = part.partition("=")
      if not sep:
        raise ValueError("Malformed preprocessing call %r" % fn_call)
      fn_kwargs[key.strip()] = value.strip()
    return fn_name, fn_kwargs

  def get(list_, index, default):
    """Return element at `index` in `list_` or the `default`."""
    try:
      return list_[index]
    except IndexError:
      return default

  def expand(fn_name, args):
    """Yields the preprocessing step(s) that `fn_name(args)` stands for."""
    if fn_name == "plain_preprocess":
      yield lambda x: x
    elif fn_name == "0_to_1":
      yield get_value_range_preprocess(0, 1)
    elif fn_name == "-1_to_1":
      yield get_value_range_preprocess(-1, 1)
    elif fn_name == "value_range":
      yield get_value_range_preprocess(*map(float, args))
    elif fn_name == "resize":
      yield get_resize_preprocess(args, is_training)
    elif fn_name == "resize_small":
      yield get_resize_small(int(args[0]))
    elif fn_name == "crop":
      yield get_crop(is_training, utils.str2intlist(args, 2))
    elif fn_name == "central_crop":
      yield get_crop(False, utils.str2intlist(args, 2))
    elif fn_name == "multi_crop":
      yield get_multi_crop(utils.str2intlist(args, 2))
    elif fn_name == "inception_crop":
      yield get_inception_crop(is_training)
    elif fn_name == "flip_lr":
      yield get_random_flip_lr(is_training)
    elif fn_name == "hsvnoise":
      # TODO(lbeyer): expose the parameters? Or maybe just a scale parameter?
      yield get_hsvnoise_preprocess(*args)
    elif fn_name == "crop_inception_preprocess_patches":
      npatch = int(args[0])
      size = utils.str2intlist(args[1:], 2)
      yield get_inception_preprocess_patches(is_training, size, npatch)
    elif fn_name == "crop_inception_patches":
      npatch = int(args[0])
      size = utils.str2intlist(args[1:], 2)
      yield get_inception_crop_patches(size, npatch)
    elif fn_name == "to_gray":
      yield get_to_gray_preprocess(float(get(args, 0, 1.0)))
    elif fn_name == "standardize":
      yield get_standardize_preprocess()
    elif fn_name == "rotate":
      yield get_rotate_preprocess()
    elif fn_name == "copy_label":
      yield get_copy_label_preprocess(get(args, 0, "copy_label"))
    # Below this line specific combos decomposed.
    # It would be nice to move them to the configs at some point.
    elif fn_name == "inception_preprocess":
      yield get_inception_preprocess(is_training, utils.str2intlist(args, 2))
    else:
      raise ValueError("Not supported preprocessing %s" % fn_name)

  # Parse the pipeline and build its steps once, up front, so that errors
  # surface here and the work is not repeated on every call of `_fn`.
  steps = []
  for fn_call in pp_pipeline.split("|"):
    steps.extend(expand(*parse_fn(fn_call.strip())))

  def _fn(data):
    # Apply all the individual steps in sequence.
    tf.logging.info("Data before pre-processing:\n%s", data)
    for p in steps:
      data = p(data)
      tf.logging.info("Data after `%s`:\n%s", p, data)
    return data

  return _fn