imagenet_utils.py
Go to the documentation of this file.
1 """Utilities for ImageNet data preprocessing & prediction decoding.
2 """
3 from __future__ import absolute_import
4 from __future__ import division
5 from __future__ import print_function
6 
7 import json
8 import warnings
9 import numpy as np
10 
11 from keras.utils.data_utils import get_file
12 from keras import backend as K
13 
# Cache for the ImageNet class-index mapping. It stays `None` until
# `decode_predictions` loads the JSON file located at `CLASS_INDEX_PATH`.
CLASS_INDEX = None
CLASS_INDEX_PATH = ('https://s3.amazonaws.com/deep-learning-models/'
                    'image-models/imagenet_class_index.json')

# Module-level slot for a backend tensor of the ImageNet mean
# (symbolic-input preprocessing); starts out unset.
_IMAGENET_MEAN = None
19 
20 
def _preprocess_numpy_input(x, data_format, mode):
    """Applies ImageNet preprocessing to a Numpy image array.

    Arrays whose dtype is not a floating type are first converted with
    `astype(K.floatx(), copy=False)`. All arithmetic is then performed
    in place on that array (or on a channel-reversed view of it).

    # Arguments
        x: Input array, 3D or 4D.
        data_format: Data format of the image array
            (`'channels_first'` selects the channel-leading layout,
            anything else is treated as channel-trailing).
        mode: One of "caffe", "tf" or "torch".
            - caffe: will convert the images from RGB to BGR,
                then will zero-center each color channel with
                respect to the ImageNet dataset,
                without scaling.
            - tf: will scale pixels between -1 and 1,
                sample-wise.
            - torch: will scale pixels between 0 and 1 and then
                will normalize each channel with respect to the
                ImageNet dataset.
            Any value other than "tf" or "torch" takes the caffe path.

    # Returns
        Preprocessed Numpy array.
    """
    if not issubclass(x.dtype.type, np.floating):
        x = x.astype(K.floatx(), copy=False)

    if mode == 'tf':
        x /= 127.5
        x -= 1.
        return x

    channels_first = data_format == 'channels_first'

    if mode == 'torch':
        x /= 255.
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
    else:
        # 'RGB'->'BGR': reverse the channel axis (this yields a view).
        if not channels_first:
            x = x[..., ::-1]
        elif x.ndim == 3:
            x = x[::-1, ...]
        else:
            x = x[:, ::-1, ...]
        mean = [103.939, 116.779, 123.68]
        std = None

    # One index tuple per color channel, matching the array layout.
    everything = slice(None)
    if not channels_first:
        channel_slices = [(Ellipsis, c) for c in range(3)]
    elif x.ndim == 3:
        channel_slices = [(c, everything, everything) for c in range(3)]
    else:
        channel_slices = [(everything, c, everything, everything)
                          for c in range(3)]

    # Zero-center every channel first, then (torch only) rescale.
    for where, channel_mean in zip(channel_slices, mean):
        x[where] -= channel_mean
    if std is not None:
        for where, channel_std in zip(channel_slices, std):
            x[where] /= channel_std
    return x
93 
94 
def _preprocess_symbolic_input(x, data_format, mode):
    """Preprocesses a tensor encoding a batch of images.

    # Arguments
        x: Input tensor, 3D or 4D.
        data_format: Data format of the image tensor.
        mode: One of "caffe", "tf" or "torch".
            - caffe: will convert the images from RGB to BGR,
                then will zero-center each color channel with
                respect to the ImageNet dataset,
                without scaling.
            - tf: will scale pixels between -1 and 1,
                sample-wise.
            - torch: will scale pixels between 0 and 1 and then
                will normalize each channel with respect to the
                ImageNet dataset.
            Any value other than "tf" or "torch" takes the caffe path.

    # Returns
        Preprocessed tensor.
    """
    if mode == 'tf':
        x /= 127.5
        x -= 1.
        return x

    if mode == 'torch':
        x /= 255.
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
    else:
        if data_format == 'channels_first':
            # 'RGB'->'BGR'
            if K.ndim(x) == 3:
                x = x[::-1, ...]
            else:
                x = x[:, ::-1, ...]
        else:
            # 'RGB'->'BGR'
            x = x[..., ::-1]
        mean = [103.939, 116.779, 123.68]
        std = None

    # The mean constant is built on every call. Caching it in the
    # module-level `_IMAGENET_MEAN` made the first mode used ("caffe" or
    # "torch") win for all later calls, so the other mode was centered
    # with the wrong values. That global is no longer read or written here.
    mean_tensor = K.constant(-np.array(mean))

    # Zero-center by mean pixel (the negated mean is added as a bias).
    if K.dtype(x) != K.dtype(mean_tensor):
        mean_tensor = K.cast(mean_tensor, K.dtype(x))
    x = K.bias_add(x, mean_tensor, data_format)

    if std is not None:
        std_values = np.array(std)
        if data_format == 'channels_first':
            # Shape (3, 1, 1) so the division broadcasts over the channel
            # axis instead of the trailing (width) axis.
            std_values = std_values.reshape((-1, 1, 1))
        x /= K.constant(std_values, dtype=K.dtype(x))
    return x
150 
151 
def preprocess_input(x, data_format=None, mode='caffe'):
    """Applies ImageNet preprocessing to a Numpy array or a tensor.

    Numpy arrays are routed to `_preprocess_numpy_input`; every other
    input is routed to `_preprocess_symbolic_input`.

    # Arguments
        x: Input Numpy or symbolic tensor, 3D or 4D.
            The preprocessed data is written over the input data
            if the data types are compatible. To avoid this
            behaviour, `numpy.copy(x)` can be used.
        data_format: Data format of the image tensor/array.
            When `None`, `K.image_data_format()` is used.
        mode: One of "caffe", "tf" or "torch".
            - caffe: will convert the images from RGB to BGR,
                then will zero-center each color channel with
                respect to the ImageNet dataset,
                without scaling.
            - tf: will scale pixels between -1 and 1,
                sample-wise.
            - torch: will scale pixels between 0 and 1 and then
                will normalize each channel with respect to the
                ImageNet dataset.

    # Returns
        Preprocessed tensor or Numpy array.

    # Raises
        ValueError: In case of unknown `data_format` argument.
    """
    if data_format is None:
        data_format = K.image_data_format()

    if data_format in {'channels_first', 'channels_last'}:
        if isinstance(x, np.ndarray):
            preprocess = _preprocess_numpy_input
        else:
            preprocess = _preprocess_symbolic_input
        return preprocess(x, data_format=data_format, mode=mode)

    raise ValueError('Unknown data_format ' + str(data_format))
188 
189 
def decode_predictions(preds, top=5):
    """Decodes the prediction of an ImageNet model.

    The class-index JSON file is fetched with `get_file` and cached in the
    module-level `CLASS_INDEX` the first time it is needed.

    # Arguments
        preds: Numpy tensor encoding a batch of predictions.
        top: Integer, how many top-guesses to return.
            A non-positive value yields an empty list for every sample.

    # Returns
        A list of lists of top class prediction tuples
        `(class_name, class_description, score)`.
        One list of tuples per sample in batch input.

    # Raises
        ValueError: In case of invalid shape of the `preds` array
            (must be 2D with 1000 columns).
    """
    global CLASS_INDEX
    if len(preds.shape) != 2 or preds.shape[1] != 1000:
        raise ValueError('`decode_predictions` expects '
                         'a batch of predictions '
                         '(i.e. a 2D array of shape (samples, 1000)). '
                         'Found array with shape: ' + str(preds.shape))
    if top <= 0:
        # `[-top:]` with top == 0 is `[-0:]`, i.e. the whole array, which
        # used to return all 1000 classes. Nothing was requested, so return
        # empty results without loading the class index.
        return [[] for _ in preds]
    if CLASS_INDEX is None:
        fpath = get_file('imagenet_class_index.json',
                         CLASS_INDEX_PATH,
                         cache_subdir='models',
                         file_hash='c2c37ea517e94d9795004a39431a14cb')
        with open(fpath) as f:
            CLASS_INDEX = json.load(f)
    results = []
    for pred in preds:
        # Indices of the `top` largest scores, highest score first.
        top_indices = pred.argsort()[-top:][::-1]
        result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices]
        result.sort(key=lambda entry: entry[2], reverse=True)
        results.append(result)
    return results
226 
227 
def _obtain_input_shape(input_shape,
                        default_size,
                        min_size,
                        data_format,
                        require_flatten,
                        weights=None):
    """Internal utility that resolves and validates a model's input shape.

    # Arguments
        input_shape: Either None (will return the default network input shape),
            or a user-provided shape to be validated.
        default_size: Default input width/height for the model.
        min_size: Minimum input width/height accepted by the model.
        data_format: Image data format to use.
        require_flatten: Whether the model is expected to
            be linked to a classifier via a Flatten layer.
        weights: One of `None` (random initialization)
            or 'imagenet' (pre-training on ImageNet).
            If weights='imagenet' input channels must be equal to 3.

    # Returns
        An integer shape tuple (may include None entries).

    # Raises
        ValueError: In case of invalid argument values.
    """
    channels_first = data_format == 'channels_first'

    # The default shape has 3 channels unless non-ImageNet weights are used
    # together with a 3-entry user shape, whose channel count is adopted.
    channels = 3
    if weights != 'imagenet' and input_shape and len(input_shape) == 3:
        channels = input_shape[0] if channels_first else input_shape[-1]
        if channels not in {1, 3}:
            warnings.warn(
                'This model usually expects 1 or 3 input channels. '
                'However, it was passed an input_shape with ' +
                str(channels) + ' input channels.')
    if channels_first:
        default_shape = (channels, default_size, default_size)
    else:
        default_shape = (default_size, default_size, channels)

    # ImageNet weights with the classifier on top: only the default fits.
    if weights == 'imagenet' and require_flatten:
        if input_shape is not None and input_shape != default_shape:
            raise ValueError('When setting`include_top=True` '
                             'and loading `imagenet` weights, '
                             '`input_shape` should be ' +
                             str(default_shape) + '.')
        return default_shape

    if not input_shape:
        # Nothing supplied: fall back to a default.
        if require_flatten:
            input_shape = default_shape
        elif channels_first:
            input_shape = (3, None, None)
        else:
            input_shape = (None, None, 3)
    else:
        if len(input_shape) != 3:
            raise ValueError(
                '`input_shape` must be a tuple of three integers.')
        if channels_first:
            channel_axis, spatial_axes = 0, (1, 2)
        else:
            channel_axis, spatial_axes = -1, (0, 1)
        if input_shape[channel_axis] != 3 and weights == 'imagenet':
            raise ValueError('The input must have 3 channels; got '
                             '`input_shape=' + str(input_shape) + '`')
        # Unknown (None) spatial sizes are exempt from the minimum.
        if any(input_shape[axis] is not None and input_shape[axis] < min_size
               for axis in spatial_axes):
            raise ValueError('Input size must be at least ' +
                             str(min_size) + 'x' + str(min_size) + '; got '
                             '`input_shape=' + str(input_shape) + '`')

    if require_flatten and None in input_shape:
        raise ValueError('If `include_top` is True, '
                         'you should specify a static `input_shape`. '
                         'Got `input_shape=' + str(input_shape) + '`')
    return input_shape
int open(const char *, int)
Opens a file descriptor.
def _preprocess_numpy_input(x, data_format, mode)
def _obtain_input_shape(input_shape, default_size, min_size, data_format, require_flatten, weights=None)
def decode_predictions(preds, top=5)
def _preprocess_symbolic_input(x, data_format, mode)
def preprocess_input(x, data_format=None, mode='caffe')
static QCString str