se_mobilenet.py
Go to the documentation of this file.
1 """SE MobileNet v1 models for Keras.
2 
3 # Reference
4 - [MobileNets: Efficient Convolutional Neural Networks for
5  Mobile Vision Applications](https://arxiv.org/pdf/1704.04861.pdf)
6 """
7 from __future__ import print_function
8 from __future__ import absolute_import
9 from __future__ import division
10 
11 import warnings
12 
13 from keras.models import Model
14 from keras.layers import Input
15 from keras.layers import Activation
16 from keras.layers import Dropout
17 from keras.layers import Reshape
18 from keras.layers import BatchNormalization
19 from keras.layers import GlobalAveragePooling2D
20 from keras.layers import GlobalMaxPooling2D
21 from keras.layers import Conv2D
22 from keras import initializers
23 from keras import regularizers
24 from keras import constraints
25 from keras.utils import conv_utils
26 from keras.utils.data_utils import get_file
27 from keras.engine.topology import get_source_inputs
28 from keras.engine import InputSpec
29 from keras.applications import imagenet_utils
30 from keras.applications.imagenet_utils import _obtain_input_shape
31 from keras.applications.imagenet_utils import decode_predictions
32 from keras import backend as K
33 
34 from se import squeeze_excite_block
35 
36 
def relu6(x):
    """Rectified linear activation with the output capped at 6.

    # Arguments
        x: input tensor.

    # Returns
        The result of the backend `relu` called with `max_value=6`.
    """
    capped = K.relu(x, max_value=6)
    return capped
39 
40 
def preprocess_input(x):
    """Preprocesses a numpy array encoding a batch of images.

    Thin wrapper that delegates to
    `keras.applications.imagenet_utils.preprocess_input` with `mode='tf'`.

    # Arguments
        x: a 4D numpy array consists of RGB values within [0, 255].

    # Returns
        Preprocessed array.
    """
    return imagenet_utils.preprocess_input(x, mode='tf')
49 
50 
class DepthwiseConv2D(Conv2D):
    """Depthwise 2D convolution.

    Depthwise convolutions consist in performing just the first step of a
    depthwise separable convolution (which acts on each input channel
    separately). The `depth_multiplier` argument controls how many output
    channels are generated per input channel in the depthwise step.

    # Arguments
        kernel_size: An integer or tuple/list of 2 integers, specifying the
            width and height of the 2D convolution window.
            Can be a single integer to specify the same value for
            all spatial dimensions.
        strides: An integer or tuple/list of 2 integers,
            specifying the strides of the convolution along the width and
            height. Can be a single integer to specify the same value for
            all spatial dimensions.
            Specifying any stride value != 1 is incompatible with specifying
            any `dilation_rate` value != 1.
        padding: one of `'valid'` or `'same'` (case-insensitive).
        depth_multiplier: The number of depthwise convolution output channels
            for each input channel.
            The total number of depthwise convolution output
            channels will be equal to `filters_in * depth_multiplier`.
        data_format: A string,
            one of `channels_last` (default) or `channels_first`.
            The ordering of the dimensions in the inputs.
            `channels_last` corresponds to inputs with shape
            `(batch, height, width, channels)` while `channels_first`
            corresponds to inputs with shape
            `(batch, channels, height, width)`.
            It defaults to the `image_data_format` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be 'channels_last'.
        activation: Activation function to use
            (see [activations](../activations.md)).
            If you don't specify anything, no activation is applied
            (ie. 'linear' activation: `a(x) = x`).
        use_bias: Boolean, whether the layer uses a bias vector.
        depthwise_initializer: Initializer for the depthwise kernel matrix
            (see [initializers](../initializers.md)).
        bias_initializer: Initializer for the bias vector
            (see [initializers](../initializers.md)).
        depthwise_regularizer: Regularizer function applied to
            the depthwise kernel matrix
            (see [regularizer](../regularizers.md)).
        bias_regularizer: Regularizer function applied to the bias vector
            (see [regularizer](../regularizers.md)).
        activity_regularizer: Regularizer function applied to
            the output of the layer (its 'activation').
            (see [regularizer](../regularizers.md)).
        depthwise_constraint: Constraint function applied to
            the depthwise kernel matrix
            (see [constraints](../constraints.md)).
        bias_constraint: Constraint function applied to the bias vector
            (see [constraints](../constraints.md)).

    # Input shape
        4D tensor with shape:
        `[batch, channels, rows, cols]` if data_format='channels_first'
        or 4D tensor with shape:
        `[batch, rows, cols, channels]` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `[batch, filters, new_rows, new_cols]` if data_format='channels_first'
        or 4D tensor with shape:
        `[batch, new_rows, new_cols, filters]` if data_format='channels_last'.
        `rows` and `cols` values might have changed due to padding.
    """

    def __init__(self,
                 kernel_size,
                 strides=(1, 1),
                 padding='valid',
                 depth_multiplier=1,
                 data_format=None,
                 activation=None,
                 use_bias=True,
                 depthwise_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 depthwise_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 depthwise_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        # No `filters` value is given to `Conv2D`: the number of output
        # channels is derived from the input channels and
        # `depth_multiplier` (see `build` / `compute_output_shape`).
        super(DepthwiseConv2D, self).__init__(
            filters=None,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            activation=activation,
            use_bias=use_bias,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            bias_constraint=bias_constraint,
            **kwargs)
        self.depth_multiplier = depth_multiplier
        self.depthwise_initializer = initializers.get(depthwise_initializer)
        self.depthwise_regularizer = regularizers.get(depthwise_regularizer)
        self.depthwise_constraint = constraints.get(depthwise_constraint)
        # `bias_initializer` is not forwarded to `Conv2D.__init__` above,
        # so it is resolved and stored here.
        self.bias_initializer = initializers.get(bias_initializer)

    def build(self, input_shape):
        """Creates the depthwise kernel (and optional bias) weights.

        # Arguments
            input_shape: shape tuple of the 4D input.

        # Raises
            ValueError: if the input has fewer than 4 dimensions or its
                channel dimension is `None`.
        """
        if len(input_shape) < 4:
            # Build one message string; passing the shape as a second
            # positional argument would make the error print as a tuple.
            raise ValueError('Inputs to `DepthwiseConv2D` should have rank 4. '
                             'Received input shape: ' + str(input_shape))
        if self.data_format == 'channels_first':
            channel_axis = 1
        else:
            channel_axis = 3
        if input_shape[channel_axis] is None:
            raise ValueError('The channel dimension of the inputs to '
                             '`DepthwiseConv2D` '
                             'should be defined. Found `None`.')
        input_dim = int(input_shape[channel_axis])
        depthwise_kernel_shape = (self.kernel_size[0],
                                  self.kernel_size[1],
                                  input_dim,
                                  self.depth_multiplier)

        self.depthwise_kernel = self.add_weight(
            shape=depthwise_kernel_shape,
            initializer=self.depthwise_initializer,
            name='depthwise_kernel',
            regularizer=self.depthwise_regularizer,
            constraint=self.depthwise_constraint)

        if self.use_bias:
            # One bias value per output channel.
            self.bias = self.add_weight(
                shape=(input_dim * self.depth_multiplier,),
                initializer=self.bias_initializer,
                name='bias',
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint)
        else:
            self.bias = None
        # Set input spec.
        self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim})
        self.built = True

    def call(self, inputs, training=None):
        """Applies the depthwise convolution, then bias and activation.

        # Arguments
            inputs: 4D input tensor.
            training: accepted for interface compatibility; not used here.

        # Returns
            The output tensor.
        """
        outputs = K.depthwise_conv2d(
            inputs,
            self.depthwise_kernel,
            strides=self.strides,
            padding=self.padding,
            dilation_rate=self.dilation_rate,
            data_format=self.data_format)

        # Check the boolean flag rather than the truth value of the bias
        # weight itself; `self.bias` is a backend variable when present.
        if self.use_bias:
            outputs = K.bias_add(
                outputs,
                self.bias,
                data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)

        return outputs

    def compute_output_shape(self, input_shape):
        """Computes the 4D output shape for a given 4D input shape.

        # Arguments
            input_shape: shape tuple of the input.

        # Returns
            Shape tuple laid out according to `self.data_format`, with
            `input_channels * depth_multiplier` output channels.
        """
        if self.data_format == 'channels_first':
            rows = input_shape[2]
            cols = input_shape[3]
            out_filters = input_shape[1] * self.depth_multiplier
        elif self.data_format == 'channels_last':
            rows = input_shape[1]
            cols = input_shape[2]
            out_filters = input_shape[3] * self.depth_multiplier

        rows = conv_utils.conv_output_length(rows, self.kernel_size[0],
                                             self.padding,
                                             self.strides[0])
        cols = conv_utils.conv_output_length(cols, self.kernel_size[1],
                                             self.padding,
                                             self.strides[1])

        if self.data_format == 'channels_first':
            return (input_shape[0], out_filters, rows, cols)
        elif self.data_format == 'channels_last':
            return (input_shape[0], rows, cols, out_filters)

    def get_config(self):
        """Returns the layer configuration as a dict.

        Starts from the `Conv2D` config, removes the entries that are not
        named arguments of this layer's `__init__`, and adds the
        depthwise-specific ones.
        """
        config = super(DepthwiseConv2D, self).get_config()
        config.pop('filters')
        config.pop('kernel_initializer')
        config.pop('kernel_regularizer')
        config.pop('kernel_constraint')
        config['depth_multiplier'] = self.depth_multiplier
        config['depthwise_initializer'] = initializers.serialize(
            self.depthwise_initializer)
        config['depthwise_regularizer'] = regularizers.serialize(
            self.depthwise_regularizer)
        config['depthwise_constraint'] = constraints.serialize(
            self.depthwise_constraint)
        return config
243 
244 
def SEMobileNet(input_shape=None,
                alpha=1.0,
                depth_multiplier=1,
                dropout=1e-3,
                include_top=True,
                weights=None,
                input_tensor=None,
                pooling=None,
                classes=1000):
    """Instantiates the SE-MobileNet architecture.

    Note that only TensorFlow is supported for now,
    therefore it only works with the data format
    `image_data_format='channels_last'` in your Keras config
    at `~/.keras/keras.json`.

    To load a MobileNet model via `load_model`, import the custom
    objects `relu6` and `DepthwiseConv2D` and pass them to the
    `custom_objects` parameter.
    E.g.
    model = load_model('mobilenet.h5', custom_objects={
                       'relu6': mobilenet.relu6,
                       'DepthwiseConv2D': mobilenet.DepthwiseConv2D})

    # Arguments
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` data format)
            or (3, 224, 224) (with `channels_first` data format).
            It should have exactly 3 inputs channels,
            and width and height should be no smaller than 32.
            E.g. `(200, 200, 3)` would be one valid value.
        alpha: controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
        depth_multiplier: depth multiplier for depthwise convolution
            (also called the resolution multiplier)
        dropout: dropout rate
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: `None` (random initialization) or `imagenet`.
            `imagenet` is accepted and used for input-shape validation,
            but this function does not load any weights; a warning is
            emitted and the returned model is randomly initialized.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
    """
    if K.backend() != 'tensorflow':
        raise RuntimeError('Only TensorFlow backend is currently supported, '
                           'as other backends do not support '
                           'depthwise convolution.')

    if weights not in {'imagenet', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `imagenet` '
                         '(pre-training on ImageNet).')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as ImageNet with `include_top` '
                         'as true, `classes` should be 1000')

    if weights == 'imagenet':
        # Nothing below loads weights; tell the caller instead of silently
        # handing back a randomly initialized model.
        warnings.warn('`SEMobileNet` does not load pre-trained ImageNet '
                      'weights; the returned model is randomly initialized.')

    # Determine proper input shape and default size.
    if input_shape is None:
        default_size = 224
    else:
        if K.image_data_format() == 'channels_first':
            rows = input_shape[1]
            cols = input_shape[2]
        else:
            rows = input_shape[0]
            cols = input_shape[1]

        if rows == cols and rows in [128, 160, 192, 224]:
            default_size = rows
        else:
            default_size = 224

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    # `rows` of the validated shape is used in the model name below.
    if K.image_data_format() == 'channels_last':
        row_axis = 0
    else:
        row_axis = 1
    rows = input_shape[row_axis]

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    x = _conv_block(img_input, 32, alpha, strides=(2, 2))

    # (pointwise filters, strides) for depthwise blocks 1..13, in order.
    block_specs = [
        (64, (1, 1)),
        (128, (2, 2)),
        (128, (1, 1)),
        (256, (2, 2)),
        (256, (1, 1)),
        (512, (2, 2)),
        (512, (1, 1)),
        (512, (1, 1)),
        (512, (1, 1)),
        (512, (1, 1)),
        (512, (1, 1)),
        (1024, (2, 2)),
        (1024, (1, 1)),
    ]
    for block_id, (block_filters, block_strides) in enumerate(block_specs, 1):
        x = _depthwise_conv_block(x, block_filters, alpha, depth_multiplier,
                                  strides=block_strides, block_id=block_id)

    if include_top:
        if K.image_data_format() == 'channels_first':
            shape = (int(1024 * alpha), 1, 1)
        else:
            shape = (1, 1, int(1024 * alpha))

        x = GlobalAveragePooling2D()(x)
        x = Reshape(shape, name='reshape_n_1')(x)
        x = Dropout(dropout, name='dropout')(x)
        x = Conv2D(classes, (1, 1),
                   padding='same', name='conv_preds')(x)
        # Flatten to `(classes,)` before the softmax so it normalizes over
        # the class scores in either data format. With `channels_first`
        # the conv output is `(classes, 1, 1)` and its last axis has
        # size 1. The result is unchanged for `channels_last`.
        x = Reshape((classes,), name='reshape_final')(x)
        x = Activation('softmax', name='act_softmax')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name='se_mobilenet_%0.2f_%s' % (alpha, rows))

    return model
420 
421 
def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
    """Builds the stem of the network: convolution, batch norm, relu6.

    # Arguments
        inputs: Input tensor of shape `(rows, cols, 3)`
            (with `channels_last` data format) or
            (3, rows, cols) (with `channels_first` data format).
        filters: Integer, base number of output filters of the
            convolution; it is multiplied by `alpha` and truncated to
            an int before use.
        alpha: width multiplier applied to `filters`.
            - `alpha` < 1.0 proportionally decreases the filter count.
            - `alpha` > 1.0 proportionally increases the filter count.
            - `alpha` = 1 keeps `filters` as given.
        kernel: An integer or tuple/list of 2 integers, the width and
            height of the 2D convolution window.
        strides: An integer or tuple/list of 2 integers, the strides of
            the convolution along the width and height.

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `(samples, filters, new_rows, new_cols)` if
        data_format='channels_first' or 4D tensor with shape:
        `(samples, new_rows, new_cols, filters)` if
        data_format='channels_last'.
        `rows` and `cols` values might have changed due to stride.

    # Returns
        Output tensor of block.
    """
    if K.image_data_format() == 'channels_first':
        bn_axis = 1
    else:
        bn_axis = -1

    scaled_filters = int(filters * alpha)

    conv_layer = Conv2D(scaled_filters, kernel,
                        padding='same',
                        use_bias=False,
                        strides=strides,
                        name='conv1')
    out = conv_layer(inputs)
    out = BatchNormalization(axis=bn_axis, name='conv1_bn')(out)
    out = Activation(relu6, name='conv1_relu')(out)
    return out
473 
474 
def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha,
                          depth_multiplier=1, strides=(1, 1), block_id=1):
    """Builds one depthwise block followed by squeeze-and-excite.

    The block chains: 3x3 depthwise conv, batch normalization, relu6,
    1x1 pointwise conv, batch normalization, relu6, and finally
    `squeeze_excite_block` called with its default settings.

    # Arguments
        inputs: Input tensor of shape `(rows, cols, channels)`
            (with `channels_last` data format) or
            (channels, rows, cols) (with `channels_first` data format).
        pointwise_conv_filters: Integer, base number of output filters of
            the pointwise convolution; it is multiplied by `alpha` and
            truncated to an int before use.
        alpha: width multiplier applied to `pointwise_conv_filters`.
            - `alpha` < 1.0 proportionally decreases the filter count.
            - `alpha` > 1.0 proportionally increases the filter count.
            - `alpha` = 1 keeps the filter count as given.
        depth_multiplier: The number of depthwise convolution output
            channels for each input channel.
        strides: An integer or tuple/list of 2 integers, the strides of
            the depthwise convolution along the width and height. The
            pointwise convolution always uses strides `(1, 1)`.
        block_id: Integer, a unique identification designating the block
            number; it is embedded in the layer names.

    # Input shape
        4D tensor with shape:
        `(batch, channels, rows, cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(batch, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `(batch, filters, new_rows, new_cols)` if
        data_format='channels_first' or 4D tensor with shape:
        `(batch, new_rows, new_cols, filters)` if
        data_format='channels_last'.
        `rows` and `cols` values might have changed due to stride.

    # Returns
        Output tensor of block.
    """
    if K.image_data_format() == 'channels_first':
        bn_axis = 1
    else:
        bn_axis = -1

    n_pointwise = int(pointwise_conv_filters * alpha)

    # Depthwise stage.
    depthwise = DepthwiseConv2D((3, 3),
                                padding='same',
                                depth_multiplier=depth_multiplier,
                                strides=strides,
                                use_bias=False,
                                name='conv_dw_%d' % block_id)
    out = depthwise(inputs)
    out = BatchNormalization(axis=bn_axis,
                             name='conv_dw_%d_bn' % block_id)(out)
    out = Activation(relu6, name='conv_dw_%d_relu' % block_id)(out)

    # Pointwise stage.
    pointwise = Conv2D(n_pointwise, (1, 1),
                       padding='same',
                       use_bias=False,
                       strides=(1, 1),
                       name='conv_pw_%d' % block_id)
    out = pointwise(out)
    out = BatchNormalization(axis=bn_axis,
                             name='conv_pw_%d_bn' % block_id)(out)
    out = Activation(relu6, name='conv_pw_%d_relu' % block_id)(out)

    # Squeeze-and-excite stage.
    return squeeze_excite_block(out)
def SEMobileNet(input_shape=None, alpha=1.0, depth_multiplier=1, dropout=1e-3, include_top=True, weights=None, input_tensor=None, pooling=None, classes=1000)
def call(self, inputs, training=None)
def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1), block_id=1)
def squeeze_excite_block(input, ratio=16)
Definition: se.py:5
def relu6(x)
Definition: se_mobilenet.py:37
def _obtain_input_shape(input_shape, default_size, min_size, data_format, require_flatten, weights=None)
def __init__(self, kernel_size, strides=(1, 1), padding='valid', depth_multiplier=1, data_format=None, activation=None, use_bias=True, depthwise_initializer='glorot_uniform', bias_initializer='zeros', depthwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, bias_constraint=None, kwargs)
def preprocess_input(x)
Definition: se_mobilenet.py:41
def compute_output_shape(self, input_shape)
def preprocess_input(x, data_format=None, mode='caffe')
def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1))
static QCString str
def build(self, input_shape)