1 """SE MobileNet v1 models for Keras. 4 - [MobileNets: Efficient Convolutional Neural Networks for 5 Mobile Vision Applications](https://arxiv.org/pdf/1704.04861.pdf) 7 from __future__
import print_function
8 from __future__
import absolute_import
9 from __future__
import division
13 from keras.models
import Model
14 from keras.layers
import Input
15 from keras.layers
import Activation
16 from keras.layers
import Dropout
17 from keras.layers
import Reshape
18 from keras.layers
import BatchNormalization
19 from keras.layers
import GlobalAveragePooling2D
20 from keras.layers
import GlobalMaxPooling2D
21 from keras.layers
import Conv2D
22 from keras
import initializers
23 from keras
import regularizers
24 from keras
import constraints
25 from keras.utils
import conv_utils
26 from keras.utils.data_utils
import get_file
27 from keras.engine.topology
import get_source_inputs
28 from keras.engine
import InputSpec
29 from keras.applications
import imagenet_utils
30 from keras.applications.imagenet_utils
import _obtain_input_shape
31 from keras.applications.imagenet_utils
import decode_predictions
32 from keras
import backend
as K
34 from se
import squeeze_excite_block
38 return K.relu(x, max_value=6)
42 """Preprocesses a numpy array encoding a batch of images. 44 x: a 4D numpy array consisting of RGB values within [0, 255]. 52 """Depthwise separable 2D convolution. 53 Depthwise separable convolutions consist of performing 54 just the first step in a depthwise spatial convolution 55 (which acts on each input channel separately). 56 The `depth_multiplier` argument controls how many 57 output channels are generated per input channel in the depthwise step. 59 kernel_size: An integer or tuple/list of 2 integers, specifying the 60 width and height of the 2D convolution window. 61 Can be a single integer to specify the same value for 62 all spatial dimensions. 63 strides: An integer or tuple/list of 2 integers, 64 specifying the strides of the convolution along the width and height. 65 Can be a single integer to specify the same value for 66 all spatial dimensions. 67 Specifying any stride value != 1 is incompatible with specifying 68 any `dilation_rate` value != 1. 69 padding: one of `'valid'` or `'same'` (case-insensitive). 70 depth_multiplier: The number of depthwise convolution output channels 71 for each input channel. 72 The total number of depthwise convolution output 73 channels will be equal to `filters_in * depth_multiplier`. 74 data_format: A string, 75 one of `channels_last` (default) or `channels_first`. 76 The ordering of the dimensions in the inputs. 77 `channels_last` corresponds to inputs with shape 78 `(batch, height, width, channels)` while `channels_first` 79 corresponds to inputs with shape 80 `(batch, channels, height, width)`. 81 It defaults to the `image_data_format` value found in your 82 Keras config file at `~/.keras/keras.json`. 83 If you never set it, then it will be 'channels_last'. 84 activation: Activation function to use 85 (see [activations](../activations.md)). 86 If you don't specify anything, no activation is applied 87 (i.e. 'linear' activation: `a(x) = x`). 88 use_bias: Boolean, whether the layer uses a bias vector. 
89 depthwise_initializer: Initializer for the depthwise kernel matrix 90 (see [initializers](../initializers.md)). 91 bias_initializer: Initializer for the bias vector 92 (see [initializers](../initializers.md)). 93 depthwise_regularizer: Regularizer function applied to 94 the depthwise kernel matrix 95 (see [regularizer](../regularizers.md)). 96 bias_regularizer: Regularizer function applied to the bias vector 97 (see [regularizer](../regularizers.md)). 98 activity_regularizer: Regularizer function applied to 99 the output of the layer (its 'activation'). 100 (see [regularizer](../regularizers.md)). 101 depthwise_constraint: Constraint function applied to 102 the depthwise kernel matrix 103 (see [constraints](../constraints.md)). 104 bias_constraint: Constraint function applied to the bias vector 105 (see [constraints](../constraints.md)). 107 4D tensor with shape: 108 `[batch, channels, rows, cols]` if data_format='channels_first' 109 or 4D tensor with shape: 110 `[batch, rows, cols, channels]` if data_format='channels_last'. 112 4D tensor with shape: 113 `[batch, filters, new_rows, new_cols]` if data_format='channels_first' 114 or 4D tensor with shape: 115 `[batch, new_rows, new_cols, filters]` if data_format='channels_last'. 116 `rows` and `cols` values might have changed due to padding. 127 depthwise_initializer=
'glorot_uniform',
128 bias_initializer=
'zeros',
129 depthwise_regularizer=
None,
130 bias_regularizer=
None,
131 activity_regularizer=
None,
132 depthwise_constraint=
None,
133 bias_constraint=
None,
135 super(DepthwiseConv2D, self).
__init__(
137 kernel_size=kernel_size,
140 data_format=data_format,
141 activation=activation,
143 bias_regularizer=bias_regularizer,
144 activity_regularizer=activity_regularizer,
145 bias_constraint=bias_constraint,
154 if len(input_shape) < 4:
155 raise ValueError(
'Inputs to `DepthwiseConv2D` should have rank 4. ' 156 'Received input shape:',
str(input_shape))
161 if input_shape[channel_axis]
is None:
162 raise ValueError(
'The channel dimension of the inputs to ' 164 'should be defined. Found `None`.')
165 input_dim =
int(input_shape[channel_axis])
166 depthwise_kernel_shape = (self.kernel_size[0],
172 shape=depthwise_kernel_shape,
174 name=
'depthwise_kernel',
182 regularizer=self.bias_regularizer,
183 constraint=self.bias_constraint)
187 self.
input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim})
190 def call(self, inputs, training=None):
191 outputs = K.depthwise_conv2d(
194 strides=self.strides,
195 padding=self.padding,
196 dilation_rate=self.dilation_rate,
200 outputs = K.bias_add(
205 if self.activation
is not None:
206 return self.activation(outputs)
212 rows = input_shape[2]
213 cols = input_shape[3]
216 rows = input_shape[1]
217 cols = input_shape[2]
220 rows = conv_utils.conv_output_length(rows, self.kernel_size[0],
223 cols = conv_utils.conv_output_length(cols, self.kernel_size[1],
228 return (input_shape[0], out_filters, rows, cols)
230 return (input_shape[0], rows, cols, out_filters)
233 config = super(DepthwiseConv2D, self).
get_config()
234 config.pop(
'filters')
235 config.pop(
'kernel_initializer')
236 config.pop(
'kernel_regularizer')
237 config.pop(
'kernel_constraint')
254 """Instantiates the SE-MobileNet architecture. 255 Note that only TensorFlow is supported for now, 256 therefore it only works with the data format 257 `image_data_format='channels_last'` in your Keras config 258 at `~/.keras/keras.json`. 259 To load a MobileNet model via `load_model`, import the custom 260 objects `relu6` and `DepthwiseConv2D` and pass them to the 261 `custom_objects` parameter. 263 model = load_model('mobilenet.h5', custom_objects={ 264 'relu6': mobilenet.relu6, 265 'DepthwiseConv2D': mobilenet.DepthwiseConv2D}) 267 input_shape: optional shape tuple, only to be specified 268 if `include_top` is False (otherwise the input shape 269 has to be `(224, 224, 3)` (with `channels_last` data format) 270 or (3, 224, 224) (with `channels_first` data format)). 271 It should have exactly 3 input channels, 272 and width and height should be no smaller than 32. 273 E.g. `(200, 200, 3)` would be one valid value. 274 alpha: controls the width of the network. 275 - If `alpha` < 1.0, proportionally decreases the number 276 of filters in each layer. 277 - If `alpha` > 1.0, proportionally increases the number 278 of filters in each layer. 279 - If `alpha` = 1, default number of filters from the paper 280 are used at each layer. 281 depth_multiplier: depth multiplier for depthwise convolution 282 (also called the resolution multiplier) 283 dropout: dropout rate 284 include_top: whether to include the fully-connected 285 layer at the top of the network. 286 weights: `None` (random initialization) or 287 `imagenet` (ImageNet weights) 288 input_tensor: optional Keras tensor (i.e. output of 290 to use as image input for the model. 291 pooling: Optional pooling mode for feature extraction 292 when `include_top` is `False`. 293 - `None` means that the output of the model 294 will be the 4D tensor output of the 295 last convolutional layer. 
296 - `avg` means that global average pooling 297 will be applied to the output of the 298 last convolutional layer, and thus 299 the output of the model will be a 301 - `max` means that global max pooling will 303 classes: optional number of classes to classify images 304 into, only to be specified if `include_top` is True, and 305 if no `weights` argument is specified. 307 A Keras model instance. 309 ValueError: in case of invalid argument for `weights`, 310 or invalid input shape. 311 RuntimeError: If attempting to run this model with a 312 backend that does not support separable convolutions. 315 if K.backend() !=
'tensorflow':
316 raise RuntimeError(
'Only TensorFlow backend is currently supported, ' 317 'as other backends do not support ' 318 'depthwise convolution.')
320 if weights
not in {
'imagenet',
None}:
321 raise ValueError(
'The `weights` argument should be either ' 322 '`None` (random initialization) or `imagenet` ' 323 '(pre-training on ImageNet).')
325 if weights ==
'imagenet' and include_top
and classes != 1000:
326 raise ValueError(
'If using `weights` as ImageNet with `include_top` ' 327 'as true, `classes` should be 1000')
330 if input_shape
is None:
333 if K.image_data_format() ==
'channels_first':
334 rows = input_shape[1]
335 cols = input_shape[2]
337 rows = input_shape[0]
338 cols = input_shape[1]
340 if rows == cols
and rows
in [128, 160, 192, 224]:
346 default_size=default_size,
348 data_format=K.image_data_format(),
349 require_flatten=include_top,
352 if K.image_data_format() ==
'channels_last':
353 row_axis, col_axis = (0, 1)
355 row_axis, col_axis = (1, 2)
356 rows = input_shape[row_axis]
357 cols = input_shape[col_axis]
359 if input_tensor
is None:
360 img_input = Input(shape=input_shape)
362 if not K.is_keras_tensor(input_tensor):
363 img_input = Input(tensor=input_tensor, shape=input_shape)
365 img_input = input_tensor
367 x =
_conv_block(img_input, 32, alpha, strides=(2, 2))
371 strides=(2, 2), block_id=2)
375 strides=(2, 2), block_id=4)
379 strides=(2, 2), block_id=6)
387 strides=(2, 2), block_id=12)
391 if K.image_data_format() ==
'channels_first':
392 shape = (
int(1024 * alpha), 1, 1)
394 shape = (1, 1,
int(1024 * alpha))
396 x = GlobalAveragePooling2D()(x)
397 x = Reshape(shape, name=
'reshape_n_1')(x)
398 x = Dropout(dropout, name=
'dropout')(x)
399 x = Conv2D(classes, (1, 1),
400 padding=
'same', name=
'conv_preds')(x)
401 x = Activation(
'softmax', name=
'act_softmax')(x)
402 x = Reshape((classes,), name=
'reshape_final')(x)
405 x = GlobalAveragePooling2D()(x)
406 elif pooling ==
'max':
407 x = GlobalMaxPooling2D()(x)
411 if input_tensor
is not None:
412 inputs = get_source_inputs(input_tensor)
417 model = Model(inputs, x, name=
'se_mobilenet_%0.2f_%s' % (alpha, rows))
422 def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
423 """Adds an initial convolution layer (with batch normalization and relu6). 425 inputs: Input tensor of shape `(rows, cols, 3)` 426 (with `channels_last` data format) or 427 (3, rows, cols) (with `channels_first` data format). 428 It should have exactly 3 inputs channels, 429 and width and height should be no smaller than 32. 430 E.g. `(224, 224, 3)` would be one valid value. 431 filters: Integer, the dimensionality of the output space 432 (i.e. the number output of filters in the convolution). 433 alpha: controls the width of the network. 434 - If `alpha` < 1.0, proportionally decreases the number 435 of filters in each layer. 436 - If `alpha` > 1.0, proportionally increases the number 437 of filters in each layer. 438 - If `alpha` = 1, default number of filters from the paper 439 are used at each layer. 440 kernel: An integer or tuple/list of 2 integers, specifying the 441 width and height of the 2D convolution window. 442 Can be a single integer to specify the same value for 443 all spatial dimensions. 444 strides: An integer or tuple/list of 2 integers, 445 specifying the strides of the convolution along the width and height. 446 Can be a single integer to specify the same value for 447 all spatial dimensions. 448 Specifying any stride value != 1 is incompatible with specifying 449 any `dilation_rate` value != 1. 451 4D tensor with shape: 452 `(samples, channels, rows, cols)` if data_format='channels_first' 453 or 4D tensor with shape: 454 `(samples, rows, cols, channels)` if data_format='channels_last'. 456 4D tensor with shape: 457 `(samples, filters, new_rows, new_cols)` if data_format='channels_first' 458 or 4D tensor with shape: 459 `(samples, new_rows, new_cols, filters)` if data_format='channels_last'. 460 `rows` and `cols` values might have changed due to stride. 462 Output tensor of block. 464 channel_axis = 1
if K.image_data_format() ==
'channels_first' else -1
465 filters =
int(filters * alpha)
466 x = Conv2D(filters, kernel,
470 name=
'conv1')(inputs)
471 x = BatchNormalization(axis=channel_axis, name=
'conv1_bn')(x)
472 return Activation(relu6, name=
'conv1_relu')(x)
476 depth_multiplier=1, strides=(1, 1), block_id=1):
477 """Adds a depthwise convolution block. 478 A depthwise convolution block consists of a depthwise conv, 479 batch normalization, relu6, pointwise convolution, 480 batch normalization and relu6 activation. 482 inputs: Input tensor of shape `(rows, cols, channels)` 483 (with `channels_last` data format) or 484 (channels, rows, cols) (with `channels_first` data format). 485 pointwise_conv_filters: Integer, the dimensionality of the output space 486 (i.e. the number output of filters in the pointwise convolution). 487 alpha: controls the width of the network. 488 - If `alpha` < 1.0, proportionally decreases the number 489 of filters in each layer. 490 - If `alpha` > 1.0, proportionally increases the number 491 of filters in each layer. 492 - If `alpha` = 1, default number of filters from the paper 493 are used at each layer. 494 depth_multiplier: The number of depthwise convolution output channels 495 for each input channel. 496 The total number of depthwise convolution output 497 channels will be equal to `filters_in * depth_multiplier`. 498 strides: An integer or tuple/list of 2 integers, 499 specifying the strides of the convolution along the width and height. 500 Can be a single integer to specify the same value for 501 all spatial dimensions. 502 Specifying any stride value != 1 is incompatible with specifying 503 any `dilation_rate` value != 1. 504 block_id: Integer, a unique identification designating the block number. 506 4D tensor with shape: 507 `(batch, channels, rows, cols)` if data_format='channels_first' 508 or 4D tensor with shape: 509 `(batch, rows, cols, channels)` if data_format='channels_last'. 511 4D tensor with shape: 512 `(batch, filters, new_rows, new_cols)` if data_format='channels_first' 513 or 4D tensor with shape: 514 `(batch, new_rows, new_cols, filters)` if data_format='channels_last'. 515 `rows` and `cols` values might have changed due to stride. 517 Output tensor of block. 519 channel_axis = 1
if K.image_data_format() ==
'channels_first' else -1
520 pointwise_conv_filters =
int(pointwise_conv_filters * alpha)
524 depth_multiplier=depth_multiplier,
527 name=
'conv_dw_%d' % block_id)(inputs)
528 x = BatchNormalization(axis=channel_axis, name=
'conv_dw_%d_bn' % block_id)(x)
529 x = Activation(relu6, name=
'conv_dw_%d_relu' % block_id)(x)
531 x = Conv2D(pointwise_conv_filters, (1, 1),
535 name=
'conv_pw_%d' % block_id)(x)
536 x = BatchNormalization(axis=channel_axis, name=
'conv_pw_%d_bn' % block_id)(x)
537 x = Activation(relu6, name=
'conv_pw_%d_relu' % block_id)(x)
def SEMobileNet(input_shape=None, alpha=1.0, depth_multiplier=1, dropout=1e-3, include_top=True, weights=None, input_tensor=None, pooling=None, classes=1000)
def call(self, inputs, training=None)
def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1), block_id=1)
def squeeze_excite_block(input, ratio=16)
def _obtain_input_shape(input_shape, default_size, min_size, data_format, require_flatten, weights=None)
def __init__(self, kernel_size, strides=(1, 1), padding='valid', depth_multiplier=1, data_format=None, activation=None, use_bias=True, depthwise_initializer='glorot_uniform', bias_initializer='zeros', depthwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, bias_constraint=None, kwargs)
def compute_output_shape(self, input_shape)
def preprocess_input(x, data_format=None, mode='caffe')
def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1))
def build(self, input_shape)