1 '''ResNeXt models for Keras. 3 - [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/pdf/1611.05431.pdf)) 5 from __future__
import print_function
6 from __future__
import absolute_import
7 from __future__
import division
11 from keras.models
import Model
12 from keras.layers.core
import Dense, Lambda
13 from keras.layers.core
import Activation
14 from keras.layers.convolutional
import Conv2D
15 from keras.layers.pooling
import GlobalAveragePooling2D, GlobalMaxPooling2D, MaxPooling2D
16 from keras.layers
import Input
17 from keras.layers.merge
import concatenate, add
18 from keras.layers.normalization
import BatchNormalization
19 from keras.regularizers
import l2
20 from keras.utils.layer_utils
import convert_all_kernels_in_model
21 from keras.utils.data_utils
import get_file
22 from keras.engine.topology
import get_source_inputs
23 from keras.applications.imagenet_utils
import _obtain_input_shape
24 import keras.backend
as K
# Download URLs handed to `get_file` for the pre-trained weight files.
# Every entry is an empty placeholder string in this file.
CIFAR_TH_WEIGHTS_PATH = ''
CIFAR_TF_WEIGHTS_PATH = ''
CIFAR_TH_WEIGHTS_PATH_NO_TOP = ''
CIFAR_TF_WEIGHTS_PATH_NO_TOP = ''

IMAGENET_TH_WEIGHTS_PATH = ''
IMAGENET_TF_WEIGHTS_PATH = ''
IMAGENET_TH_WEIGHTS_PATH_NO_TOP = ''
IMAGENET_TF_WEIGHTS_PATH_NO_TOP = ''
def ResNext(input_shape=None, depth=29, cardinality=8, width=64, weight_decay=5e-4,
            include_top=True, weights=None, input_tensor=None,
            pooling=None, classes=10):
    """Instantiate the ResNeXt architecture.

    Note that, when using TensorFlow, for best performance you should set
    `image_data_format="channels_last"` in your Keras config
    at ~/.keras/keras.json.

    The model is compatible with both TensorFlow and Theano. The dimension
    ordering convention used by the model is the one specified in your
    Keras config file.

    # Arguments
        depth: number of layers in the ResNeXt model. Can be an
            integer or a list of integers.
        cardinality: the size of the set of transformations
        width: multiplier to the ResNeXt width (number of filters)
        weight_decay: weight decay (l2 norm)
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: `None` (random initialization) or `cifar10`
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(32, 32, 3)` (with `tf` dim ordering)
            or `(3, 32, 32)` (with `th` dim ordering).
            It should have exactly 3 inputs channels,
            and width and height should be no smaller than 8.
            E.g. `(200, 200, 3)` would be one valid value.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: if `weights` is neither `None` nor `'cifar10'`, if
            `'cifar10'` weights are requested with `include_top` and
            `classes != 10`, or if an integer `depth` does not satisfy
            `(depth - 2) % 9 == 0`.
    """
    # Imported locally so this function does not depend on a file-level import.
    import warnings

    if weights not in {'cifar10', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `cifar10` '
                         '(pre-training on CIFAR-10).')

    if weights == 'cifar10' and include_top and classes != 10:
        raise ValueError('If using `weights` as CIFAR 10 with `include_top`'
                         ' as true, `classes` should be 10')

    if isinstance(depth, int) and (depth - 2) % 9 != 0:
        raise ValueError('Depth of the network must be such that (depth - 2) '
                         'should be divisible by 9.')

    # Determine proper input shape (sizes follow the docstring above).
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=32,
                                      min_size=8,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    elif not K.is_keras_tensor(input_tensor):
        img_input = Input(tensor=input_tensor, shape=input_shape)
    else:
        img_input = input_tensor

    x = __create_res_next(classes, img_input, include_top, depth, cardinality, width,
                          weight_decay, pooling)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = Model(inputs, x, name='resnext')

    if weights == 'cifar10':
        if (depth == 29) and (cardinality == 8) and (width == 64):
            # Weight files are only looked up for the default configuration.
            if K.image_data_format() == 'channels_first':
                if include_top:
                    weights_path = get_file('resnext_cifar_10_8_64_th_dim_ordering_th_kernels.h5',
                                            CIFAR_TH_WEIGHTS_PATH,
                                            cache_subdir='models')
                else:
                    weights_path = get_file('resnext_cifar_10_8_64_th_dim_ordering_th_kernels_no_top.h5',
                                            CIFAR_TH_WEIGHTS_PATH_NO_TOP,
                                            cache_subdir='models')

                model.load_weights(weights_path)

                if K.backend() == 'tensorflow':
                    warnings.warn('You are using the TensorFlow backend, yet you '
                                  'are using the Theano '
                                  'image data format convention '
                                  '(`image_data_format="channels_first"`). '
                                  'For best performance, set '
                                  '`image_data_format="channels_last"` in '
                                  'your Keras config '
                                  'at ~/.keras/keras.json.')
                    convert_all_kernels_in_model(model)
            else:
                if include_top:
                    weights_path = get_file('resnext_cifar_10_8_64_tf_dim_ordering_tf_kernels.h5',
                                            CIFAR_TF_WEIGHTS_PATH,
                                            cache_subdir='models')
                else:
                    weights_path = get_file('resnext_cifar_10_8_64_tf_dim_ordering_tf_kernels_no_top.h5',
                                            CIFAR_TF_WEIGHTS_PATH_NO_TOP,
                                            cache_subdir='models')

                model.load_weights(weights_path)

                if K.backend() == 'theano':
                    convert_all_kernels_in_model(model)
        else:
            # Previously this case returned a randomly initialised model
            # without telling the caller that no weights had been loaded.
            warnings.warn('`cifar10` weights are only loaded for depth=29, '
                          'cardinality=8 and width=64; the model is returned '
                          'with random initialization.')

    return model
def ResNextImageNet(input_shape=None, depth=[3, 4, 6, 3], cardinality=32, width=4, weight_decay=5e-4,
                    include_top=True, weights=None, input_tensor=None,
                    pooling=None, classes=1000):
    """Instantiate the ResNeXt architecture for the ImageNet dataset.

    Note that, when using TensorFlow, for best performance you should set
    `image_data_format="channels_last"` in your Keras config
    at ~/.keras/keras.json.

    The model is compatible with both TensorFlow and Theano. The dimension
    ordering convention used by the model is the one specified in your
    Keras config file.

    # Arguments
        depth: number of layers in each block, defined as a list.
            ResNeXt-50 can be defined as [3, 4, 6, 3].
            ResNeXt-101 can be defined as [3, 4, 23, 3].
            Default is ResNeXt-50. The default list is never mutated here.
        cardinality: the size of the set of transformations
        width: multiplier to the ResNeXt width (number of filters)
        weight_decay: weight decay (l2 norm)
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: `None` (random initialization) or `imagenet`
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `tf` dim ordering)
            or `(3, 224, 224)` (with `th` dim ordering).
            It should have exactly 3 inputs channels,
            and width and height should be no smaller than 8.
            E.g. `(200, 200, 3)` would be one valid value.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: if `weights` is neither `None` nor `'imagenet'`, if
            `'imagenet'` weights are requested with `include_top` and
            `classes != 1000`, or if an integer `depth` does not satisfy
            `(depth - 2) % 9 == 0`.
    """
    # Imported locally so this function does not depend on a file-level import.
    import warnings

    if weights not in {'imagenet', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `imagenet` '
                         '(pre-training on ImageNet).')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as imagenet with `include_top`'
                         ' as true, `classes` should be 1000')

    if isinstance(depth, int) and (depth - 2) % 9 != 0:
        raise ValueError('Depth of the network must be such that (depth - 2) '
                         'should be divisible by 9.')

    # Determine proper input shape.
    # NOTE(review): default_size/min_size follow the docstring above
    # (224 default, no smaller than 8) -- confirm against the intended values.
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=8,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    elif not K.is_keras_tensor(input_tensor):
        img_input = Input(tensor=input_tensor, shape=input_shape)
    else:
        img_input = input_tensor

    x = __create_res_next_imagenet(classes, img_input, include_top, depth, cardinality, width,
                                   weight_decay, pooling)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = Model(inputs, x, name='resnext')

    if weights == 'imagenet':
        # Compare as a list so that a tuple (3, 4, 6, 3) also selects the
        # default configuration instead of silently skipping the weights.
        is_default_depth = (isinstance(depth, (list, tuple)) and
                            list(depth) == [3, 4, 6, 3])

        if is_default_depth and (cardinality == 32) and (width == 4):
            # Weight files are only looked up for the default configuration.
            if K.image_data_format() == 'channels_first':
                if include_top:
                    weights_path = get_file('resnext_imagenet_32_4_th_dim_ordering_th_kernels.h5',
                                            IMAGENET_TH_WEIGHTS_PATH,
                                            cache_subdir='models')
                else:
                    weights_path = get_file('resnext_imagenet_32_4_th_dim_ordering_th_kernels_no_top.h5',
                                            IMAGENET_TH_WEIGHTS_PATH_NO_TOP,
                                            cache_subdir='models')

                model.load_weights(weights_path)

                if K.backend() == 'tensorflow':
                    warnings.warn('You are using the TensorFlow backend, yet you '
                                  'are using the Theano '
                                  'image data format convention '
                                  '(`image_data_format="channels_first"`). '
                                  'For best performance, set '
                                  '`image_data_format="channels_last"` in '
                                  'your Keras config '
                                  'at ~/.keras/keras.json.')
                    convert_all_kernels_in_model(model)
            else:
                if include_top:
                    weights_path = get_file('resnext_imagenet_32_4_tf_dim_ordering_tf_kernels.h5',
                                            IMAGENET_TF_WEIGHTS_PATH,
                                            cache_subdir='models')
                else:
                    weights_path = get_file('resnext_imagenet_32_4_tf_dim_ordering_tf_kernels_no_top.h5',
                                            IMAGENET_TF_WEIGHTS_PATH_NO_TOP,
                                            cache_subdir='models')

                model.load_weights(weights_path)

                if K.backend() == 'theano':
                    convert_all_kernels_in_model(model)
        else:
            # Previously this case returned a randomly initialised model
            # without telling the caller that no weights had been loaded.
            warnings.warn('`imagenet` weights are only loaded for '
                          'depth=[3, 4, 6, 3], cardinality=32 and width=4; '
                          'the model is returned with random initialization.')

    return model
def __initial_conv_block(input, weight_decay=5e-4):
    ''' Adds an initial convolution block, with batch normalization and relu activation

    Args:
        input: input tensor
        weight_decay: weight decay factor

    Returns: a keras tensor
    '''
    # Batch norm normalises over the channel axis, whose position depends
    # on the configured image data format.
    bn_axis = 1 if K.image_data_format() == 'channels_first' else -1

    stem_conv = Conv2D(64, (3, 3), padding='same', use_bias=False,
                       kernel_initializer='he_normal',
                       kernel_regularizer=l2(weight_decay))
    out = stem_conv(input)
    out = BatchNormalization(axis=bn_axis)(out)
    return Activation('relu')(out)
def __initial_conv_block_imagenet(input, weight_decay=5e-4):
    ''' Adds an initial conv block, with batch norm and relu for the inception resnext

    Args:
        input: input tensor
        weight_decay: weight decay factor

    Returns: a keras tensor
    '''
    # Batch norm normalises over the channel axis, whose position depends
    # on the configured image data format.
    bn_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # 7x7 convolution with stride 2, then a 3x3 max-pool with stride 2.
    stem_conv = Conv2D(64, (7, 7), padding='same', use_bias=False,
                       kernel_initializer='he_normal',
                       kernel_regularizer=l2(weight_decay), strides=(2, 2))
    out = stem_conv(input)
    out = BatchNormalization(axis=bn_axis)(out)
    out = Activation('relu')(out)

    return MaxPooling2D((3, 3), strides=(2, 2), padding='same')(out)
def __grouped_convolution_block(input, grouped_channels, cardinality, strides, weight_decay=5e-4):
    ''' Adds a grouped convolution block. It is an equivalent block from the paper

    Args:
        input: input tensor
        grouped_channels: grouped number of filters
        cardinality: cardinality factor describing the number of groups
        strides: performs strided convolution for downscaling if > 1
        weight_decay: weight decay term

    Returns: a keras tensor
    '''
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if cardinality == 1:
        # With a single group this is an ordinary convolution.
        x = Conv2D(grouped_channels, (3, 3), padding='same', use_bias=False, strides=(strides, strides),
                   kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(input)
        x = BatchNormalization(axis=channel_axis)(x)
        x = Activation('relu')(x)
        return x

    channels_last = K.image_data_format() == 'channels_last'
    group_list = []

    for c in range(cardinality):
        start = c * grouped_channels
        stop = start + grouped_channels

        # The slicing function is chosen with a statement, not inside the
        # lambda: `lambda z: A if cond else lambda z: B` is a single lambda
        # whose else-branch returns a function instead of a tensor.
        # `start`/`stop` are bound as defaults so every group keeps its own
        # slice bounds instead of sharing the loop variable.
        if channels_last:
            slice_group = lambda z, start=start, stop=stop: z[:, :, :, start:stop]
        else:
            slice_group = lambda z, start=start, stop=stop: z[:, start:stop, :, :]

        x = Lambda(slice_group)(input)

        x = Conv2D(grouped_channels, (3, 3), padding='same', use_bias=False, strides=(strides, strides),
                   kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(x)

        group_list.append(x)

    group_merge = concatenate(group_list, axis=channel_axis)
    x = BatchNormalization(axis=channel_axis)(group_merge)
    x = Activation('relu')(x)

    return x
def __bottleneck_block(input, filters=64, cardinality=8, strides=1, weight_decay=5e-4):
    ''' Adds a bottleneck block

    Args:
        input: input tensor
        filters: number of output filters
        cardinality: cardinality factor described number of
            grouped convolutions
        strides: performs strided convolution for downsampling if > 1
        weight_decay: weight decay factor

    Returns: a keras tensor
    '''
    init = input

    grouped_channels = int(filters / cardinality)
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # Project the shortcut with a 1x1 convolution when its channel count
    # differs from the block output (2 * filters). `K.int_shape` is the
    # public way to read the static shape; indexing it with `channel_axis`
    # covers both data formats in one branch.
    if K.int_shape(init)[channel_axis] != 2 * filters:
        init = Conv2D(filters * 2, (1, 1), padding='same', strides=(strides, strides),
                      use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(init)
        init = BatchNormalization(axis=channel_axis)(init)

    x = Conv2D(filters, (1, 1), padding='same', use_bias=False,
               kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(input)
    x = BatchNormalization(axis=channel_axis)(x)
    x = Activation('relu')(x)

    x = __grouped_convolution_block(x, grouped_channels, cardinality, strides, weight_decay)

    x = Conv2D(filters * 2, (1, 1), padding='same', use_bias=False, kernel_initializer='he_normal',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization(axis=channel_axis)(x)

    # Residual connection, followed by the final activation.
    x = add([init, x])
    x = Activation('relu')(x)

    return x
def __create_res_next(nb_classes, img_input, include_top, depth=29, cardinality=8, width=4,
                      weight_decay=5e-4, pooling=None):
    ''' Creates a ResNeXt model with specified parameters

    Args:
        nb_classes: Number of output classes
        img_input: Input tensor or layer
        include_top: Flag to include the last dense layer
        depth: Depth of the network. Can be an positive integer or a list
               Compute N = (n - 2) / 9.
               For a depth of 56, n = 56, N = (56 - 2) / 9 = 6
               For a depth of 101, n = 101, N = (101 - 2) / 9 = 11
        cardinality: the size of the set of transformations.
        width: Width of the network.
        weight_decay: weight_decay (l2 norm)
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.

    Returns: the output tensor of the network
    '''
    # isinstance also accepts list/tuple subclasses, which an exact type
    # check would send down the integer branch.
    if isinstance(depth, (list, tuple)):
        # A sequence gives the number of bottleneck blocks per stage.
        N = list(depth)
    else:
        # An integer depth yields three stages of equal size.
        N = [(depth - 2) // 9 for _ in range(3)]

    # The filter count starts at cardinality * width and doubles per stage.
    filters = cardinality * width
    filters_list = []
    for _ in N:
        filters_list.append(filters)
        filters *= 2

    x = __initial_conv_block(img_input, weight_decay)

    # NOTE(review): stride pattern (first stage unstrided, later stages
    # downsample in their first block) -- confirm against the intended design.
    for stage_idx, (n_blocks, n_filters) in enumerate(zip(N, filters_list)):
        for block_idx in range(n_blocks):
            downsample = stage_idx > 0 and block_idx == 0
            x = __bottleneck_block(x, n_filters, cardinality, strides=2 if downsample else 1,
                                   weight_decay=weight_decay)

    if include_top:
        x = GlobalAveragePooling2D()(x)
        x = Dense(nb_classes, use_bias=False, kernel_regularizer=l2(weight_decay),
                  kernel_initializer='he_normal', activation='softmax')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    return x
def __create_res_next_imagenet(nb_classes, img_input, include_top, depth, cardinality=32, width=4,
                               weight_decay=5e-4, pooling=None):
    ''' Creates a ResNeXt model with specified parameters

    Args:
        nb_classes: Number of output classes
        img_input: Input tensor or layer
        include_top: Flag to include the last dense layer
        depth: Depth of the network. List of integers.
        cardinality: the size of the set of transformations.
        width: Width of the network.
        weight_decay: weight_decay (l2 norm)
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.

    Returns: the output tensor of the network
    '''
    # isinstance also accepts list/tuple subclasses, which an exact type
    # check would send down the integer branch.
    if isinstance(depth, (list, tuple)):
        # A sequence gives the number of bottleneck blocks per stage.
        N = list(depth)
    else:
        # An integer depth yields three stages of equal size.
        N = [(depth - 2) // 9 for _ in range(3)]

    # The filter count starts at cardinality * width and doubles per stage.
    filters = cardinality * width
    filters_list = []
    for _ in N:
        filters_list.append(filters)
        filters *= 2

    x = __initial_conv_block_imagenet(img_input, weight_decay)

    # NOTE(review): stride pattern (first stage unstrided, later stages
    # downsample in their first block) -- confirm against the intended design.
    for stage_idx, (n_blocks, n_filters) in enumerate(zip(N, filters_list)):
        for block_idx in range(n_blocks):
            downsample = stage_idx > 0 and block_idx == 0
            x = __bottleneck_block(x, n_filters, cardinality, strides=2 if downsample else 1,
                                   weight_decay=weight_decay)

    if include_top:
        x = GlobalAveragePooling2D()(x)
        x = Dense(nb_classes, use_bias=False, kernel_regularizer=l2(weight_decay),
                  kernel_initializer='he_normal', activation='softmax')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    return x
571 if __name__ == '__main__': 572 model = ResNext((32, 32, 3), depth=29, cardinality=8, width=64) def __initial_conv_block(input, weight_decay=5e-4)
def ResNextImageNet(input_shape=None, depth=[3, cardinality=32, width=4, weight_decay=5e-4, include_top=True, weights=None, input_tensor=None, pooling=None, classes=1000)
def __create_res_next_imagenet(nb_classes, img_input, include_top, depth, cardinality=32, width=4, weight_decay=5e-4, pooling=None)
Coord add(Coord c1, Coord c2)
std::string concatenate(H const &h, T const &...t)
auto enumerate(Iterables &&...iterables)
Range-for loop helper tracking the number of iteration.
def _obtain_input_shape(input_shape, default_size, min_size, data_format, require_flatten, weights=None)
def __bottleneck_block(input, filters=64, cardinality=8, strides=1, weight_decay=5e-4)
def __grouped_convolution_block(input, grouped_channels, cardinality, strides, weight_decay=5e-4)
def __create_res_next(nb_classes, img_input, include_top, depth=29, cardinality=8, width=4, weight_decay=5e-4, pooling=None)
def ResNext(input_shape=None, depth=29, cardinality=8, width=64, weight_decay=5e-4, include_top=True, weights=None, input_tensor=None, pooling=None, classes=10)
def __initial_conv_block_imagenet(input, weight_decay=5e-4)