Ching-Chuan Chen's Blog

Statistics, Machine Learning and Programming


Swish Beta Function In Keras

Google Brain published a paper, "Swish: A Self-Gated Activation Function" (arXiv link).
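In the paper, Swish is defined as

f(x) = x * sigmoid(beta * x)

where beta is either a fixed constant or a trainable parameter. With beta = 1 it reduces to the sigmoid-weighted linear unit (SiLU), and as beta grows large it approaches ReLU.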

In this post, I use the Keras custom-layer API to implement the Swish Beta function described in the paper.

Here is the source code: GitHub.

import keras
from keras import backend as K
from keras import initializers
from keras.datasets import mnist
from keras.layers import Dense, Conv2D, MaxPooling2D
from keras.layers import BatchNormalization, GlobalAveragePooling2D
from keras.models import Sequential
from keras.engine.topology import Layer, InputSpec

class SwishBeta(Layer):
    def __init__(self, trainable_beta=False, beta_initializer='ones', **kwargs):
        super(SwishBeta, self).__init__(**kwargs)
        self.supports_masking = True
        self.trainable_beta = trainable_beta
        self.trainable = trainable_beta
        self.beta_initializer = initializers.get(beta_initializer)

    def build(self, input_shape):
        # One scalar beta shared by the whole layer
        self.beta = self.add_weight(shape=[1], name='beta',
                                    initializer=self.beta_initializer)
        self.input_spec = InputSpec(ndim=len(input_shape))
        self.built = True

    def call(self, inputs):
        # Swish: f(x) = x * sigmoid(beta * x)
        return inputs * K.sigmoid(self.beta * inputs)

    def get_config(self):
        config = {'trainable_beta': self.trainable_beta,
                  'beta_initializer': initializers.serialize(self.beta_initializer)}
        base_config = super(SwishBeta, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

num_classes = 10
img_rows, img_cols = 28, 28

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Reshape to (samples, rows, cols, channels) and scale pixels to [0, 1]
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255.
x_test /= 255.

y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Stacked Conv -> BatchNorm -> SwishBeta blocks, each with a trainable beta
model = Sequential()
model.add(Conv2D(64, kernel_size=(3, 3), padding='same',
                 kernel_initializer='he_uniform', input_shape=input_shape))
model.add(BatchNormalization())
model.add(SwishBeta(True))
model.add(Conv2D(128, (3, 3), padding='same',
                 kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(SwishBeta(True))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(256, (3, 3), padding='same',
                 kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(SwishBeta(True))
model.add(Conv2D(256, (3, 3), padding='same',
                 kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(SwishBeta(True))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(512, (3, 3), padding='same',
                 kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(SwishBeta(True))
model.add(Conv2D(512, (3, 3), padding='same',
                 kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(SwishBeta(True))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(GlobalAveragePooling2D())
model.add(SwishBeta(True))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])

history = model.fit(x_train, y_train,
                    batch_size=128,
                    epochs=500,
                    verbose=1,
                    callbacks=[keras.callbacks.EarlyStopping(patience=7)],
                    validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])  # 99.66%
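One practical note: since SwishBeta is a custom layer, a model saved to disk cannot be reloaded unless Keras is told how to deserialize it; this is exactly what get_config above supports. A minimal sketch of saving, reloading, and inspecting the learned betas (the file name swish_mnist.h5 is just an illustrative choice):

from keras.models import load_model

model.save('swish_mnist.h5')  # illustrative file name
restored = load_model('swish_mnist.h5',
                      custom_objects={'SwishBeta': SwishBeta})

# Each SwishBeta layer holds one scalar beta learned during training
for layer in restored.layers:
    if isinstance(layer, SwishBeta):
        print(layer.name, 'beta =', layer.get_weights()[0][0])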