CIFAR-10 Image Classification (Apache MXNet)
Introduction:
In the previous post I showed a sample using Apache MXNet, so as a follow-up,
here is training and image classification on CIFAR-10:
import mxnet as mx
from mxnet import nd, autograd, gluon
from mxnet.gluon import nn
from mxnet.gluon.data.vision import datasets, transforms
# Select GPU if available
ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()
# Load the data
cifar_train = datasets.CIFAR10(train=True)
cifar_test = datasets.CIFAR10(train=False)
# Define the transforms
transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    # Per-channel mean/std commonly used for CIFAR-10
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])
# Apply the transforms
train_data = cifar_train.transform_first(transform)
test_data = cifar_test.transform_first(transform)
# Create DataLoader
batch_size = 128
train_loader = gluon.data.DataLoader(train_data, batch_size, shuffle=True)
test_loader = gluon.data.DataLoader(test_data, batch_size, shuffle=False)
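# Optional sanity check (added, not in the original post): ToTensor yields
# CHW float32, so one batch should be (128, 3, 32, 32) with labels (128,).
for data, label in train_loader:
    print(data.shape, data.dtype, label.shape)
    break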
# Define the network
net = nn.Sequential()
net.add(nn.Conv2D(channels=32, kernel_size=3, padding=1, activation='relu'))
net.add(nn.BatchNorm())
net.add(nn.Conv2D(channels=32, kernel_size=3, padding=1, activation='relu'))
net.add(nn.BatchNorm())
net.add(nn.MaxPool2D(pool_size=2, strides=2))
net.add(nn.Dropout(0.3))
net.add(nn.Conv2D(channels=64, kernel_size=3, padding=1, activation='relu'))
net.add(nn.BatchNorm())
net.add(nn.Conv2D(channels=64, kernel_size=3, padding=1, activation='relu'))
net.add(nn.BatchNorm())
net.add(nn.MaxPool2D(pool_size=2, strides=2))
net.add(nn.Dropout(0.5))
net.add(nn.Conv2D(channels=128, kernel_size=3, padding=1, activation='relu'))
net.add(nn.BatchNorm())
net.add(nn.Conv2D(channels=128, kernel_size=3, padding=1, activation='relu'))
net.add(nn.BatchNorm())
net.add(nn.MaxPool2D(pool_size=2, strides=2))
net.add(nn.Dropout(0.5))
net.add(nn.Flatten())
net.add(nn.Dense(128, activation='relu'))
net.add(nn.Dropout(0.5))
net.add(nn.Dense(10))
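# (Added note: the three 2x2 max-poolings take 32x32 inputs down to 4x4, so
#  Flatten feeds 128*4*4 = 2048 features into Dense(128); Dense(10) matches
#  the 10 CIFAR-10 classes.)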
# Initialize the network
net.initialize(mx.init.Xavier(), ctx=ctx)
# Define the loss function and the trainer
loss_function = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': 0.001})
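# (Optional alternative, an assumption not in the original post: decay the
#  learning rate during training with a scheduler instead of a fixed 0.001.)
# schedule = mx.lr_scheduler.FactorScheduler(step=20 * len(train_loader), factor=0.5)
# trainer = gluon.Trainer(net.collect_params(), 'adam',
#                         {'learning_rate': 0.001, 'lr_scheduler': schedule})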
# Training loop
epochs = 50
for epoch in range(epochs):
    train_loss = 0.0
    for data, label in train_loader:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = loss_function(output, label)
        loss.backward()
        trainer.step(batch_size)
        train_loss += nd.sum(loss).asscalar()
    print(f"Epoch {epoch + 1}, Loss: {train_loss / len(train_data)}")
Output:
Epoch 1, Loss: 1.5866488148498534
Epoch 2, Loss: 1.3753878662109376
Epoch 3, Loss: 1.220582713470459
Epoch 4, Loss: 1.1067728715515137
Epoch 5, Loss: 1.0157390971374511
Epoch 6, Loss: 0.9450569129943848
Epoch 7, Loss: 0.8908607377624512
Epoch 8, Loss: 0.836358444442749
Epoch 9, Loss: 0.7913408838653564
Epoch 10, Loss: 0.7615124284362793
Epoch 11, Loss: 0.7251366371917725
Epoch 12, Loss: 0.7106648611450195
Epoch 13, Loss: 0.6845232231903077
Epoch 14, Loss: 0.6593256127166748
Epoch 15, Loss: 0.6442832369232178
Epoch 16, Loss: 0.6172565077972412
Epoch 17, Loss: 0.5994904010772705
Epoch 18, Loss: 0.5869589791870117
Epoch 19, Loss: 0.5688837683105469
Epoch 20, Loss: 0.5571189955902099
Epoch 21, Loss: 0.5475915368652344
Epoch 22, Loss: 0.5308880572509765
Epoch 23, Loss: 0.5215773885345459
Epoch 24, Loss: 0.5108826650238038
Epoch 25, Loss: 0.5001120454406738
Epoch 26, Loss: 0.49407899467468264
Epoch 27, Loss: 0.4850203528594971
Epoch 28, Loss: 0.4785364520263672
Epoch 29, Loss: 0.47730470283508303
Epoch 30, Loss: 0.45588242527008055
Epoch 31, Loss: 0.45513302780151366
Epoch 32, Loss: 0.4487535217285156
Epoch 33, Loss: 0.4404265927886963
Epoch 34, Loss: 0.4384251286315918
Epoch 35, Loss: 0.42635419883728026
Epoch 36, Loss: 0.4267300245666504
Epoch 37, Loss: 0.41773626392364505
Epoch 38, Loss: 0.4159108306121826
Epoch 39, Loss: 0.41291526851654053
Epoch 40, Loss: 0.40703370624542234
Epoch 41, Loss: 0.39807949363708495
Epoch 42, Loss: 0.39894261508941653
Epoch 43, Loss: 0.39554192695617674
Epoch 44, Loss: 0.3856152230834961
Epoch 45, Loss: 0.38401595668792726
Epoch 46, Loss: 0.3803113649749756
Epoch 47, Loss: 0.37449110466003416
Epoch 48, Loss: 0.36861710830688477
Epoch 49, Loss: 0.3622062783432007
Epoch 50, Loss: 0.3613038217163086
Persistence (saving to a file):
To persist the trained parameters from memory to disk:
net.save_parameters("model.params")
Incidentally, print(net) prints a summary of the network's layer structure.
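As an aside, save_parameters stores only the weights, so you always need the Python code that rebuilds the architecture (as done below). If you want a self-describing artifact (a symbol JSON plus a params file), Gluon's HybridBlock.export is an option. Here is a minimal toy sketch of that idea with a deliberately tiny network, not the model from this post:

import mxnet as mx
from mxnet import nd
from mxnet.gluon import nn

toy = nn.HybridSequential()
toy.add(nn.Conv2D(channels=8, kernel_size=3, padding=1, activation='relu'))
toy.add(nn.Flatten())
toy.add(nn.Dense(10))
toy.initialize(mx.init.Xavier())
toy.hybridize()                   # switch to a static computation graph
toy(nd.zeros((1, 3, 32, 32)))     # one forward pass traces the graph
toy.export("toy_model")           # writes toy_model-symbol.json / toy_model-0000.params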
Reusing the model:
from mxnet import nd
from mxnet.gluon import nn
def create_network():
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Conv2D(channels=32, kernel_size=3, padding=1, activation='relu'))
        net.add(nn.BatchNorm())
        net.add(nn.Conv2D(channels=32, kernel_size=3, padding=1, activation='relu'))
        net.add(nn.BatchNorm())
        net.add(nn.MaxPool2D(pool_size=2, strides=2))
        net.add(nn.Dropout(0.3))
        net.add(nn.Conv2D(channels=64, kernel_size=3, padding=1, activation='relu'))
        net.add(nn.BatchNorm())
        net.add(nn.Conv2D(channels=64, kernel_size=3, padding=1, activation='relu'))
        net.add(nn.BatchNorm())
        net.add(nn.MaxPool2D(pool_size=2, strides=2))
        net.add(nn.Dropout(0.5))
        net.add(nn.Conv2D(channels=128, kernel_size=3, padding=1, activation='relu'))
        net.add(nn.BatchNorm())
        net.add(nn.Conv2D(channels=128, kernel_size=3, padding=1, activation='relu'))
        net.add(nn.BatchNorm())
        net.add(nn.MaxPool2D(pool_size=2, strides=2))
        net.add(nn.Dropout(0.5))
        net.add(nn.Flatten())
        net.add(nn.Dense(128, activation="relu"))
        net.add(nn.Dropout(0.5))
        net.add(nn.Dense(10))
    return net
net = create_network()
net.load_parameters("model.params")
(To avoid repeating the definition, the network is wrapped in a create_network function.)
from mxnet import autograd, nd
import mxnet as mx
from mxnet.gluon.data.vision import datasets, transforms
transform_test = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])
test_data = datasets.CIFAR10(train=False).transform_first(transform_test)
test_loader = mx.gluon.data.DataLoader(test_data, batch_size=128, shuffle=False)
def evaluate_accuracy(data_iterator, net, ctx):
    acc = mx.metric.Accuracy()
    for data, label in data_iterator:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]
# Evaluate the model
ctx = mx.cpu() # mx.gpu(0) if GPUs are available
net = create_network()
net.load_parameters("model.params", ctx=ctx)
test_accuracy = evaluate_accuracy(test_loader, net, ctx)
print(f"Test accuracy: {test_accuracy:.4f}")
Result:
Test accuracy: 0.8628
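Overall accuracy hides class-level differences; here is a minimal sketch (added, not in the original post) for per-class accuracy using the same test_loader, net, and ctx as above:

import numpy as np

correct = np.zeros(10)
total = np.zeros(10)
for data, label in test_loader:
    preds = nd.argmax(net(data.as_in_context(ctx)), axis=1).asnumpy()
    for p, l in zip(preds, label.asnumpy()):
        total[int(l)] += 1
        correct[int(l)] += float(p == l)
for i, name in enumerate(['airplane', 'automobile', 'bird', 'cat', 'deer',
                          'dog', 'frog', 'horse', 'ship', 'truck']):
    print(f"{name}: {correct[i] / total[i]:.3f}")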
Next, let's use the persisted parameters to classify arbitrary image files.
(The code below is deliberately self-contained, since it is assumed to run in a separate session.)
import mxnet as mx
from mxnet import image, nd
from mxnet.gluon.data.vision import transforms
from mxnet.gluon import nn
def create_network():
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Conv2D(channels=32, kernel_size=3, padding=1, activation='relu'))
        net.add(nn.BatchNorm())
        net.add(nn.Conv2D(channels=32, kernel_size=3, padding=1, activation='relu'))
        net.add(nn.BatchNorm())
        net.add(nn.MaxPool2D(pool_size=2, strides=2))
        net.add(nn.Dropout(0.3))
        net.add(nn.Conv2D(channels=64, kernel_size=3, padding=1, activation='relu'))
        net.add(nn.BatchNorm())
        net.add(nn.Conv2D(channels=64, kernel_size=3, padding=1, activation='relu'))
        net.add(nn.BatchNorm())
        net.add(nn.MaxPool2D(pool_size=2, strides=2))
        net.add(nn.Dropout(0.5))
        net.add(nn.Conv2D(channels=128, kernel_size=3, padding=1, activation='relu'))
        net.add(nn.BatchNorm())
        net.add(nn.Conv2D(channels=128, kernel_size=3, padding=1, activation='relu'))
        net.add(nn.BatchNorm())
        net.add(nn.MaxPool2D(pool_size=2, strides=2))
        net.add(nn.Dropout(0.5))
        net.add(nn.Flatten())
        net.add(nn.Dense(128, activation="relu"))
        net.add(nn.Dropout(0.5))
        net.add(nn.Dense(10))
    return net
ctx = mx.cpu() # mx.gpu(0) if GPUs are available
net = create_network()
net.load_parameters("model.params", ctx=ctx)
transform_fn = transforms.Compose([
    transforms.Resize(32),
    transforms.CenterCrop(32),
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])
class_labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
print("Predicted class:", class_labels[int(nd.argmax(net(transform_fn(image.imread("/file/to/bird.jpg")).expand_dims(axis=0).as_in_context(ctx)), axis=1).asscalar())])
Result:
Predicted class: bird
(Just replace "/file/to/bird.jpg" above with the path to your own image.)
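To label several files in one go, here is a minimal sketch reusing transform_fn, net, ctx, and class_labels from above (the images/*.jpg glob is a hypothetical path; adjust it to your own):

import glob

for path in glob.glob("images/*.jpg"):  # hypothetical directory
    img = transform_fn(image.imread(path)).expand_dims(axis=0).as_in_context(ctx)
    pred = class_labels[int(nd.argmax(net(img), axis=1).asscalar())]
    print(path, "->", pred)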
There still seems to be relatively little information on Apache MXNet in Japanese, so I'm wrapping things up with this write-up.
Appendix:
The cover image is a Long Neck Karen girl from Thailand (she must be around middle-school age by now...).
And one more bonus (lol)