|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374 |
- # -*- coding: utf-8 -*-
- # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- #
- # Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
- #
- # Unless required by applicable law or agreed to in writing,
- # software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- import multiprocessing as mp
-
- import numpy as np
- import pytest
-
- import megengine as mge
- import megengine.distributed as dist
- from megengine.core import tensor
- from megengine.module import BatchNorm1d, BatchNorm2d, SyncBatchNorm
- from megengine.test import assertTensorClose
-
-
@pytest.mark.isolated_distributed
def test_syncbn():
    """Train SyncBatchNorm across 4 processes, each holding a slice of the
    spatial width, and check that output and running statistics match a
    single-device NumPy reference computed over the full batch.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 4, 16)
    momentum = 0.9
    eps = 1e-5
    running_mean = np.zeros((1, nr_chan, 1, 1), dtype=np.float32)
    running_var = np.ones((1, nr_chan, 1, 1), dtype=np.float32)
    steps = 4

    def worker(rank, data, yv_expect, running_mean, running_var):
        # SyncBatchNorm needs a device; silently skip on CPU-only hosts
        # (the process then exits 0 and the join assertion still passes).
        if not mge.is_cuda_available():
            return
        # NOTE(review): args presumably are (addr, port, world_size, rank,
        # device) — rank doubles as the device index; confirm against the
        # megengine.distributed API.
        dist.init_process_group("localhost", 2333, 4, rank, rank)
        bn = SyncBatchNorm(nr_chan, momentum=momentum, eps=eps)
        data_tensor = tensor()
        for i in range(steps):
            data_tensor.set_value(data[i])
            yv = bn(data_tensor)

        # Only the final step is checked: `yv` and the expected values all
        # refer to the last iteration.
        assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
        assertTensorClose(running_mean, bn.running_mean.numpy(), max_err=5e-6)
        assertTensorClose(running_var, bn.running_var.numpy(), max_err=5e-6)

    xv = []
    for i in range(steps):
        xv.append(np.random.normal(loc=2.3, size=data_shape).astype(np.float32))
        # Flatten to (N*H*W, C) so per-channel statistics are a reduction
        # over axis 0.
        xv_transposed = np.transpose(xv[i], [0, 2, 3, 1]).reshape(
            (data_shape[0] * data_shape[2] * data_shape[3], nr_chan)
        )

        mean = np.mean(xv_transposed, axis=0).reshape(1, nr_chan, 1, 1)

        # Biased variance normalizes the output; unbiased (ddof=1) feeds the
        # running-variance update, matching standard BatchNorm semantics.
        var_biased = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1, 1))
        sd = np.sqrt(var_biased + eps)

        var_unbiased = np.var(xv_transposed, axis=0, ddof=1).reshape((1, nr_chan, 1, 1))
        running_mean = running_mean * momentum + mean * (1 - momentum)
        running_var = running_var * momentum + var_unbiased * (1 - momentum)

        # Overwritten each step: after the loop this holds the step-3 values,
        # which is exactly what worker() asserts against.
        yv_expect = (xv[i] - mean) / sd

    # Split the width (last axis, 16) into four contiguous slices of 4,
    # one per rank; SyncBatchNorm must reduce stats across all of them.
    data = []
    for i in range(4):
        data.append([])
        for j in range(steps):
            data[i].append(xv[j][:, :, :, i * 4 : i * 4 + 4])

    procs = []
    for rank in range(4):
        # NOTE(review): worker is a closure, so this relies on the "fork"
        # start method (closures are not picklable under "spawn").
        p = mp.Process(
            target=worker,
            args=(
                rank,
                data[rank],
                yv_expect[:, :, :, rank * 4 : rank * 4 + 4],
                running_mean,
                running_var,
            ),
        )
        p.start()
        procs.append(p)

    for p in procs:
        p.join()
        assert p.exitcode == 0
-
-
def test_batchnorm():
    """BatchNorm1d on (N, C, L) input: training-mode output and running
    statistics must match a NumPy reference; eval mode must be deterministic
    and leave the running statistics frozen.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 4)
    momentum = 0.9
    bn = BatchNorm1d(nr_chan, momentum=momentum)
    running_mean = np.zeros((1, nr_chan, 1), dtype=np.float32)
    running_var = np.ones((1, nr_chan, 1), dtype=np.float32)
    data = tensor()
    for _ in range(3):
        inp = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        # Per-channel mean over batch and length axes.
        batch_mean = np.mean(np.mean(inp, axis=0, keepdims=True), axis=2, keepdims=True)
        # Flatten to (N*L, C) so per-channel variance is a single reduction.
        flat = np.transpose(inp, [0, 2, 1]).reshape(
            (data_shape[0] * data_shape[2], nr_chan)
        )

        # Biased variance normalizes the output; unbiased (ddof=1) feeds the
        # running-variance update.
        biased_var = np.var(flat, axis=0).reshape((1, nr_chan, 1))
        std = np.sqrt(biased_var + bn.eps)

        unbiased_var = np.var(flat, axis=0, ddof=1).reshape((1, nr_chan, 1))
        running_mean = running_mean * momentum + batch_mean * (1 - momentum)
        running_var = running_var * momentum + unbiased_var * (1 - momentum)

        data.set_value(inp)
        out = bn(data)
        expected = (inp - batch_mean) / std

        assertTensorClose(expected, out.numpy(), max_err=5e-6)
        assertTensorClose(
            running_mean.reshape(-1), bn.running_mean.numpy().reshape(-1), max_err=5e-6
        )
        assertTensorClose(
            running_var.reshape(-1), bn.running_var.numpy().reshape(-1), max_err=5e-6
        )

    # Eval mode: outputs are reproducible, normalization uses the frozen
    # running statistics, and those statistics do not move.
    saved_mean = bn.running_mean.numpy()
    saved_var = bn.running_var.numpy()
    bn.training = False
    inp = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
    data.set_value(inp)
    out_first = bn(data)
    out_second = bn(data)
    assertTensorClose(out_first.numpy(), out_second.numpy(), max_err=0)
    assertTensorClose(saved_mean, bn.running_mean.numpy(), max_err=0)
    assertTensorClose(saved_var, bn.running_var.numpy(), max_err=0)
    expected = (inp - running_mean) / np.sqrt(running_var + bn.eps)
    assertTensorClose(expected, out_first.numpy(), max_err=5e-6)
-
-
def test_syncbn1d():
    """Single-process SyncBatchNorm on (N, C, L) input must behave exactly
    like plain BatchNorm1d: batch-stat normalization while training, frozen
    running statistics in eval mode.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 4)
    momentum = 0.9
    bn = SyncBatchNorm(nr_chan, momentum=momentum)
    running_mean = np.zeros((1, nr_chan, 1), dtype=np.float32)
    running_var = np.ones((1, nr_chan, 1), dtype=np.float32)
    data = tensor()
    for _ in range(3):
        inp = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        # Channel-wise mean over batch and length axes.
        batch_mean = np.mean(np.mean(inp, axis=0, keepdims=True), axis=2, keepdims=True)
        # (N, C, L) -> (N*L, C): one reduction axis for per-channel stats.
        flat = np.transpose(inp, [0, 2, 1]).reshape(
            (data_shape[0] * data_shape[2], nr_chan)
        )

        # Biased variance for the output, unbiased for the running update.
        biased_var = np.var(flat, axis=0).reshape((1, nr_chan, 1))
        std = np.sqrt(biased_var + bn.eps)

        unbiased_var = np.var(flat, axis=0, ddof=1).reshape((1, nr_chan, 1))
        running_mean = running_mean * momentum + batch_mean * (1 - momentum)
        running_var = running_var * momentum + unbiased_var * (1 - momentum)

        data.set_value(inp)
        out = bn(data)
        expected = (inp - batch_mean) / std

        assertTensorClose(expected, out.numpy(), max_err=5e-6)
        assertTensorClose(
            running_mean.reshape(-1), bn.running_mean.numpy().reshape(-1), max_err=5e-6
        )
        assertTensorClose(
            running_var.reshape(-1), bn.running_var.numpy().reshape(-1), max_err=5e-6
        )

    # Eval mode: deterministic output from frozen running statistics.
    saved_mean = bn.running_mean.numpy()
    saved_var = bn.running_var.numpy()
    bn.training = False
    inp = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
    data.set_value(inp)
    out_first = bn(data)
    out_second = bn(data)
    assertTensorClose(out_first.numpy(), out_second.numpy(), max_err=0)
    assertTensorClose(saved_mean, bn.running_mean.numpy(), max_err=0)
    assertTensorClose(saved_var, bn.running_var.numpy(), max_err=0)
    expected = (inp - running_mean) / np.sqrt(running_var + bn.eps)
    assertTensorClose(expected, out_first.numpy(), max_err=5e-6)
-
-
def test_batchnorm2d():
    """BatchNorm2d on (N, C, H, W) input: training output and running
    statistics must match a NumPy reference; eval mode must be deterministic
    with frozen statistics.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 16, 16)
    momentum = 0.9
    bn = BatchNorm2d(nr_chan, momentum=momentum)
    running_mean = np.zeros((1, nr_chan, 1, 1), dtype=np.float32)
    running_var = np.ones((1, nr_chan, 1, 1), dtype=np.float32)
    data = tensor()
    for _ in range(3):
        inp = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        # (N, C, H, W) -> (N*H*W, C): per-channel stats via one reduction.
        flat = np.transpose(inp, [0, 2, 3, 1]).reshape(
            (data_shape[0] * data_shape[2] * data_shape[3], nr_chan)
        )

        batch_mean = np.mean(flat, axis=0).reshape(1, nr_chan, 1, 1)

        # Biased variance normalizes; unbiased (ddof=1) updates running_var.
        biased_var = np.var(flat, axis=0).reshape((1, nr_chan, 1, 1))
        std = np.sqrt(biased_var + bn.eps)

        unbiased_var = np.var(flat, axis=0, ddof=1).reshape((1, nr_chan, 1, 1))
        running_mean = running_mean * momentum + batch_mean * (1 - momentum)
        running_var = running_var * momentum + unbiased_var * (1 - momentum)

        data.set_value(inp)
        out = bn(data)
        expected = (inp - batch_mean) / std

        assertTensorClose(expected, out.numpy(), max_err=5e-6)
        assertTensorClose(running_mean, bn.running_mean.numpy(), max_err=5e-6)
        assertTensorClose(running_var, bn.running_var.numpy(), max_err=5e-6)

    # Eval mode: reproducible output, frozen running statistics.
    saved_mean = bn.running_mean.numpy()
    saved_var = bn.running_var.numpy()
    bn.training = False
    inp = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
    data.set_value(inp)
    out_first = bn(data)
    out_second = bn(data)
    assertTensorClose(out_first.numpy(), out_second.numpy(), max_err=0)
    assertTensorClose(saved_mean, bn.running_mean.numpy(), max_err=0)
    assertTensorClose(saved_var, bn.running_var.numpy(), max_err=0)
    expected = (inp - running_mean) / np.sqrt(running_var + bn.eps)
    assertTensorClose(expected, out_first.numpy(), max_err=5e-6)
-
-
def test_syncbn2d():
    """Single-process SyncBatchNorm on (N, C, H, W) input must match plain
    BatchNorm2d semantics in both training and eval modes.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 16, 16)
    momentum = 0.9
    bn = SyncBatchNorm(nr_chan, momentum=momentum)
    running_mean = np.zeros((1, nr_chan, 1, 1), dtype=np.float32)
    running_var = np.ones((1, nr_chan, 1, 1), dtype=np.float32)
    data = tensor()
    for _ in range(3):
        inp = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        # Collapse batch and spatial axes so channel stats reduce over axis 0.
        flat = np.transpose(inp, [0, 2, 3, 1]).reshape(
            (data_shape[0] * data_shape[2] * data_shape[3], nr_chan)
        )

        batch_mean = np.mean(flat, axis=0).reshape(1, nr_chan, 1, 1)

        # Biased variance for normalization, unbiased for the running update.
        biased_var = np.var(flat, axis=0).reshape((1, nr_chan, 1, 1))
        std = np.sqrt(biased_var + bn.eps)

        unbiased_var = np.var(flat, axis=0, ddof=1).reshape((1, nr_chan, 1, 1))
        running_mean = running_mean * momentum + batch_mean * (1 - momentum)
        running_var = running_var * momentum + unbiased_var * (1 - momentum)

        data.set_value(inp)
        out = bn(data)
        expected = (inp - batch_mean) / std

        assertTensorClose(expected, out.numpy(), max_err=5e-6)
        assertTensorClose(running_mean, bn.running_mean.numpy(), max_err=5e-6)
        assertTensorClose(running_var, bn.running_var.numpy(), max_err=5e-6)

    # Eval mode: deterministic output from frozen running statistics.
    saved_mean = bn.running_mean.numpy()
    saved_var = bn.running_var.numpy()
    bn.training = False
    inp = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
    data.set_value(inp)
    out_first = bn(data)
    out_second = bn(data)
    assertTensorClose(out_first.numpy(), out_second.numpy(), max_err=0)
    assertTensorClose(saved_mean, bn.running_mean.numpy(), max_err=0)
    assertTensorClose(saved_var, bn.running_var.numpy(), max_err=0)
    expected = (inp - running_mean) / np.sqrt(running_var + bn.eps)
    assertTensorClose(expected, out_first.numpy(), max_err=5e-6)
-
-
def test_batchnorm_no_stats():
    """With track_running_stats=False, BatchNorm1d must normalize with the
    current batch statistics regardless of the training flag.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 4)
    bn = BatchNorm1d(8, track_running_stats=False)
    data = tensor()
    for step in range(4):
        # Flip to eval mode halfway through; behaviour must not change.
        if step == 2:
            bn.training = False
        inp = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        batch_mean = np.mean(np.mean(inp, axis=0, keepdims=True), axis=2, keepdims=True)
        batch_var = np.var(
            np.transpose(inp, [0, 2, 1]).reshape(
                (data_shape[0] * data_shape[2], nr_chan)
            ),
            axis=0,
        ).reshape((1, nr_chan, 1))
        std = np.sqrt(batch_var + bn.eps)

        data.set_value(inp)
        out = bn(data)
        expected = (inp - batch_mean) / std

        assertTensorClose(expected, out.numpy(), max_err=5e-6)
-
-
def test_syncbn_no_stats():
    """With track_running_stats=False, SyncBatchNorm must always use batch
    statistics, in training and in eval mode alike.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 4)
    bn = SyncBatchNorm(8, track_running_stats=False)
    data = tensor()
    for step in range(4):
        # Switch off training midway; output must still track batch stats.
        if step == 2:
            bn.training = False
        inp = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        batch_mean = np.mean(np.mean(inp, axis=0, keepdims=True), axis=2, keepdims=True)
        batch_var = np.var(
            np.transpose(inp, [0, 2, 1]).reshape(
                (data_shape[0] * data_shape[2], nr_chan)
            ),
            axis=0,
        ).reshape((1, nr_chan, 1))
        std = np.sqrt(batch_var + bn.eps)

        data.set_value(inp)
        out = bn(data)
        expected = (inp - batch_mean) / std

        assertTensorClose(expected, out.numpy(), max_err=5e-6)
-
-
def test_batchnorm2d_no_stats():
    """With track_running_stats=False, BatchNorm2d must normalize with batch
    statistics regardless of the training flag.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 16, 16)
    bn = BatchNorm2d(8, track_running_stats=False)
    data = tensor()
    for step in range(4):
        # Enter eval mode halfway through; behaviour must not change.
        if step == 2:
            bn.training = False
        inp = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        flat = np.transpose(inp, [0, 2, 3, 1]).reshape(
            (data_shape[0] * data_shape[2] * data_shape[3], nr_chan)
        )

        batch_mean = np.mean(flat, axis=0).reshape(1, nr_chan, 1, 1)
        batch_var = np.var(flat, axis=0).reshape((1, nr_chan, 1, 1))
        std = np.sqrt(batch_var + bn.eps)

        data.set_value(inp)
        out = bn(data)
        expected = (inp - batch_mean) / std

        assertTensorClose(expected, out.numpy(), max_err=5e-6)
-
-
def test_syncbn2d_no_stats():
    """With track_running_stats=False, SyncBatchNorm on 4-D input must use
    batch statistics in both training and eval modes.
    """
    nr_chan = 8
    data_shape = (3, nr_chan, 16, 16)
    bn = SyncBatchNorm(8, track_running_stats=False)
    data = tensor()
    for step in range(4):
        # Disable training midway; output must still track batch stats.
        if step == 2:
            bn.training = False
        inp = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
        flat = np.transpose(inp, [0, 2, 3, 1]).reshape(
            (data_shape[0] * data_shape[2] * data_shape[3], nr_chan)
        )

        batch_mean = np.mean(flat, axis=0).reshape(1, nr_chan, 1, 1)
        batch_var = np.var(flat, axis=0).reshape((1, nr_chan, 1, 1))
        std = np.sqrt(batch_var + bn.eps)

        data.set_value(inp)
        out = bn(data)
        expected = (inp - batch_mean) / std

        assertTensorClose(expected, out.numpy(), max_err=5e-6)
|