Browse Source

fix(mge/test): fix reproducibility of test_correctness.py

GitOrigin-RevId: 70bd43bbab
tags/v0.3.2
Megvii Engine Team 5 years ago
parent
commit
fd83e0252d
3 changed files with 51 additions and 8 deletions
  1. BIN
      python_module/test/integration/mnist_model_with_test.mge
  2. BIN
      python_module/test/integration/mnist_model_with_test_cpu.mge
  3. +51
    -8
      python_module/test/integration/test_correctness.py

BIN
python_module/test/integration/mnist_model_with_test.mge View File


BIN
python_module/test/integration/mnist_model_with_test_cpu.mge View File


+ 51
- 8
python_module/test/integration/test_correctness.py View File

@@ -7,6 +7,8 @@
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import os
import re
import subprocess
import sys

import numpy as np
@@ -15,18 +17,48 @@ import megengine as mge
import megengine.functional as F
from megengine import jit, tensor
from megengine.functional.debug_param import set_conv_execution_strategy
from megengine.module import BatchNorm2d, Conv2d, Linear, MaxPool2d, Module
from megengine.module import AvgPool2d, BatchNorm2d, Conv2d, Linear, Module
from megengine.optimizer import SGD
from megengine.test import assertTensorClose


def get_gpu_name():
try:
gpu_info = subprocess.check_output(
["nvidia-smi", "--query-gpu=gpu_name", "--format=csv,noheader"]
)
gpu_info = gpu_info.decode("ascii").split("\n")[0]
except:
gpu_info = "None"
return gpu_info


def get_cpu_name():
cpu_info = "None"
try:
cpu_info = subprocess.check_output(["cat", "/proc/cpuinfo"]).decode("ascii")
for line in cpu_info.split("\n"):
if "model name" in line:
return re.sub(".*model name.*:", "", line, 1).strip()
except:
pass
return cpu_info


def get_xpu_name():
if mge.is_cuda_available():
return get_gpu_name()
else:
return get_cpu_name()


class MnistNet(Module):
def __init__(self, has_bn=False):
super().__init__()
self.conv0 = Conv2d(1, 20, kernel_size=5, bias=True)
self.pool0 = MaxPool2d(2)
self.pool0 = AvgPool2d(2)
self.conv1 = Conv2d(20, 20, kernel_size=5, bias=True)
self.pool1 = MaxPool2d(2)
self.pool1 = AvgPool2d(2)
self.fc0 = Linear(20 * 4 * 4, 500, bias=True)
self.fc1 = Linear(500, 10, bias=True)
self.bn0 = None
@@ -67,6 +99,13 @@ def update_model(model_path):

The model with pre-trained weights is trained for one iter with the test data attached.
The loss and updated net state dict is dumped.

.. code-block:: python

from test_correctness import update_model
update_model('mnist_model_with_test.mge') # for gpu
update_model('mnist_model_with_test_cpu.mge') # for cpu

"""
net = MnistNet(has_bn=True)
checkpoint = mge.load(model_path)
@@ -83,7 +122,11 @@ def update_model(model_path):
loss = train(data, label, net=net, opt=opt)
opt.step()

checkpoint.update({"net_updated": net.state_dict(), "loss": loss.numpy()})
xpu_name = get_xpu_name()

checkpoint.update(
{"net_updated": net.state_dict(), "loss": loss.numpy(), "xpu": xpu_name}
)
mge.save(checkpoint, model_path)


@@ -92,9 +135,9 @@ def run_test(model_path, use_jit, use_symbolic):
"""
Load the model with test cases and run the training for one iter.
The loss and updated weights are compared with reference value to verify the correctness.
Dump a new file with updated result by calling update_model
if you think the test fails due to numerical rounding errors instead of bugs.
Dump a new file with updated result by calling update_model
if you think the test fails due to numerical rounding errors instead of bugs.
Please think twice before you do so.

"""
@@ -109,7 +152,7 @@ def run_test(model_path, use_jit, use_symbolic):
data.set_value(checkpoint["data"])
label.set_value(checkpoint["label"])

max_err = 1e-1
max_err = 1e-5

train_func = train
if use_jit:


Loading…
Cancel
Save