|
@@ -7,6 +7,8 @@ |
|
|
# software distributed under the License is distributed on an |
|
|
# software distributed under the License is distributed on an |
|
|
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
|
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
|
import os |
|
|
import os |
|
|
|
|
|
import re |
|
|
|
|
|
import subprocess |
|
|
import sys |
|
|
import sys |
|
|
|
|
|
|
|
|
import numpy as np |
|
|
import numpy as np |
|
@@ -15,18 +17,48 @@ import megengine as mge |
|
|
import megengine.functional as F |
|
|
import megengine.functional as F |
|
|
from megengine import jit, tensor |
|
|
from megengine import jit, tensor |
|
|
from megengine.functional.debug_param import set_conv_execution_strategy |
|
|
from megengine.functional.debug_param import set_conv_execution_strategy |
|
|
from megengine.module import BatchNorm2d, Conv2d, Linear, MaxPool2d, Module |
|
|
|
|
|
|
|
|
from megengine.module import AvgPool2d, BatchNorm2d, Conv2d, Linear, Module |
|
|
from megengine.optimizer import SGD |
|
|
from megengine.optimizer import SGD |
|
|
from megengine.test import assertTensorClose |
|
|
from megengine.test import assertTensorClose |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_gpu_name(): |
|
|
|
|
|
try: |
|
|
|
|
|
gpu_info = subprocess.check_output( |
|
|
|
|
|
["nvidia-smi", "--query-gpu=gpu_name", "--format=csv,noheader"] |
|
|
|
|
|
) |
|
|
|
|
|
gpu_info = gpu_info.decode("ascii").split("\n")[0] |
|
|
|
|
|
except: |
|
|
|
|
|
gpu_info = "None" |
|
|
|
|
|
return gpu_info |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_cpu_name(): |
|
|
|
|
|
cpu_info = "None" |
|
|
|
|
|
try: |
|
|
|
|
|
cpu_info = subprocess.check_output(["cat", "/proc/cpuinfo"]).decode("ascii") |
|
|
|
|
|
for line in cpu_info.split("\n"): |
|
|
|
|
|
if "model name" in line: |
|
|
|
|
|
return re.sub(".*model name.*:", "", line, 1).strip() |
|
|
|
|
|
except: |
|
|
|
|
|
pass |
|
|
|
|
|
return cpu_info |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_xpu_name(): |
|
|
|
|
|
if mge.is_cuda_available(): |
|
|
|
|
|
return get_gpu_name() |
|
|
|
|
|
else: |
|
|
|
|
|
return get_cpu_name() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class MnistNet(Module): |
|
|
class MnistNet(Module): |
|
|
def __init__(self, has_bn=False): |
|
|
def __init__(self, has_bn=False): |
|
|
super().__init__() |
|
|
super().__init__() |
|
|
self.conv0 = Conv2d(1, 20, kernel_size=5, bias=True) |
|
|
self.conv0 = Conv2d(1, 20, kernel_size=5, bias=True) |
|
|
self.pool0 = MaxPool2d(2) |
|
|
|
|
|
|
|
|
self.pool0 = AvgPool2d(2) |
|
|
self.conv1 = Conv2d(20, 20, kernel_size=5, bias=True) |
|
|
self.conv1 = Conv2d(20, 20, kernel_size=5, bias=True) |
|
|
self.pool1 = MaxPool2d(2) |
|
|
|
|
|
|
|
|
self.pool1 = AvgPool2d(2) |
|
|
self.fc0 = Linear(20 * 4 * 4, 500, bias=True) |
|
|
self.fc0 = Linear(20 * 4 * 4, 500, bias=True) |
|
|
self.fc1 = Linear(500, 10, bias=True) |
|
|
self.fc1 = Linear(500, 10, bias=True) |
|
|
self.bn0 = None |
|
|
self.bn0 = None |
|
@@ -67,6 +99,13 @@ def update_model(model_path): |
|
|
|
|
|
|
|
|
The model with pre-trained weights is trained for one iter with the test data attached. |
|
|
The model with pre-trained weights is trained for one iter with the test data attached. |
|
|
The loss and updated net state dict is dumped. |
|
|
The loss and updated net state dict is dumped. |
|
|
|
|
|
|
|
|
|
|
|
.. code-block:: python |
|
|
|
|
|
|
|
|
|
|
|
from test_correctness import update_model |
|
|
|
|
|
update_model('mnist_model_with_test.mge') # for gpu |
|
|
|
|
|
update_model('mnist_model_with_test_cpu.mge') # for cpu |
|
|
|
|
|
|
|
|
""" |
|
|
""" |
|
|
net = MnistNet(has_bn=True) |
|
|
net = MnistNet(has_bn=True) |
|
|
checkpoint = mge.load(model_path) |
|
|
checkpoint = mge.load(model_path) |
|
@@ -83,7 +122,11 @@ def update_model(model_path): |
|
|
loss = train(data, label, net=net, opt=opt) |
|
|
loss = train(data, label, net=net, opt=opt) |
|
|
opt.step() |
|
|
opt.step() |
|
|
|
|
|
|
|
|
checkpoint.update({"net_updated": net.state_dict(), "loss": loss.numpy()}) |
|
|
|
|
|
|
|
|
xpu_name = get_xpu_name() |
|
|
|
|
|
|
|
|
|
|
|
checkpoint.update( |
|
|
|
|
|
{"net_updated": net.state_dict(), "loss": loss.numpy(), "xpu": xpu_name} |
|
|
|
|
|
) |
|
|
mge.save(checkpoint, model_path) |
|
|
mge.save(checkpoint, model_path) |
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -92,9 +135,9 @@ def run_test(model_path, use_jit, use_symbolic): |
|
|
""" |
|
|
""" |
|
|
Load the model with test cases and run the training for one iter. |
|
|
Load the model with test cases and run the training for one iter. |
|
|
The loss and updated weights are compared with reference value to verify the correctness. |
|
|
The loss and updated weights are compared with reference value to verify the correctness. |
|
|
|
|
|
|
|
|
Dump a new file with updated result by calling update_model |
|
|
|
|
|
if you think the test fails due to numerical rounding errors instead of bugs. |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Dump a new file with updated result by calling update_model |
|
|
|
|
|
if you think the test fails due to numerical rounding errors instead of bugs. |
|
|
Please think twice before you do so. |
|
|
Please think twice before you do so. |
|
|
|
|
|
|
|
|
""" |
|
|
""" |
|
@@ -109,7 +152,7 @@ def run_test(model_path, use_jit, use_symbolic): |
|
|
data.set_value(checkpoint["data"]) |
|
|
data.set_value(checkpoint["data"]) |
|
|
label.set_value(checkpoint["label"]) |
|
|
label.set_value(checkpoint["label"]) |
|
|
|
|
|
|
|
|
max_err = 1e-1 |
|
|
|
|
|
|
|
|
max_err = 1e-5 |
|
|
|
|
|
|
|
|
train_func = train |
|
|
train_func = train |
|
|
if use_jit: |
|
|
if use_jit: |
|
|