- # -*- coding: utf-8 -*-
- # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- #
- # Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
- #
- # Unless required by applicable law or agreed to in writing,
- # software distributed under the License is distributed on an
- # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- import multiprocessing as mp
- import platform
-
- import numpy as np
- import pytest
-
- import megengine as mge
- import megengine.distributed as dist
- from megengine import Parameter, Tensor, tensor
- from megengine.functional.distributed import (
- all_gather,
- all_reduce_max,
- all_reduce_min,
- all_reduce_sum,
- all_to_all,
- broadcast,
- gather,
- reduce_scatter_sum,
- reduce_sum,
- remote_recv,
- remote_send,
- scatter,
- )
-
-
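- # Each test below follows the same pattern: grab a free port, start a dist.Server,
- # then fork world_size=2 worker processes that join the process group (one GPU per
- # rank), run the collective on their local tensor, and compare against the expected
- # numpy result. Workers return early (so the test passes trivially) when fewer than
- # two GPUs are available; the parent asserts both exit codes are 0.
- # reduce_sum: rank 0 receives x + y; the non-root rank gets a zero result.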
- @pytest.mark.skipif(
- platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
- )
- @pytest.mark.skipif(
- platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
- )
- @pytest.mark.isolated_distributed
- def test_reduce_sum():
- world_size = 2
- port = dist.get_free_ports(1)[0]
- server = dist.Server(port)
-
- def worker(rank, data, expect, port):
- if mge.get_device_count("gpu") < world_size:
- return
- dist.init_process_group("localhost", port, world_size, rank, rank)
- inp = tensor(data)
- output = reduce_sum(inp)
- if rank == 0:
- assert np.allclose(output.numpy(), expect)
- else:
- assert np.allclose(output.numpy(), 0)
-
- def check(shape):
- x = np.random.rand(*shape).astype("float32")
- y = np.random.rand(*shape).astype("float32")
- z = x + y
- p0 = mp.Process(target=worker, args=(0, x, z, port))
- p1 = mp.Process(target=worker, args=(1, y, None, port))
-
- p0.start()
- p1.start()
-
- p0.join(10)
- p1.join(10)
-
- assert p0.exitcode == 0 and p1.exitcode == 0
-
- for shape in [(2, 3), (8, 10), (99, 77)]:
- check(shape)
-
-
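- # broadcast: rank 0 holds x and rank 1 holds x + 1; after broadcasting from the
- # root, both ranks should see x.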
- @pytest.mark.skipif(
- platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
- )
- @pytest.mark.skipif(
- platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
- )
- @pytest.mark.isolated_distributed
- def test_broadcast():
- world_size = 2
- port = dist.get_free_ports(1)[0]
- server = dist.Server(port)
-
- def worker(rank, data, expect, port):
- if mge.get_device_count("gpu") < world_size:
- return
- dist.init_process_group("localhost", port, world_size, rank, rank)
- inp = tensor(data)
- output = broadcast(inp)
- assert np.allclose(output.numpy(), expect)
-
- def check(shape):
- x = np.random.rand(*shape).astype("float32")
- y = x + 1
- p0 = mp.Process(target=worker, args=(0, x, x, port))
- p1 = mp.Process(target=worker, args=(1, y, x, port))
-
- p0.start()
- p1.start()
-
- p0.join(10)
- p1.join(10)
-
- assert p0.exitcode == 0 and p1.exitcode == 0
-
- for shape in [(2, 3), (8, 10), (99, 77)]:
- check(shape)
-
-
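- # all_gather: both ranks should end up with x and y concatenated along axis 0.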
- @pytest.mark.skipif(
- platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
- )
- @pytest.mark.skipif(
- platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
- )
- @pytest.mark.isolated_distributed
- def test_all_gather():
- world_size = 2
- port = dist.get_free_ports(1)[0]
- server = dist.Server(port)
-
- def worker(rank, data, expect, port):
- if mge.get_device_count("gpu") < world_size:
- return
- dist.init_process_group("localhost", port, world_size, rank, rank)
- inp = tensor(data)
- output = all_gather(inp)
- assert np.allclose(output.numpy(), expect)
-
- def check(shape):
- x = np.random.rand(*shape).astype("float32")
- y = np.random.rand(*shape).astype("float32")
- z = np.concatenate((x, y))
- p0 = mp.Process(target=worker, args=(0, x, z, port))
- p1 = mp.Process(target=worker, args=(1, y, z, port))
-
- p0.start()
- p1.start()
-
- p0.join(10)
- p1.join(10)
-
- assert p0.exitcode == 0 and p1.exitcode == 0
-
- for shape in [(2, 3), (8, 10), (99, 77)]:
- check(shape)
-
-
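- # reduce_scatter_sum: x + y is reduced across ranks and then split along axis 0,
- # so rank 0 checks the first half and rank 1 the second half.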
- @pytest.mark.skipif(
- platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
- )
- @pytest.mark.skipif(
- platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
- )
- @pytest.mark.isolated_distributed
- def test_reduce_scatter_sum():
- world_size = 2
- port = dist.get_free_ports(1)[0]
- server = dist.Server(port)
-
- def worker(rank, data, expect, port):
- if mge.get_device_count("gpu") < world_size:
- return
- dist.init_process_group("localhost", port, world_size, rank, rank)
- inp = tensor(data)
- output = reduce_scatter_sum(inp)
- assert np.allclose(output.numpy(), expect)
-
- def check(shape):
- x = np.random.rand(*shape).astype("float32")
- y = np.random.rand(*shape).astype("float32")
- z = x + y
- p0 = mp.Process(target=worker, args=(0, x, z[: shape[0] // 2], port))
- p1 = mp.Process(target=worker, args=(1, y, z[shape[0] // 2 :], port))
-
- p0.start()
- p1.start()
-
- p0.join(10)
- p1.join(10)
-
- assert p0.exitcode == 0 and p1.exitcode == 0
-
- for shape in [(2, 4), (8, 10), (88, 44)]:
- check(shape)
-
-
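- # all_reduce_sum: both ranks should see the elementwise sum x + y.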
- @pytest.mark.skipif(
- platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
- )
- @pytest.mark.skipif(
- platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
- )
- @pytest.mark.isolated_distributed
- def test_all_reduce_sum():
- world_size = 2
- port = dist.get_free_ports(1)[0]
- server = dist.Server(port)
-
- def worker(rank, data, expect, port):
- if mge.get_device_count("gpu") < world_size:
- return
- dist.init_process_group("localhost", port, world_size, rank, rank)
- inp = tensor(data)
- output = all_reduce_sum(inp)
- assert np.allclose(output.numpy(), expect)
-
- def check(shape):
- x = np.random.rand(*shape).astype("float32")
- y = np.random.rand(*shape).astype("float32")
- z = x + y
- p0 = mp.Process(target=worker, args=(0, x, z, port))
- p1 = mp.Process(target=worker, args=(1, y, z, port))
-
- p0.start()
- p1.start()
-
- p0.join(10)
- p1.join(10)
-
- assert p0.exitcode == 0 and p1.exitcode == 0
-
- for shape in [(2, 3), (8, 10), (99, 77)]:
- check(shape)
-
-
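- # all_reduce_max: both ranks should see the elementwise maximum of x and y.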
- @pytest.mark.skipif(
- platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
- )
- @pytest.mark.skipif(
- platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
- )
- @pytest.mark.isolated_distributed
- def test_all_reduce_max():
- world_size = 2
- port = dist.get_free_ports(1)[0]
- server = dist.Server(port)
-
- def worker(rank, data, expect, port):
- if mge.get_device_count("gpu") < world_size:
- return
- dist.init_process_group("localhost", port, world_size, rank, rank)
- inp = tensor(data)
- output = all_reduce_max(inp)
- assert np.allclose(output.numpy(), expect)
-
- def check(shape):
- x = np.random.rand(*shape).astype("float32")
- y = np.random.rand(*shape).astype("float32")
- z = np.maximum(x, y)
- p0 = mp.Process(target=worker, args=(0, x, z, port))
- p1 = mp.Process(target=worker, args=(1, y, z, port))
-
- p0.start()
- p1.start()
-
- p0.join(10)
- p1.join(10)
-
- assert p0.exitcode == 0 and p1.exitcode == 0
-
- for shape in [(2, 3), (8, 10), (99, 77)]:
- check(shape)
-
-
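- # all_reduce_min: both ranks should see the elementwise minimum of x and y.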
- @pytest.mark.skipif(
- platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
- )
- @pytest.mark.skipif(
- platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
- )
- @pytest.mark.isolated_distributed
- def test_all_reduce_min():
- world_size = 2
- port = dist.get_free_ports(1)[0]
- server = dist.Server(port)
-
- def worker(rank, data, expect, port):
- if mge.get_device_count("gpu") < world_size:
- return
- dist.init_process_group("localhost", port, world_size, rank, rank)
- inp = tensor(data)
- output = all_reduce_min(inp)
- assert np.allclose(output.numpy(), expect)
-
- def check(shape):
- x = np.random.rand(*shape).astype("float32")
- y = np.random.rand(*shape).astype("float32")
- z = np.minimum(x, y)
- p0 = mp.Process(target=worker, args=(0, x, z, port))
- p1 = mp.Process(target=worker, args=(1, y, z, port))
-
- p0.start()
- p1.start()
-
- p0.join(10)
- p1.join(10)
-
- assert p0.exitcode == 0 and p1.exitcode == 0
-
- for shape in [(2, 3), (8, 10), (99, 77)]:
- check(shape)
-
-
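- # gather: rank 0 collects x and y concatenated along axis 0; the non-root rank
- # gets a zero result.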
- @pytest.mark.skipif(
- platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
- )
- @pytest.mark.skipif(
- platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
- )
- @pytest.mark.isolated_distributed
- def test_gather():
- world_size = 2
- port = dist.get_free_ports(1)[0]
- server = dist.Server(port)
-
- def worker(rank, data, expect, port):
- if mge.get_device_count("gpu") < world_size:
- return
- dist.init_process_group("localhost", port, world_size, rank, rank)
- inp = tensor(data)
- output = gather(inp)
- if rank == 0:
- assert np.allclose(output.numpy(), expect)
- else:
- assert np.allclose(output.numpy(), 0)
-
- def check(shape):
- x = np.random.rand(*shape).astype("float32")
- y = np.random.rand(*shape).astype("float32")
- z = np.concatenate((x, y))
- p0 = mp.Process(target=worker, args=(0, x, z, port))
- p1 = mp.Process(target=worker, args=(1, y, None, port))
-
- p0.start()
- p1.start()
-
- p0.join(10)
- p1.join(10)
-
- assert p0.exitcode == 0 and p1.exitcode == 0
-
- for shape in [(2, 3), (8, 10), (99, 77)]:
- check(shape)
-
-
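- # scatter: the root's tensor x is split along axis 0; rank 0 receives the first
- # half and rank 1 the second half (rank 1's own input y is not expected in the output).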
- @pytest.mark.skipif(
- platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
- )
- @pytest.mark.skipif(
- platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
- )
- @pytest.mark.isolated_distributed
- def test_scatter():
- world_size = 2
- port = dist.get_free_ports(1)[0]
- server = dist.Server(port)
-
- def worker(rank, data, expect, port):
- if mge.get_device_count("gpu") < world_size:
- return
- dist.init_process_group("localhost", port, world_size, rank, rank)
- inp = tensor(data)
- output = scatter(inp)
- assert np.allclose(output.numpy(), expect)
-
- def check(shape):
- x = np.random.rand(*shape).astype("float32")
- y = x + 1
- p0 = mp.Process(target=worker, args=(0, x, x[: shape[0] // 2], port))
- p1 = mp.Process(target=worker, args=(1, y, x[shape[0] // 2 :], port))
-
- p0.start()
- p1.start()
-
- p0.join(10)
- p1.join(10)
-
- assert p0.exitcode == 0 and p1.exitcode == 0
-
- for shape in [(2, 3), (8, 10), (100, 77)]:
- check(shape)
-
-
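- # all_to_all: each rank splits its tensor along axis 0 and exchanges halves, so
- # rank 0 expects the first halves of x and y concatenated and rank 1 the second halves.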
- @pytest.mark.skipif(
- platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
- )
- @pytest.mark.skipif(
- platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
- )
- @pytest.mark.isolated_distributed
- def test_all_to_all():
- world_size = 2
- port = dist.get_free_ports(1)[0]
- server = dist.Server(port)
-
- def worker(rank, data, expect, port):
- if mge.get_device_count("gpu") < world_size:
- return
- dist.init_process_group("localhost", port, world_size, rank, rank)
- inp = tensor(data)
- output = all_to_all(inp)
- assert np.allclose(output.numpy(), expect)
-
- def check(shape):
- x = np.random.rand(*shape).astype("float32")
- y = np.random.rand(*shape).astype("float32")
- a = np.concatenate((x[: shape[0] // 2], y[: shape[0] // 2]))
- b = np.concatenate((x[shape[0] // 2 :], y[shape[0] // 2 :]))
- p0 = mp.Process(target=worker, args=(0, x, a, port))
- p1 = mp.Process(target=worker, args=(1, y, b, port))
-
- p0.start()
- p1.start()
-
- p0.join(10)
- p1.join(10)
-
- assert p0.exitcode == 0 and p1.exitcode == 0
-
- for shape in [(2, 3), (8, 10), (100, 77)]:
- check(shape)
-
-
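- # remote_send / remote_recv: rank 0 sends val to rank 1, which receives it on gpu1
- # and checks the payload; on the sender, the returned tensor's first element is 0.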
- @pytest.mark.skipif(
- platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
- )
- @pytest.mark.skipif(
- platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
- )
- @pytest.mark.isolated_distributed
- def test_io_remote():
- world_size = 2
- port = dist.get_free_ports(1)[0]
- server = dist.Server(port)
- val = np.random.rand(4, 5).astype(np.float32)
-
- def worker(rank):
- if mge.get_device_count("gpu") < world_size:
- return
- if rank == 0: # remote send
- dist.init_process_group("localhost", port, world_size, rank, rank)
- x = Tensor(val, device="gpu0")
- y = remote_send(x, 1)
- assert y.numpy()[0] == 0
- else: # remote recv
- dist.init_process_group("localhost", port, world_size, rank, rank)
- y = remote_recv(0, val.shape, val.dtype, cn="gpu1")
- np.testing.assert_almost_equal(val, y.numpy())
-
- procs = []
- for rank in range(world_size):
- p = mp.Process(target=worker, args=(rank,))
- p.start()
- procs.append(p)
-
- for p in procs:
- p.join(10)
- assert p.exitcode == 0
|