You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

dist.py 2.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. import oneflow
  2. from oneflow import nn
  3. from oneflow.utils.data import DataLoader, Dataset
  4. from oneflow.nn.parallel import DistributedDataParallel as ddp
  5. import os
  6. # print(oneflow.ones(3,4).device)
  7. # print(oneflow.rand(3,4).device)
  8. # exit(0)
  9. # PLACEMENT = oneflow.placement("cuda", [0,1])
  10. # S0 = oneflow.sbp.split(0)
  11. # B = oneflow.sbp.broadcast
  12. print(oneflow.cuda.current_device())
  13. exit(0)
  14. class OneflowArgMaxDataset(Dataset):
  15. def __init__(self, feature_dimension=10, data_num=1000, seed=0):
  16. self.num_labels = feature_dimension
  17. self.feature_dimension = feature_dimension
  18. self.data_num = data_num
  19. self.seed = seed
  20. g = oneflow.Generator()
  21. g.manual_seed(1000)
  22. self.x = oneflow.randint(low=-100, high=100, size=[data_num, feature_dimension], generator=g).float()
  23. self.y = oneflow.max(self.x, dim=-1)[1]
  24. def __len__(self):
  25. return self.data_num
  26. def __getitem__(self, item):
  27. return self.x[item], self.y[item]
  28. class Model(nn.Module):
  29. def __init__(self, num_labels, feature_dimension):
  30. super(Model, self).__init__()
  31. self.num_labels = num_labels
  32. self.linear1 = nn.Linear(in_features=feature_dimension, out_features=10)
  33. self.ac1 = nn.ReLU()
  34. self.linear2 = nn.Linear(in_features=10, out_features=10)
  35. self.ac2 = nn.ReLU()
  36. self.output = nn.Linear(in_features=10, out_features=num_labels)
  37. def forward(self, x):
  38. x = self.ac1(self.linear1(x))
  39. x = self.ac2(self.linear2(x))
  40. x = self.output(x)
  41. return x
  42. dataset = OneflowArgMaxDataset(10, 100)
  43. model = Model(10, 10)
  44. loss_func = nn.CrossEntropyLoss()
  45. optimizer = oneflow.optim.Adam(model.parameters(), 0.001)
  46. dataloader = oneflow.utils.data.DataLoader(dataset, batch_size=32)
  47. device = "cuda"
  48. model.to(device)
  49. # model = ddp(model)
  50. loss_func.to(device)
  51. # model = model.to_global(PLACEMENT, B)
  52. for i in range(2):
  53. for i, (x, y) in enumerate(dataloader):
  54. if i % 2 != oneflow.env.get_rank():
  55. continue
  56. x = x.to(device)
  57. y = y.to(device)
  58. # x = x.to_global(PLACEMENT, S0)
  59. # y = y.to_global(PLACEMENT, S0)
  60. output = model(x)
  61. loss = loss_func(output, y)
  62. optimizer.zero_grad()
  63. loss.backward()
  64. optimizer.step()
  65. oneflow.save(model, "ttt")
  66. print("end.")
  67. # python -m oneflow.distributed.launch --nproc_per_node 2 dist.py