
darts_train.py
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import sys
sys.path.append('../..')
import time
import logging
from argparse import ArgumentParser
import torch
import torch.nn as nn
import datasets
from model import CNN
from utils import accuracy
from dartstrainer import DartsTrainer
from pytorch.utils import *
from pytorch.callbacks import BestArchitectureCheckpoint, LRSchedulerCallback

logger = logging.getLogger(__name__)

if __name__ == "__main__":
    parser = ArgumentParser("DARTS train")
    parser.add_argument("--data_dir", type=str,
                        default='../data/', help="root directory of the dataset")
    parser.add_argument("--result_path", type=str,
                        default='.0/result.json', help="training result")
    parser.add_argument("--log_path", type=str,
                        default='.0/log', help="log for info")
    parser.add_argument("--search_space_path", type=str,
                        default='./search_space.json', help="search space json file")
    parser.add_argument("--best_selected_space_path", type=str,
                        default='./best_selected_space.json', help="final best selected space")
    parser.add_argument('--trial_id', type=int, default=0, metavar='N',
                        help='trial id, starting from 0')
    parser.add_argument("--layers", default=8, type=int)
    parser.add_argument("--batch_size", default=64, type=int)
    parser.add_argument("--log_frequency", default=10, type=int)
    parser.add_argument("--epochs", default=5, type=int)
    parser.add_argument("--channels", default=16, type=int)
    parser.add_argument('--model_lr', type=float, default=0.025,
                        help='learning rate for training model weights')
    parser.add_argument('--arch_lr', type=float, default=3e-4,
                        help='learning rate for training architecture')
    parser.add_argument("--unrolled", default=False, action="store_true")
    parser.add_argument("--visualization", default=False, action="store_true")
    parser.add_argument("--class_num", default=10, type=int, help="number of classes (cifar10)")
    args = parser.parse_args()

    # Prepare output paths, logging, and the random seed (one seed per trial).
    mkdirs(args.result_path, args.log_path, args.search_space_path, args.best_selected_space_path)
    init_logger(args.log_path, "info")
    logger.info(args)
    set_seed(args.trial_id)

    # Build the dataset, supernet model, loss, optimizer, and LR schedule.
    dataset_train, dataset_valid = datasets.get_dataset("cifar10", root=args.data_dir)
    model = CNN(32, 3, args.channels, args.class_num, args.layers)
    criterion = nn.CrossEntropyLoss()
    optim = torch.optim.SGD(model.parameters(), args.model_lr, momentum=0.9, weight_decay=3.0E-4)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, args.epochs, eta_min=0.001)

    trainer = DartsTrainer(model,
                           loss=criterion,
                           metrics=lambda output, target: accuracy(output, target, topk=(1,)),
                           optimizer=optim,
                           num_epochs=args.epochs,
                           dataset_train=dataset_train,
                           dataset_valid=dataset_valid,
                           search_space_path=args.search_space_path,
                           batch_size=args.batch_size,
                           log_frequency=args.log_frequency,
                           result_path=args.result_path,
                           unrolled=args.unrolled,
                           arch_lr=args.arch_lr,
                           callbacks=[LRSchedulerCallback(lr_scheduler),
                                      BestArchitectureCheckpoint(args.best_selected_space_path, args.epochs)])
    if args.visualization:
        trainer.enable_visualization()

    t1 = time.time()
    trainer.train()
    # res_json = trainer.result
    cost_time = time.time() - t1
    # The backend filters this line from the terminal output: {"type": "Cost_time", "result": {"value": "* s"}}
    logger.info({"type": "Cost_time", "result": {"value": str(cost_time) + ' s'}})
    with open(args.result_path, "a") as file:
        file.write(str({"type": "Cost_time", "result": {"value": str(cost_time) + ' s'}}))
    # res_json["Cost_time"] = str(cost_time) + ' s'
    # dump_global_result(args.result_path, res_json)
