
search.py

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import sys
sys.path.append('../..')

import json
import logging
import random
import time
from argparse import ArgumentParser
from collections import OrderedDict

import numpy as np
import torch
import torch.nn as nn

import datasets
from macro import GeneralNetwork
from micro import MicroNetwork
from trainer import EnasTrainer
from mutator import EnasMutator
from pytorch.callbacks import (ArchitectureCheckpoint,
                               LRSchedulerCallback)
from pytorch.mutables import LayerChoice, InputChoice
from utils import accuracy, reward_accuracy

torch.cuda.set_device(4)
logger = logging.getLogger('tadl-enas')
# Dump the ENAS search space to search_space.json.
def save_nas_search_space(mutator, file_path):
    result = OrderedDict()
    cur_layer_idx = None
    for mutable in mutator.mutables.traverse():
        if not isinstance(mutable, (LayerChoice, InputChoice)):
            # A mutable scope: remember its key as the prefix for the nested choices.
            cur_layer_idx = mutable.key + '_'
            continue
        # macro search space
        if 'layer' in cur_layer_idx:
            if isinstance(mutable, LayerChoice):
                if 'op_list' not in result:
                    result['op_list'] = [str(i) for i in mutable]
                result[cur_layer_idx + mutable.key] = 'op_list'
            else:
                result[cur_layer_idx + mutable.key] = {
                    'skip_connection': False if mutable.n_chosen else True,
                    'n_chosen': mutable.n_chosen if mutable.n_chosen else '',
                    'choose_from': mutable.choose_from if mutable.choose_from else ''}
        # micro search space
        elif 'node' in cur_layer_idx:
            if isinstance(mutable, LayerChoice):
                if 'op_list' not in result:
                    result['op_list'] = [str(i) for i in mutable]
                result[mutable.key] = 'op_list'
            else:
                result[mutable.key] = {
                    'skip_connection': False if mutable.n_chosen else True,
                    'n_chosen': mutable.n_chosen if mutable.n_chosen else '',
                    'choose_from': mutable.choose_from if mutable.choose_from else ''}
    dump_global_result(file_path, result)
def dump_global_result(res_path, global_result, sort_keys=False):
    with open(res_path, "w") as ss_file:
        json.dump(global_result, ss_file, sort_keys=sort_keys, indent=2)
if __name__ == "__main__":
    parser = ArgumentParser("enas")
    parser.add_argument("--search_space_path", type=str,
                        default='./search_space.json', help="path to write the search space JSON")
    parser.add_argument("--selected_space_path", type=str,
                        default='./selected_space.json', help="path to write the selected (exported) space JSON")
    parser.add_argument("--result_path", type=str,
                        default='./result.json', help="path to write the training result JSON")
    parser.add_argument("--trial_id", type=int, default=0, metavar='N',
                        help="trial id, starting from 0")
    parser.add_argument("--batch-size", default=128, type=int)
    parser.add_argument("--log-frequency", default=10, type=int)
    parser.add_argument("--search_for", choices=["macro", "micro"], default="macro")
    parser.add_argument("--epochs", default=None, type=int,
                        help="number of epochs (default: macro 310, micro 150)")
    args = parser.parse_args()

    # Set random seeds for reproducibility.
    torch.manual_seed(args.trial_id)
    torch.cuda.manual_seed_all(args.trial_id)
    np.random.seed(args.trial_id)
    random.seed(args.trial_id)

    dataset_train, dataset_valid = datasets.get_dataset("cifar10")

    if args.search_for == "macro":
        model = GeneralNetwork()
        num_epochs = args.epochs or 310
        mutator = EnasMutator(model)
    elif args.search_for == "micro":
        model = MicroNetwork(num_layers=6, out_channels=20, num_nodes=5,
                             dropout_rate=0.1, use_aux_heads=True)
        num_epochs = args.epochs or 150
        mutator = EnasMutator(model, tanh_constant=1.1, cell_exit_extra_step=True)
    else:
        raise AssertionError

    # Save the full search space of the network.
    save_nas_search_space(mutator, args.search_space_path)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), 0.05, momentum=0.9, weight_decay=1.0E-4)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=0.001)

    trainer = EnasTrainer(model,
                          loss=criterion,
                          metrics=accuracy,
                          reward_function=reward_accuracy,
                          optimizer=optimizer,
                          callbacks=[LRSchedulerCallback(lr_scheduler)],
                          batch_size=args.batch_size,
                          num_epochs=num_epochs,
                          dataset_train=dataset_train,
                          dataset_valid=dataset_valid,
                          log_frequency=args.log_frequency,
                          mutator=mutator,
                          child_model_path='./' + args.search_for + '_child_model')
    logger.info(trainer.metrics)

    t1 = time.time()
    trainer.train()
    trainer.result["cost_time"] = time.time() - t1
    dump_global_result(args.result_path, trainer.result)

    selected_model = trainer.export_child_model(selected_space=True)
    dump_global_result(args.selected_space_path, selected_model)
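
With the default arguments, a run of search.py writes three JSON files: the full search space (./search_space.json), the training result including its cost_time field (./result.json), and the exported child architecture (./selected_space.json). The snippet below is a minimal sketch, separate from search.py, for inspecting those outputs; the file paths are the script's argparse defaults, and the script name inspect_outputs.py is illustrative only.

# inspect_outputs.py (illustrative, not part of the repository)
import json

with open('./search_space.json') as f:
    search_space = json.load(f)
# 'op_list' holds the candidate operations; the other keys map each
# LayerChoice to 'op_list' and each InputChoice to its skip-connection settings.
print(list(search_space.keys()))

with open('./result.json') as f:
    result = json.load(f)
print(result.get('cost_time'))

with open('./selected_space.json') as f:
    selected_space = json.load(f)
print(selected_space)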
