
train.py

# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
  15. """
  16. #################train vgg16 example on cifar10########################
  17. python train.py --data_path=$DATA_HOME --device_id=$DEVICE_ID
  18. """
import argparse
import datetime
import os
import random

import numpy as np

import mindspore.nn as nn
from mindspore import Tensor
from mindspore import context
from mindspore.nn.optim.momentum import Momentum
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
from mindspore.train.model import Model
from mindspore.train.serialization import load_param_into_net, load_checkpoint

from mindarmour.utils import LogUtil

from vgg.dataset import vgg_create_dataset100
from vgg.warmup_step_lr import warmup_step_lr, lr_steps
from vgg.warmup_cosine_annealing_lr import warmup_cosine_annealing_lr
from vgg.utils.util import get_param_groups
from vgg.vgg import vgg16
from vgg.config import cifar_cfg as cfg

TAG = "train"

random.seed(1)
np.random.seed(1)
def parse_args(cloud_args=None):
    """Parse command-line arguments and fold in values from the static cifar10 config."""
    parser = argparse.ArgumentParser('mindspore classification training')
    parser.add_argument('--device_target', type=str, default='Ascend', choices=['Ascend', 'GPU'],
                        help='device where the code will be implemented. (Default: Ascend)')
    parser.add_argument('--device_id', type=int, default=1,
                        help='device id of GPU or Ascend. (Default: 1)')
    # dataset related
    parser.add_argument('--data_path', type=str, default='', help='train data dir')
    # network related
    parser.add_argument('--pre_trained', default='', type=str,
                        help='model_path, local pretrained model to load')
    parser.add_argument('--lr_gamma', type=float, default=0.1,
                        help='decay factor of the exponential lr scheduler')
    parser.add_argument('--eta_min', type=float, default=0.,
                        help='eta_min in the cosine_annealing scheduler')
    parser.add_argument('--T_max', type=int, default=150,
                        help='T_max in the cosine_annealing scheduler')
    # logging and checkpoint related
    parser.add_argument('--log_interval', type=int, default=100, help='logging interval')
    parser.add_argument('--ckpt_path', type=str, default='outputs/', help='checkpoint save location')
    parser.add_argument('--ckpt_interval', type=int, default=2,
                        help='save a checkpoint every N epochs')
    parser.add_argument('--is_save_on_master', type=int, default=1,
                        help='save ckpt on master rank only (1) or on all ranks (0)')
    args_opt = parser.parse_args()
    args_opt = merge_args(args_opt, cloud_args)

    # single-device defaults; a distributed launcher would overwrite these
    args_opt.rank = 0
    args_opt.group_size = 1

    # copy the remaining hyper-parameters from the static cifar10 config
    args_opt.label_smooth = cfg.label_smooth
    args_opt.label_smooth_factor = cfg.label_smooth_factor
    args_opt.lr_scheduler = cfg.lr_scheduler
    args_opt.loss_scale = cfg.loss_scale
    args_opt.max_epoch = cfg.max_epoch
    args_opt.warmup_epochs = cfg.warmup_epochs
    args_opt.lr = cfg.lr
    args_opt.lr_init = cfg.lr_init
    args_opt.lr_max = cfg.lr_max
    args_opt.momentum = cfg.momentum
    args_opt.weight_decay = cfg.weight_decay
    args_opt.per_batch_size = cfg.batch_size
    args_opt.num_classes = cfg.num_classes
    args_opt.buffer_size = cfg.buffer_size
    args_opt.ckpt_save_max = cfg.keep_checkpoint_max
    args_opt.pad_mode = cfg.pad_mode
    args_opt.padding = cfg.padding
    args_opt.has_bias = cfg.has_bias
    args_opt.batch_norm = cfg.batch_norm
    args_opt.initialize_mode = cfg.initialize_mode
    args_opt.has_dropout = cfg.has_dropout
    args_opt.lr_epochs = list(map(int, cfg.lr_epochs.split(',')))
    args_opt.image_size = list(map(int, cfg.image_size.split(',')))
    return args_opt


def merge_args(args_opt, cloud_args):
    """Override parsed arguments with any non-empty values found in cloud_args."""
    args_dict = vars(args_opt)
    if isinstance(cloud_args, dict):
        for key_arg in cloud_args.keys():
            val = cloud_args[key_arg]
            if key_arg in args_dict and val:
                # cast the cloud value to the type of the existing default
                arg_type = type(args_dict[key_arg])
                if arg_type is not None:
                    val = arg_type(val)
                args_dict[key_arg] = val
    return args_opt
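
# Example (hypothetical values): a cloud launcher typically passes overrides as
# strings; merge_args casts each one back to the type of the parser's default,
# so '0' becomes the int 0 here:
#
#   args = parse_args(cloud_args={'device_id': '0', 'data_path': '/cache/data'})
#   assert args.device_id == 0
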
if __name__ == '__main__':
    args = parse_args()

    device_num = int(os.environ.get("DEVICE_NUM", 1))
    context.set_context(device_id=args.device_id)
    context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)

    # decide whether only the master rank or every rank saves checkpoints,
    # compatible with model parallelism
    args.rank_save_ckpt_flag = 0
    if args.is_save_on_master:
        if args.rank == 0:
            args.rank_save_ckpt_flag = 1
    else:
        args.rank_save_ckpt_flag = 1

    # logger
    args.outputs_dir = os.path.join(args.ckpt_path,
                                    datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    args.logger = LogUtil.get_instance()
    args.logger.set_level(20)

    # load the training dataset
    dataset = vgg_create_dataset100(args.data_path, args.image_size, args.per_batch_size,
                                    args.rank, args.group_size)
    batch_num = dataset.get_dataset_size()
    args.steps_per_epoch = dataset.get_dataset_size()

    # build the network
    args.logger.info(TAG, 'start create network')
    network = vgg16(args.num_classes, args)

    # optionally warm-start from a local pretrained checkpoint
    if args.pre_trained:
        load_param_into_net(network, load_checkpoint(args.pre_trained))

    # learning-rate scheduler
    if args.lr_scheduler == 'exponential':
        lr = warmup_step_lr(args.lr,
                            args.lr_epochs,
                            args.steps_per_epoch,
                            args.warmup_epochs,
                            args.max_epoch,
                            gamma=args.lr_gamma)
    elif args.lr_scheduler == 'cosine_annealing':
        lr = warmup_cosine_annealing_lr(args.lr,
                                        args.steps_per_epoch,
                                        args.warmup_epochs,
                                        args.max_epoch,
                                        args.T_max,
                                        args.eta_min)
    elif args.lr_scheduler == 'step':
        lr = lr_steps(0, lr_init=args.lr_init, lr_max=args.lr_max,
                      warmup_epochs=args.warmup_epochs,
                      total_epochs=args.max_epoch, steps_per_epoch=batch_num)
    else:
        raise NotImplementedError(args.lr_scheduler)

    # optimizer
    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
    model = Model(network, loss_fn=loss, optimizer=opt, metrics={'acc'},
                  amp_level="O2", keep_batchnorm_fp32=False, loss_scale_manager=None)

    # checkpoint saving
    callbacks = [LossMonitor()]
    if args.rank_save_ckpt_flag:
        ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval * args.steps_per_epoch,
                                       keep_checkpoint_max=args.ckpt_save_max)
        ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                  directory=args.outputs_dir,
                                  prefix='{}'.format(args.rank))
        callbacks.append(ckpt_cb)

    model.train(args.max_epoch, dataset, callbacks=callbacks)
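
The three learning-rate schedules above live in the repository's local vgg package, whose sources are not shown on this page. As orientation, here is a minimal sketch of what a warmup-plus-cosine-annealing schedule of this shape typically computes, assuming the standard recipe (linear warmup from 0 to the base lr, then cosine decay toward eta_min parameterized by T_max); the function name and body are illustrative, not the actual vgg.warmup_cosine_annealing_lr implementation.

import math
import numpy as np

def warmup_cosine_annealing_lr_sketch(lr, steps_per_epoch, warmup_epochs,
                                      max_epoch, t_max, eta_min=0.0):
    """Hypothetical per-step schedule: linear warmup, then cosine annealing."""
    warmup_steps = steps_per_epoch * warmup_epochs
    lr_each_step = []
    for step in range(steps_per_epoch * max_epoch):
        if step < warmup_steps:
            # ramp linearly from 0 up to the base learning rate
            cur_lr = lr * (step + 1) / warmup_steps
        else:
            # cosine decay from lr toward eta_min, driven by epoch / t_max
            cur_epoch = step // steps_per_epoch
            cur_lr = eta_min + (lr - eta_min) * (1 + math.cos(math.pi * cur_epoch / t_max)) / 2
        lr_each_step.append(cur_lr)
    return np.array(lr_each_step, dtype=np.float32)

Whatever the exact formula, each scheduler returns one learning-rate value per training step; wrapping the array in Tensor(lr) lets the Momentum optimizer consume one entry per step.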

MindArmour addresses the security and privacy problems of AI. It aims to strengthen the security and trustworthiness of models and to protect users' data privacy, and consists of three main modules: an adversarial example robustness module, a fuzz testing module, and a privacy protection and evaluation module.

Adversarial example robustness module: this module evaluates a model's robustness against adversarial examples and provides model-hardening methods that strengthen its resistance to adversarial attacks. It contains four submodules: adversarial example generation, adversarial example detection, model defense, and attack/defense evaluation.
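
To connect this description to the script above: once train.py has written a checkpoint, the adversarial example generation submodule can be pointed at the trained network. The sketch below uses FGSM and assumes MindArmour's FastGradientSignMethod class with its generate(inputs, labels) method; the import path and expected label format differ across MindArmour releases (older versions expose mindarmour.attacks), so treat the exact names as illustrative rather than as this repository's tested usage.

import numpy as np
from mindarmour.adv_robustness.attacks import FastGradientSignMethod
# older MindArmour releases: from mindarmour.attacks import FastGradientSignMethod

# `network` is the trained vgg16 from train.py; `images` and `labels` are one
# batch of cifar10 data as numpy arrays (assumed: NCHW float32 images in [0, 1]
# and integer class indices; the expected label encoding depends on the loss_fn
# the attack is constructed with).
attack = FastGradientSignMethod(network, eps=0.03)
adv_images = attack.generate(images.astype(np.float32), labels.astype(np.int32))
# adv_images has the same shape as `images`; feeding it back through the model
# and comparing accuracy against the clean batch gives a first robustness signal.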