You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

train.py 5.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """cnnctc train"""
  16. import numpy as np
  17. import mindspore
  18. import mindspore.common.dtype as mstype
  19. from mindspore import context
  20. from mindspore import Tensor
  21. from mindspore.common import set_seed
  22. from mindspore.communication.management import init, get_rank, get_group_size
  23. from mindspore.dataset import GeneratorDataset
  24. from mindspore.train.callback import ModelCheckpoint, CheckpointConfig
  25. from mindspore.train.model import Model
  26. from mindspore.train.serialization import load_checkpoint, load_param_into_net
  27. from src.callback import LossCallBack
  28. from src.cnn_ctc import CNNCTC, CTCLoss, WithLossCell, CNNCTCTrainOneStepWithLossScaleCell
  29. from src.dataset import STMJGeneratorBatchFixedLength, STMJGeneratorBatchFixedLengthPara
  30. from src.lr_schedule import dynamic_lr
  31. from src.model_utils.config import config
  32. from src.model_utils.device_adapter import get_device_id
  33. from src.model_utils.moxing_adapter import moxing_wrapper
# Fix the global random seed so data shuffling and weight init are reproducible.
set_seed(1)
# Run in graph (compiled) mode; IR graph dumping is disabled, so the
# save_graphs_path value is inert here.
context.set_context(mode=context.GRAPH_MODE, save_graphs=False, save_graphs_path=".")
  36. def dataset_creator(run_distribute):
  37. """dataset creator"""
  38. if run_distribute:
  39. st_dataset = STMJGeneratorBatchFixedLengthPara()
  40. else:
  41. st_dataset = STMJGeneratorBatchFixedLength()
  42. ds = GeneratorDataset(st_dataset,
  43. ['img', 'label_indices', 'text', 'sequence_length'],
  44. num_parallel_workers=8)
  45. return ds
  46. def modelarts_pre_process():
  47. pass
@moxing_wrapper(pre_process=modelarts_pre_process)
def train():
    """Train the CNN-CTC model.

    Reads all hyper-parameters and paths from the module-level `config`
    object. Supports Ascend and GPU targets, each in standalone or
    data-parallel distributed mode. Side effects: configures the MindSpore
    context, runs the training loop, and writes checkpoints under
    `config.SAVE_PATH`.
    """
    target = config.device_target
    context.set_context(device_target=target)
    if target == "Ascend":
        device_id = get_device_id()
        context.set_context(device_id=device_id)
        if config.run_distribute:
            init()
            # device_num is not set here; presumably it is picked up from the
            # Ascend environment/rank table — TODO confirm.
            context.set_auto_parallel_context(parallel_mode="data_parallel")
        # On Ascend every rank writes to the same directory.
        ckpt_save_dir = config.SAVE_PATH
    else:
        # GPU target
        device_id = get_device_id()
        context.set_context(device_id=device_id)
        if config.run_distribute:
            init()
            context.set_auto_parallel_context(device_num=get_group_size(),
                                              parallel_mode="data_parallel",
                                              gradients_mean=False,
                                              gradient_fp32_sync=False)
            # Per-rank checkpoint directory so distributed workers don't clash.
            ckpt_save_dir = config.SAVE_PATH + "ckpt_" + str(get_rank()) + "/"
            print(ckpt_save_dir)
        else:
            ckpt_save_dir = config.SAVE_PATH + "ckpt_standalone/"
    ds = dataset_creator(config.run_distribute)
    net = CNNCTC(config.NUM_CLASS, config.HIDDEN_SIZE, config.FINAL_FEATURE_WIDTH)
    net.set_train(True)
    # NOTE(review): "PRED_TRAINED" looks like a misspelling of "PRE_TRAINED",
    # but the name must match the config schema — confirm before renaming.
    if config.PRED_TRAINED:
        param_dict = load_checkpoint(config.PRED_TRAINED)
        load_param_into_net(net, param_dict)
        print('parameters loaded!')
    else:
        print('train from scratch...')
    criterion = CTCLoss()
    dataset_size = ds.get_dataset_size()
    # Dynamic learning-rate schedule, one entry per step.
    lr = Tensor(dynamic_lr(config, dataset_size), mstype.float32)
    opt = mindspore.nn.RMSProp(params=net.trainable_params(),
                               centered=True,
                               learning_rate=lr,
                               momentum=config.MOMENTUM,
                               loss_scale=config.LOSS_SCALE)
    net = WithLossCell(net, criterion)
    if target == "Ascend":
        # Fixed loss scaling with drop_overflow_update=False: the scale is
        # applied but updates are not skipped on overflow.
        loss_scale_manager = mindspore.train.loss_scale_manager.FixedLossScaleManager(
            config.LOSS_SCALE, False)
        net.set_train(True)
        # amp_level="O2": mixed-precision training on Ascend.
        model = Model(net, optimizer=opt, loss_scale_manager=loss_scale_manager, amp_level="O2")
    else:
        # GPU path: loss scaling is handled inside the custom train-one-step
        # cell, so the Model gets no optimizer of its own.
        scaling_sens = Tensor(np.full((1), config.LOSS_SCALE), dtype=mstype.float32)
        net = CNNCTCTrainOneStepWithLossScaleCell(net, opt, scaling_sens)
        net.set_train(True)
        model = Model(net)
    callback = LossCallBack()
    config_ck = CheckpointConfig(save_checkpoint_steps=config.SAVE_CKPT_PER_N_STEP,
                                 keep_checkpoint_max=config.KEEP_CKPT_MAX_NUM)
    ckpoint_cb = ModelCheckpoint(prefix="CNNCTC", config=config_ck, directory=ckpt_save_dir)
    if config.run_distribute:
        # Only one worker saves checkpoints; the rest train without the
        # checkpoint callback.
        # NOTE(review): gating on device_id == 0 rather than get_rank() == 0
        # may save on one device per node in multi-node runs — confirm intent.
        if device_id == 0:
            model.train(config.TRAIN_EPOCHS,
                        ds,
                        callbacks=[callback, ckpoint_cb],
                        dataset_sink_mode=False)
        else:
            model.train(config.TRAIN_EPOCHS, ds, callbacks=[callback], dataset_sink_mode=False)
    else:
        model.train(config.TRAIN_EPOCHS,
                    ds,
                    callbacks=[callback, ckpoint_cb],
                    dataset_sink_mode=False)
# Script entry point: run training when executed directly.
if __name__ == '__main__':
    train()

MindArmour关注AI的安全和隐私问题。致力于增强模型的安全可信、保护用户的数据隐私。主要包含3个模块:对抗样本鲁棒性模块、Fuzz Testing模块、隐私保护与评估模块。 对抗样本鲁棒性模块 对抗样本鲁棒性模块用于评估模型对于对抗样本的鲁棒性,并提供模型增强方法用于增强模型抗对抗样本攻击的能力,提升模型鲁棒性。对抗样本鲁棒性模块包含了4个子模块:对抗样本的生成、对抗样本的检测、模型防御、攻防评估。