
generate_adv_samples.py 6.4 kB

# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Generate natural robustness samples."""
import sys
import json
import time
import lmdb
from mindspore_serving.client import Client

from cnn_ctc.src.model_utils.config import config

config_perturb = [
    {"method": "Contrast", "params": {"alpha": 1.5, "beta": 0}},
    {"method": "GaussianBlur", "params": {"ksize": 5}},
    {"method": "SaltAndPepperNoise", "params": {"factor": 0.05}},
    {"method": "Translate", "params": {"x_bias": 0.1, "y_bias": -0.1}},
    {"method": "Scale", "params": {"factor_x": 0.8, "factor_y": 0.8}},
    {"method": "Shear", "params": {"factor": 1.5, "direction": "horizontal"}},
    {"method": "Rotate", "params": {"angle": 30}},
    {"method": "MotionBlur", "params": {"degree": 5, "angle": 45}},
    {"method": "GradientBlur", "params": {"point": [50, 100], "kernel_num": 3, "center": True}},
    {"method": "GradientLuminance", "params": {"color_start": [255, 255, 255], "color_end": [0, 0, 0],
                                               "start_point": [100, 150], "scope": 0.3,
                                               "bright_rate": 0.3, "pattern": "light", "mode": "circle"}},
    {"method": "GradientLuminance", "params": {"color_start": [255, 255, 255],
                                               "color_end": [0, 0, 0], "start_point": [150, 200],
                                               "scope": 0.3, "pattern": "light", "mode": "horizontal"}},
    {"method": "GradientLuminance", "params": {"color_start": [255, 255, 255], "color_end": [0, 0, 0],
                                               "start_point": [150, 200], "scope": 0.3,
                                               "pattern": "light", "mode": "vertical"}},
    {"method": "Curve", "params": {"curves": 0.5, "depth": 3, "mode": "vertical"}},
    {"method": "Perspective", "params": {"ori_pos": [[0, 0], [0, 800], [800, 0], [800, 800]],
                                         "dst_pos": [[10, 0], [0, 800], [790, 0], [800, 800]]}},
]


def generate_adv_iii5t_3000(lmdb_paths, lmdb_save_path, perturb_config):
    """generate perturb iii5t_3000"""
    max_len = int((26 + 1) // 2)
    instances = []
    methods_number = 1
    outputs_number = 2
    perturb_config = json.dumps(perturb_config)

    env = lmdb.open(lmdb_paths, max_readers=32, readonly=True, lock=False, readahead=False, meminit=False)
    if not env:
        print('cannot create lmdb from %s' % (lmdb_paths))
        sys.exit(0)

    with env.begin(write=False) as txn:
        n_samples = int(txn.get('num-samples'.encode()))

        # Filtering
        filtered_labels = []
        filtered_index_list = []
        for index in range(n_samples):
            index += 1  # lmdb starts with 1
            label_key = 'label-%09d'.encode() % index
            label = txn.get(label_key).decode('utf-8')

            if len(label) > max_len:
                continue

            illegal_sample = False
            for char_item in label.lower():
                if char_item not in config.CHARACTER:
                    illegal_sample = True
                    break
            if illegal_sample:
                continue

            filtered_labels.append(label)
            filtered_index_list.append(index)
            img_key = 'image-%09d'.encode() % index
            imgbuf = txn.get(img_key)
            instances.append({"img": imgbuf, 'perturb_config': perturb_config, "methods_number": methods_number,
                              "outputs_number": outputs_number})

    print(f'num of samples in IIIT dataset: {len(filtered_index_list)}')

    client = Client("0.0.0.0:5500", "perturbation", "natural_perturbation")
    start_time = time.time()
    result = client.infer(instances)
    end_time = time.time()
    print('generated natural perturbs images cost: ', end_time - start_time)

    env_save = lmdb.open(lmdb_save_path, map_size=1099511627776)
    txn = env.begin(write=False)
    with env_save.begin(write=True) as txn_save:
        new_index = 1
        for i, index in enumerate(filtered_index_list):
            try:
                file_names = result[i]['file_names'].split(';')
            except KeyError:
                error_msg = result[i]
                msg = 'serving failed to generate the {}th image in origin dataset with ' \
                      'error messages: {}'.format(i, error_msg)
                print(KeyError(msg))
                continue
            length = result[i]['file_length'].tolist()
            before = 0
            label = filtered_labels[i]
            label = label.encode()
            img_key = 'image-%09d'.encode() % index
            ori_img = txn.get(img_key)
            names_dict = result[i]['names_dict']
            names_dict = json.loads(names_dict)
            for name, leng in zip(file_names, length):
                label_key = 'label-%09d'.encode() % new_index
                txn_save.put(label_key, label)
                img_key = 'image-%09d'.encode() % new_index
                adv_img = result[i]['results']
                adv_img = adv_img[before:before + leng]
                adv_img_key = 'adv_image-%09d'.encode() % new_index
                txn_save.put(img_key, ori_img)
                txn_save.put(adv_img_key, adv_img)
                adv_info_key = 'adv_info-%09d'.encode() % new_index
                adv_info = json.dumps(names_dict[name]).encode()
                txn_save.put(adv_info_key, adv_info)
                before = before + leng
                new_index += 1
        txn_save.put("num-samples".encode(), str(new_index - 1).encode())
    env.close()


if __name__ == '__main__':
    save_path_lmdb = config.ADV_TEST_DATASET_PATH
    generate_adv_iii5t_3000(config.TEST_DATASET_PATH, save_path_lmdb, config_perturb)
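For each new index, the script stores the label (label-%09d), the original image (image-%09d), the perturbed image (adv_image-%09d), a JSON description of the applied perturbation (adv_info-%09d), and a num-samples counter. The following is a minimal sketch of how the resulting LMDB could be read back for evaluation; it reuses config.ADV_TEST_DATASET_PATH from the script above, while read_adv_samples itself is a hypothetical helper, not part of the repository.

# Sketch: iterate over the adversarial LMDB written by generate_adv_iii5t_3000.
import json
import lmdb

from cnn_ctc.src.model_utils.config import config


def read_adv_samples(lmdb_save_path):
    """Yield (label, original image bytes, perturbed image bytes, perturbation info)."""
    env = lmdb.open(lmdb_save_path, max_readers=32, readonly=True, lock=False, readahead=False, meminit=False)
    with env.begin(write=False) as txn:
        n_samples = int(txn.get('num-samples'.encode()))
        for index in range(1, n_samples + 1):
            label = txn.get('label-%09d'.encode() % index).decode('utf-8')
            ori_img = txn.get('image-%09d'.encode() % index)      # original image buffer
            adv_img = txn.get('adv_image-%09d'.encode() % index)  # naturally perturbed image buffer
            adv_info = json.loads(txn.get('adv_info-%09d'.encode() % index).decode('utf-8'))
            yield label, ori_img, adv_img, adv_info
    env.close()


if __name__ == '__main__':
    for label, ori_img, adv_img, adv_info in read_adv_samples(config.ADV_TEST_DATASET_PATH):
        print(label, adv_info)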

MindArmour focuses on AI security and privacy. It aims to enhance the security and trustworthiness of models and to protect user data privacy. It mainly consists of three modules: the adversarial example robustness module, the Fuzz Testing module, and the privacy protection and evaluation module. Adversarial example robustness module: this module evaluates a model's robustness against adversarial examples and provides model-enhancement methods that strengthen its resistance to adversarial attacks and improve its robustness. It contains four submodules: adversarial example generation, adversarial example detection, model defense, and attack/defense evaluation.
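As a minimal sketch of the adversarial example generation submodule, the snippet below crafts FGSM examples for a trained classifier and compares clean and adversarial accuracy. It assumes the FastGradientSignMethod class exposed under mindarmour.adv_robustness.attacks (the import path and constructor signature may differ across MindArmour versions); net, test_images, and test_labels are placeholders, not names from this repository.

import numpy as np
from mindspore import Tensor
from mindspore.nn import SoftmaxCrossEntropyWithLogits
# Assumed import path; older MindArmour releases may expose the attack elsewhere.
from mindarmour.adv_robustness.attacks import FastGradientSignMethod


def generate_fgsm_examples(net, test_images, test_labels, eps=0.07):
    """Hypothetical helper: craft FGSM adversarial examples for a trained net.

    net is a trained mindspore.nn.Cell classifier; test_images and test_labels
    are numpy arrays with one-hot labels. All three are placeholders here.
    """
    loss_fn = SoftmaxCrossEntropyWithLogits(sparse=False)
    attack = FastGradientSignMethod(net, eps=eps, loss_fn=loss_fn)
    adv_images = attack.generate(test_images, test_labels)

    # Compare clean vs. adversarial accuracy to gauge robustness.
    clean_pred = net(Tensor(test_images)).asnumpy().argmax(axis=1)
    adv_pred = net(Tensor(adv_images)).asnumpy().argmax(axis=1)
    true_cls = test_labels.argmax(axis=1)
    print('clean acc: %.4f, adv acc: %.4f' % (np.mean(clean_pred == true_cls),
                                              np.mean(adv_pred == true_cls)))
    return adv_images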