# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Written by Hao Du and Houwen Peng
# email: haodu8-c@my.cityu.edu.hk and houwen.peng@microsoft.com

import torch
import torch.nn as nn
import torch.nn.functional as F

from ...utils.builder_util import *
from ...utils.search_structure_supernet import *
from ...utils.op_by_layer_dict import flops_op_dict
from ..builders.build_supernet import *

from timm.models.layers import SelectAdaptivePool2d
# Swish is referenced in gen_supernet below; it lives in the same timm
# module as hard_sigmoid.
from timm.models.layers.activations import Swish, hard_sigmoid


class SuperNet(nn.Module):

    def __init__(
            self,
            block_args,
            choices,
            num_classes=1000,
            in_chans=3,
            stem_size=16,
            num_features=1280,
            head_bias=True,
            channel_multiplier=1.0,
            pad_type='',
            act_layer=nn.ReLU,
            drop_rate=0.,
            drop_path_rate=0.,
            slice=4,
            se_kwargs=None,
            norm_layer=nn.BatchNorm2d,
            logger=None,
            norm_kwargs=None,
            global_pool='avg',
            resunit=False,
            dil_conv=False,
            verbose=False):
        super(SuperNet, self).__init__()

        self.num_classes = num_classes
        self.num_features = num_features
        self.drop_rate = drop_rate
        self._in_chs = in_chans
        self.logger = logger
        # Guard against the default norm_kwargs=None before it is unpacked.
        norm_kwargs = norm_kwargs or {}

        # Stem
        stem_size = round_channels(stem_size, channel_multiplier)
        self.conv_stem = create_conv2d(
            self._in_chs, stem_size, 3, stride=2, padding=pad_type)
        self.bn1 = norm_layer(stem_size, **norm_kwargs)
        self.act1 = act_layer(inplace=True)
        self._in_chs = stem_size

        # Middle stages (IR/ER/DS Blocks)
        builder = SuperNetBuilder(
            choices,
            channel_multiplier,
            8,
            None,
            32,
            pad_type,
            act_layer,
            se_kwargs,
            norm_layer,
            norm_kwargs,
            drop_path_rate,
            verbose=verbose,
            resunit=resunit,
            dil_conv=dil_conv,
            logger=self.logger)
        blocks = builder(self._in_chs, block_args)
        self.blocks = nn.Sequential(*blocks)
        self._in_chs = builder.in_chs

        # Head + Pooling
        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
        self.conv_head = create_conv2d(
            self._in_chs, self.num_features, 1,
            padding=pad_type, bias=head_bias)
        self.act2 = act_layer(inplace=True)

        # Classifier
        self.classifier = nn.Linear(
            self.num_features * self.global_pool.feat_mult(),
            self.num_classes)

        # Meta layer: a single linear unit over `slice` concatenated logit
        # vectors (see forward_meta).
        self.meta_layer = nn.Linear(self.num_classes * slice, 1)

        efficientnet_init_weights(self)

    def get_classifier(self):
        return self.classifier

    def reset_classifier(self, num_classes, global_pool='avg'):
        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
        self.num_classes = num_classes
        self.classifier = nn.Linear(
            self.num_features * self.global_pool.feat_mult(),
            num_classes) if self.num_classes else None

    def forward_features(self, x):
        x = self.conv_stem(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.blocks(x)
        x = self.global_pool(x)
        x = self.conv_head(x)
        x = self.act2(x)
        return x

    def forward(self, x):
        x = self.forward_features(x)
        x = x.flatten(1)
        if self.drop_rate > 0.:
            x = F.dropout(x, p=self.drop_rate, training=self.training)
        return self.classifier(x)

    def forward_meta(self, features):
        return self.meta_layer(features.view(1, -1))

    def rand_parameters(self, architecture, meta=False):
        # Yield the parameters that belong to a sampled sub-network:
        # with meta=True, only the meta layer; otherwise the stem/head
        # weights plus the parameters of the chosen op in every block
        # (an op index of -1 marks a skipped block).
        for name, param in self.named_parameters(recurse=True):
            if 'meta' in name and meta:
                yield param
            elif 'blocks' not in name and 'meta' not in name and (not meta):
                yield param

        if not meta:
            for layer, layer_arch in zip(self.blocks, architecture):
                for blocks, arch in zip(layer, layer_arch):
                    if arch == -1:
                        continue
                    for name, param in blocks[arch].named_parameters(
                            recurse=True):
                        yield param


class Classifier(nn.Module):
    def __init__(self, num_classes=1000):
        super(Classifier, self).__init__()
        self.classifier = nn.Linear(num_classes, num_classes)

    def forward(self, x):
        return self.classifier(x)
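
# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original file: `rand_parameters`
# yields only the weights of a sampled path, so a path-specific optimizer
# can be rebuilt per training step. The `architecture` encoding assumed
# here mirrors the loop in `SuperNet.rand_parameters`: one list of op
# indices per layer of `model.blocks`, with -1 marking a skipped block.
# The function name is hypothetical, for demonstration only.
# ---------------------------------------------------------------------------
def _example_path_optimizers(model, architecture):
    # Updates the stem/head weights plus the chosen op in every block.
    path_optimizer = torch.optim.SGD(
        model.rand_parameters(architecture), lr=0.1, momentum=0.9)
    # Updates only the meta layer (parameters whose name contains 'meta').
    meta_optimizer = torch.optim.Adam(
        model.rand_parameters(architecture, meta=True), lr=1e-4)
    return path_optimizer, meta_optimizer
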
def gen_supernet(flops_minimum=0, flops_maximum=600, **kwargs):
    # Per-block search space: each choice block picks a kernel size and an
    # expansion ratio.
    choices = {'kernel_size': [3, 5, 7], 'exp_ratio': [4, 6]}

    num_features = 1280

    # act_layer = HardSwish
    act_layer = Swish

    arch_def = [
        # stage 0, 112x112 in
        ['ds_r1_k3_s1_e1_c16_se0.25'],
        # stage 1, 112x112 in
        ['ir_r1_k3_s2_e4_c24_se0.25', 'ir_r1_k3_s1_e4_c24_se0.25',
         'ir_r1_k3_s1_e4_c24_se0.25', 'ir_r1_k3_s1_e4_c24_se0.25'],
        # stage 2, 56x56 in
        ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s1_e4_c40_se0.25',
         'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25'],
        # stage 3, 28x28 in
        ['ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25',
         'ir_r1_k3_s1_e4_c80_se0.25', 'ir_r2_k3_s1_e4_c80_se0.25'],
        # stage 4, 14x14 in
        ['ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
         'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25'],
        # stage 5, 14x14 in
        ['ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25',
         'ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25'],
        # stage 6, 7x7 in
        ['cn_r1_k1_s1_c320_se0.25'],
    ]

    # Search for a per-stage depth allocation that satisfies the FLOPs
    # bounds; sta_num holds the resulting block count of each stage.
    sta_num, arch_def, resolution = search_for_layer(
        flops_op_dict, arch_def, flops_minimum, flops_maximum)
    if sta_num is None or arch_def is None or resolution is None:
        raise ValueError('Invalid FLOPs Settings')

    model_kwargs = dict(
        block_args=decode_arch_def(arch_def),
        choices=choices,
        num_features=num_features,
        stem_size=16,
        norm_kwargs=resolve_bn_args(kwargs),
        act_layer=act_layer,
        se_kwargs=dict(
            act_layer=nn.ReLU,
            gate_fn=hard_sigmoid,
            reduce_mid=True,
            divisor=8),
        **kwargs,
    )
    model = SuperNet(**model_kwargs)
    return model, sta_num, resolution, arch_def
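
# ---------------------------------------------------------------------------
# Illustrative usage, not part of the original file. Because this module
# uses relative imports, run it via `python -m` from the package root.
# Extra kwargs such as `num_classes` are forwarded to SuperNet.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    supernet, sta_num, resolution, arch_def = gen_supernet(
        flops_minimum=0, flops_maximum=600, num_classes=1000)
    print('blocks per stage:', sta_num)
    print('input resolution:', resolution)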