MixNet

Published 2019-08-22 19:46


 

Network code:

https://github.com/romulus0914/MixNet-Pytorch/blob/master/mixnet.py

 

https://github.com/rwightman/pytorch-image-models/blob/d4debe6597556be36645df18650285cfad1f7ae9/timm/models/gen_efficientnet.py

 

https://github.com/rwightman/pytorch-image-models

 

Timing notes (GTX 1070):

MixNet-S: 28 ms at input sizes 512 and 416; 68 ms at batch size 8; 66 ms at 416 with batch size 10; at 416 with batch size 12 the GPU runs out of memory.

MixNet-M: 36 ms at input sizes 512 and 416; batch size 8 already runs out of memory.
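These figures come from the crude timing loop at the bottom of the listing below, which launches CUDA work asynchronously, so a synchronized sweep gives more trustworthy numbers. Here is a minimal sketch of such a sweep; it assumes the listing below is saved as mixnet.py, and the benchmark helper, warmup count, and batch-size list are illustrative choices rather than anything from the original post.

# Hypothetical batch-size sweep; assumes the listing below is saved as mixnet.py.
import time

import torch

from mixnet import MixNet  # the MixNet class from the listing below


def benchmark(model, batch_size, input_size=416, warmup=5, iters=20):
    x = torch.rand(batch_size, 3, input_size, input_size).cuda()
    with torch.no_grad():
        for _ in range(warmup):  # warm up kernels and the cuDNN autotuner
            model(x)
        torch.cuda.synchronize()  # drain queued kernels before starting the clock
        start = time.time()
        for _ in range(iters):
            model(x)
        torch.cuda.synchronize()  # wait for the last forward pass to finish
    return (time.time() - start) / iters * 1000.0  # average ms per forward pass


if __name__ == '__main__':
    model = MixNet(net_type='mixnet_s', input_size=416).cuda().eval()
    for bs in (1, 8, 10, 12):
        try:
            print('batch %2d: %.1f ms' % (bs, benchmark(model, bs)))
        except RuntimeError:  # CUDA out-of-memory is raised as RuntimeError
            print('batch %2d: out of memory' % bs)
            torch.cuda.empty_cache()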

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import math
import time

import torch
import torch.nn as nn


class Swish(nn.Module):
    """Swish activation: x * sigmoid(x)."""
    def __init__(self):
        super(Swish, self).__init__()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        return x * self.sigmoid(x)


NON_LINEARITY = {
    'ReLU': nn.ReLU(inplace=True),
    'Swish': Swish(),
}


def _RoundChannels(c, divisor=8, min_value=None):
    # Round a channel count to the nearest multiple of `divisor`,
    # never going more than 10% below the requested value.
    if min_value is None:
        min_value = divisor
    new_c = max(min_value, int(c + divisor / 2) // divisor * divisor)
    if new_c < 0.9 * c:
        new_c += divisor
    return new_c


def _SplitChannels(channels, num_groups):
    # Split channels as evenly as possible; the first group takes the remainder.
    split_channels = [channels // num_groups for _ in range(num_groups)]
    split_channels[0] += channels - sum(split_channels)
    return split_channels


def Conv3x3Bn(in_channels, out_channels, stride, non_linear='ReLU'):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, 3, stride, 1, bias=False),
        nn.BatchNorm2d(out_channels),
        NON_LINEARITY[non_linear]
    )


def Conv1x1Bn(in_channels, out_channels, non_linear='ReLU'):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False),
        nn.BatchNorm2d(out_channels),
        NON_LINEARITY[non_linear]
    )


class SqueezeAndExcite(nn.Module):
    def __init__(self, channels, se_ratio):
        super(SqueezeAndExcite, self).__init__()
        squeeze_channels = channels * se_ratio
        if not squeeze_channels.is_integer():
            raise ValueError('channels must be divisible by 1/ratio')
        squeeze_channels = int(squeeze_channels)
        self.se_reduce = nn.Conv2d(channels, squeeze_channels, 1, 1, 0, bias=True)
        self.non_linear1 = NON_LINEARITY['Swish']
        self.se_expand = nn.Conv2d(squeeze_channels, channels, 1, 1, 0, bias=True)
        self.non_linear2 = nn.Sigmoid()

    def forward(self, x):
        # Global average pool -> reduce -> expand -> channel-wise gating.
        y = torch.mean(x, (2, 3), keepdim=True)
        y = self.non_linear1(self.se_reduce(y))
        y = self.non_linear2(self.se_expand(y))
        return x * y


class MDConv(nn.Module):
    """Mixed depthwise convolution: split the channels into groups and run
    each group through a depthwise conv with a different kernel size."""
    def __init__(self, channels, kernel_size, stride):
        super(MDConv, self).__init__()
        self.num_groups = len(kernel_size)
        self.split_channels = _SplitChannels(channels, self.num_groups)
        self.mixed_depthwise_conv = nn.ModuleList()
        for i in range(self.num_groups):
            self.mixed_depthwise_conv.append(nn.Conv2d(
                self.split_channels[i],
                self.split_channels[i],
                kernel_size[i],
                stride=stride,
                padding=kernel_size[i] // 2,
                groups=self.split_channels[i],
                bias=False
            ))

    def forward(self, x):
        if self.num_groups == 1:
            return self.mixed_depthwise_conv[0](x)
        x_split = torch.split(x, self.split_channels, dim=1)
        x = [conv(t) for conv, t in zip(self.mixed_depthwise_conv, x_split)]
        x = torch.cat(x, dim=1)
        return x


class MixNetBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride,
                 expand_ratio, non_linear='ReLU', se_ratio=0.0):
        super(MixNetBlock, self).__init__()
        expand = (expand_ratio != 1)
        expand_channels = in_channels * expand_ratio
        se = (se_ratio != 0.0)
        self.residual_connection = (stride == 1 and in_channels == out_channels)
        conv = []
        if expand:
            # expansion phase
            pw_expansion = nn.Sequential(
                nn.Conv2d(in_channels, expand_channels, 1, 1, 0, bias=False),
                nn.BatchNorm2d(expand_channels),
                NON_LINEARITY[non_linear]
            )
            conv.append(pw_expansion)
        # depthwise convolution phase
        dw = nn.Sequential(
            MDConv(expand_channels, kernel_size, stride),
            nn.BatchNorm2d(expand_channels),
            NON_LINEARITY[non_linear]
        )
        conv.append(dw)
        if se:
            # squeeze and excite
            conv.append(SqueezeAndExcite(expand_channels, se_ratio))
        # projection phase (linear bottleneck, no activation)
        pw_projection = nn.Sequential(
            nn.Conv2d(expand_channels, out_channels, 1, 1, 0, bias=False),
            nn.BatchNorm2d(out_channels)
        )
        conv.append(pw_projection)
        self.conv = nn.Sequential(*conv)

    def forward(self, x):
        if self.residual_connection:
            return x + self.conv(x)
        else:
            return self.conv(x)


class MixNet(nn.Module):
    # [in_channels, out_channels, kernel_size, stride, expand_ratio, non_linear, se_ratio]
    mixnet_s = [(16, 16, [3], 1, 1, 'ReLU', 0.0),
                (16, 24, [3], 2, 6, 'ReLU', 0.0),
                (24, 24, [3], 1, 3, 'ReLU', 0.0),
                (24, 40, [3, 5, 7], 2, 6, 'Swish', 0.5),
                (40, 40, [3, 5], 1, 6, 'Swish', 0.5),
                (40, 40, [3, 5], 1, 6, 'Swish', 0.5),
                (40, 40, [3, 5], 1, 6, 'Swish', 0.5),
                (40, 80, [3, 5, 7], 2, 6, 'Swish', 0.25),
                (80, 80, [3, 5], 1, 6, 'Swish', 0.25),
                (80, 80, [3, 5], 1, 6, 'Swish', 0.25),
                (80, 120, [3, 5, 7], 1, 6, 'Swish', 0.5),
                (120, 120, [3, 5, 7, 9], 1, 3, 'Swish', 0.5),
                (120, 120, [3, 5, 7, 9], 1, 3, 'Swish', 0.5),
                (120, 200, [3, 5, 7, 9, 11], 2, 6, 'Swish', 0.5),
                (200, 200, [3, 5, 7, 9], 1, 6, 'Swish', 0.5),
                (200, 200, [3, 5, 7, 9], 1, 6, 'Swish', 0.5)]
    mixnet_m = [(24, 24, [3], 1, 1, 'ReLU', 0.0),
                (24, 32, [3, 5, 7], 2, 6, 'ReLU', 0.0),
                (32, 32, [3], 1, 3, 'ReLU', 0.0),
                (32, 40, [3, 5, 7, 9], 2, 6, 'Swish', 0.5),
                (40, 40, [3, 5], 1, 6, 'Swish', 0.5),
                (40, 40, [3, 5], 1, 6, 'Swish', 0.5),
                (40, 40, [3, 5], 1, 6, 'Swish', 0.5),
                (40, 80, [3, 5, 7], 2, 6, 'Swish', 0.25),
                (80, 80, [3, 5, 7, 9], 1, 6, 'Swish', 0.25),
                (80, 80, [3, 5, 7, 9], 1, 6, 'Swish', 0.25),
                (80, 80, [3, 5, 7, 9], 1, 6, 'Swish', 0.25),
                (80, 120, [3], 1, 6, 'Swish', 0.5),
                (120, 120, [3, 5, 7, 9], 1, 3, 'Swish', 0.5),
                (120, 120, [3, 5, 7, 9], 1, 3, 'Swish', 0.5),
                (120, 120, [3, 5, 7, 9], 1, 3, 'Swish', 0.5),
                (120, 200, [3, 5, 7, 9], 2, 6, 'Swish', 0.5),
                (200, 200, [3, 5, 7, 9], 1, 6, 'Swish', 0.5),
                (200, 200, [3, 5, 7, 9], 1, 6, 'Swish', 0.5),
                (200, 200, [3, 5, 7, 9], 1, 6, 'Swish', 0.5)]

    def __init__(self, net_type='mixnet_s', input_size=512, num_classes=1000,
                 stem_channels=16, feature_size=1536, depth_multiplier=1.0):
        super(MixNet, self).__init__()
        if net_type == 'mixnet_s':
            config = self.mixnet_s
            stem_channels = 16
            dropout_rate = 0.2
        elif net_type == 'mixnet_m':
            config = self.mixnet_m
            stem_channels = 24
            dropout_rate = 0.25
        elif net_type == 'mixnet_l':
            # MixNet-L is MixNet-M with a 1.3x depth multiplier
            config = self.mixnet_m
            stem_channels = 24
            depth_multiplier *= 1.3
            dropout_rate = 0.25
        else:
            raise TypeError('Unsupported MixNet type')
        assert input_size % 32 == 0

        # depth multiplier
        if depth_multiplier != 1.0:
            stem_channels = _RoundChannels(stem_channels * depth_multiplier)
            for i, conf in enumerate(config):
                conf_ls = list(conf)
                conf_ls[0] = _RoundChannels(conf_ls[0] * depth_multiplier)
                conf_ls[1] = _RoundChannels(conf_ls[1] * depth_multiplier)
                config[i] = tuple(conf_ls)

        # stem convolution
        self.stem_conv = Conv3x3Bn(3, stem_channels, 2)

        # building MixNet blocks
        layers = []
        for in_channels, out_channels, kernel_size, stride, expand_ratio, non_linear, se_ratio in config:
            layers.append(MixNetBlock(in_channels, out_channels, kernel_size,
                                      stride, expand_ratio, non_linear, se_ratio))
        self.layers = nn.Sequential(*layers)

        # head: the classification layers are commented out so the network
        # returns the final feature map and can serve as a detection backbone
        self.head_conv = Conv1x1Bn(config[-1][1], feature_size)
        # self.avgpool = nn.AvgPool2d(input_size // 32, stride=1)
        # self.classifier = nn.Linear(feature_size, num_classes)
        # self.dropout = nn.Dropout(dropout_rate)

        self._initialize_weights()

    def forward(self, x):
        x = self.stem_conv(x)
        x = self.layers(x)
        x = self.head_conv(x)
        # x = self.avgpool(x)
        # x = x.view(x.size(0), -1)
        # x = self.classifier(x)
        # x = self.dropout(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2.0 / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


if __name__ == '__main__':
    model = MixNet(input_size=416)
    model.cuda()
    model.eval()
    x = torch.rand(10, 3, 416, 416).cuda()
    with torch.no_grad():  # inference only: skip autograd bookkeeping
        for i in range(20):
            torch.cuda.synchronize()  # finish pending work before timing
            time1 = time.time()
            # out3, out4, out5 = model(x)
            out3 = model(x)
            torch.cuda.synchronize()  # wait for the forward pass to complete
            print("time", time.time() - time1, out3.size())
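As a quick sanity check on the backbone, a short snippet like the following (an illustrative addition, not from the original post; it assumes the MixNet class above is in scope) builds both configurations on the CPU and prints parameter counts and output shapes. The network downsamples by a factor of 32, so a 416x416 input should come out as a 1536-channel 13x13 feature map.

# Illustrative sanity check: parameter counts and output shapes on CPU.
import torch

for net_type in ('mixnet_s', 'mixnet_m'):
    model = MixNet(net_type=net_type, input_size=416).eval()
    n_params = sum(p.numel() for p in model.parameters())
    with torch.no_grad():
        out = model(torch.rand(1, 3, 416, 416))
    # 416 / 32 = 13, so the expected output shape is (1, 1536, 13, 13)
    print('%s: %.2fM params, output %s' % (net_type, n_params / 1e6, tuple(out.size())))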

 



Author: ryuer8423

Link: https://www.pythonheidong.com/blog/article/53509/41ab1b10b0a81f483f0b/

Source: python黑洞网 (pythonheidong.com)
