MaskRCNN-Benchmark训练自己的数据集

发布于2019-08-06 10:27 阅读(1673) 评论(0) 点赞(1) 收藏(5)

这篇文章主要记录使用MaskRCNN-Benchmark（链接官网）训练自己数据集的心得，过程还算比较顺利。如有问题，希望大佬指出，共同进步。

1、安装

# create and activate a dedicated conda environment
$ conda create --name maskrcnn_benchmark
$ source activate maskrcnn_benchmark

# this installs the right pip and dependencies for the fresh python
$ conda install ipython

# maskrcnn_benchmark and coco api dependencies
$ pip install ninja yacs cython matplotlib

# follow PyTorch installation in https://pytorch.org/get-started/locally/
# we give the instructions for CUDA 9.0
$ conda install pytorch-nightly -c pytorch

# install torchvision
$ cd ~/github
$ git clone https://github.com/pytorch/vision.git
$ cd vision
$ python setup.py install

# install pycocotools
$ cd ~/github
$ git clone https://github.com/cocodataset/cocoapi.git
$ cd cocoapi/PythonAPI
$ python setup.py build_ext install

# install PyTorch Detection
$ cd ~/github
$ git clone https://github.com/facebookresearch/maskrcnn-benchmark.git
$ cd maskrcnn-benchmark
$ python setup.py build develop

# try the webcam demo shipped in the demo/ directory
cd demo
# by default, it runs on the GPU
# for best results, use min-image-size 800
python webcam.py --min-image-size 800
# can also run it on the CPU
python webcam.py --min-image-size 300 MODEL.DEVICE cpu
# or change the model that you want to use
python webcam.py --config-file ../configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml --min-image-size 300 MODEL.DEVICE cpu
# in order to see the probability heatmaps, pass --show-mask-heatmaps
python webcam.py --min-image-size 300 --show-mask-heatmaps MODEL.DEVICE cpu

2、数据准备：

import xml.etree.ElementTree as ET
import os
import json

# The COCO document under construction; the helper functions below append to
# its 'images', 'annotations' and 'categories' lists.
coco = {
    'images': [],
    'type': 'instances',
    'annotations': [],
    'categories': [],
}

# Maps a category name to the id it was assigned.
category_set = {}
# File names of the images that were already added.
image_set = set()

# Running counters; each one is incremented before a new id is handed out.
category_item_id, image_id, annotation_id = 0, 20190000000, 0


def addCatItem(name):
    """Register the category *name* and return the id assigned to it.

    Increments the module-level ``category_item_id`` counter, appends a
    category entry to ``coco['categories']`` and records the name -> id
    mapping in ``category_set``.
    """
    global category_item_id
    category_item_id += 1
    new_id = category_item_id
    coco['categories'].append({
        'supercategory': 'none',
        'id': new_id,
        'name': name,
    })
    category_set[name] = new_id
    return new_id


def addImgItem(file_name, size):
    """Append an image entry to ``coco['images']`` and return its id.

    Args:
        file_name: image file name taken from the xml file.
        size: mapping that holds the image 'width' and 'height'.

    Raises:
        Exception: if the file name, the width or the height is ``None``.
    """
    global image_id
    if file_name is None:
        raise Exception('Could not find filename tag in xml file.')
    width = size['width']
    if width is None:
        raise Exception('Could not find width tag in xml file.')
    height = size['height']
    if height is None:
        raise Exception('Could not find height tag in xml file.')

    image_id += 1
    coco['images'].append({
        'id': image_id,
        'file_name': file_name,
        'width': width,
        'height': height,
    })
    # Remember the name so that the same image is not added twice.
    image_set.add(file_name)
    return image_id


def addAnnoItem(object_name, image_id, category_id, bbox):
    """Append one annotation entry to ``coco['annotations']``.

    Args:
        object_name: name of the annotated object (not used by this function).
        image_id: id of the image the box belongs to.
        category_id: id of the object's category.
        bbox: box given as ``[x, y, w, h]``.

    The segmentation is a single polygon made of the four box corners and
    the area is ``w * h``.
    """
    global annotation_id
    x, y, w, h = bbox[0], bbox[1], bbox[2], bbox[3]

    # Corner order: left-top, left-bottom, right-bottom, right-top.
    polygon = [
        x, y,
        x, y + h,
        x + w, y + h,
        x + w, y,
    ]

    annotation_id += 1
    coco['annotations'].append({
        'segmentation': [polygon],
        'area': w * h,
        'iscrowd': 0,
        'ignore': 0,
        'image_id': image_id,
        'bbox': bbox,
        'category_id': category_id,
        'id': annotation_id,
    })


def _registerImage(file_name, size):
    """Add *file_name* to the COCO image list and return its new image id.

    Raises:
        Exception: if an image with the same file name was already added.
    """
    if file_name in image_set:
        raise Exception('duplicated image: {}'.format(file_name))
    new_image_id = addImgItem(file_name, size)
    print('add image with {} and {}'.format(file_name, size))
    return new_image_id


def parseXmlFiles(xml_path):
    """Convert every Pascal VOC ``.xml`` file in *xml_path* into COCO entries.

    Each file must have an ``<annotation>`` root. The ``<filename>`` and
    ``<size>`` elements produce one image entry (via ``addImgItem``), every
    ``<object>`` contributes a category (via ``addCatItem``, first time only)
    and each of its ``<bndbox>`` children produces one annotation (via
    ``addAnnoItem``). Results are accumulated in the module-level ``coco``
    dictionary; nothing is returned.

    Args:
        xml_path: directory that contains the xml annotation files.

    Raises:
        Exception: if a file has an unexpected root element, a repeated
            ``<size>``/``<bndbox>`` field, a box that appears before the
            image or object name is known, or a file name that was already
            registered.
    """
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # generated image and annotation ids reproducible between runs.
    for f in sorted(os.listdir(xml_path)):
        if not f.endswith('.xml'):
            continue

        size = {'width': None, 'height': None, 'depth': None}
        current_image_id = None
        current_category_id = None
        file_name = None

        xml_file = os.path.join(xml_path, f)
        print(xml_file)

        root = ET.parse(xml_file).getroot()
        if root.tag != 'annotation':
            raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))

        # elem is <folder>, <filename>, <size>, <object>, ...
        for elem in root:
            object_name = None

            if elem.tag == 'folder':
                continue

            if elem.tag == 'filename':
                # Duplicate file names are detected when the image is
                # registered (the name is compared with image_set there).
                file_name = elem.text
            elif current_image_id is None and file_name is not None and size['width'] is not None:
                # The image can only be added once both <filename> and <size>
                # have been seen, i.e. on the first element that follows them.
                current_image_id = _registerImage(file_name, size)

            # subelem is <width>, <height>, <depth>, <name>, <bndbox>, ...
            for subelem in elem:
                # A fresh box for every sub-element, so that a box is only
                # reported for the <bndbox> that actually defined it.
                bndbox = {'xmin': None, 'xmax': None, 'ymin': None, 'ymax': None}

                if elem.tag == 'object' and subelem.tag == 'name':
                    object_name = subelem.text
                    if object_name not in category_set:
                        current_category_id = addCatItem(object_name)
                    else:
                        current_category_id = category_set[object_name]
                elif elem.tag == 'size':
                    if size[subelem.tag] is not None:
                        raise Exception('xml structure broken at size tag.')
                    size[subelem.tag] = int(subelem.text)

                # option is <xmin>, <ymin>, <xmax>, <ymax> when subelem is <bndbox>
                if subelem.tag == 'bndbox':
                    for option in subelem:
                        if bndbox[option.tag] is not None:
                            raise Exception('xml structure corrupted at bndbox tag.')
                        bndbox[option.tag] = int(option.text)

                if bndbox['xmin'] is None:
                    continue

                # A box is only valid once its object name, image and
                # category are all known.
                if object_name is None or current_image_id is None or current_category_id is None:
                    raise Exception('xml structure broken at bndbox tag')

                # COCO boxes are stored as [x, y, w, h].
                bbox = [
                    bndbox['xmin'],
                    bndbox['ymin'],
                    bndbox['xmax'] - bndbox['xmin'],
                    bndbox['ymax'] - bndbox['ymin'],
                ]
                print(
                    'add annotation with {},{},{},{}'.format(object_name, current_image_id, current_category_id, bbox))
                addAnnoItem(object_name, current_image_id, current_category_id, bbox)

        # If <size>/<filename> was the last element of the file, the in-loop
        # registration never ran; add the image here so it is not dropped.
        if current_image_id is None and file_name is not None and size['width'] is not None:
            _registerImage(file_name, size)


if __name__ == '__main__':
    # Directory that holds the Pascal VOC xml annotation files.
    xml_path = 'C:/Users/Desktop/VOCdevkit/train'
    # Output file; named instances_train2014.json so that it matches the
    # annotation file name referenced in paths_catalog.py.
    json_file = 'C:/Users/Desktop/VOCdevkit/instances_train2014.json'
    parseXmlFiles(xml_path)
    # The context manager flushes and closes the file even if dumping fails.
    with open(json_file, 'w') as fp:
        json.dump(coco, fp)

通过上面的代码生成自己的instances_train2014.json和instances_val2014.json后，建立datasets文件夹，再在其中建立coco文件夹，coco文件夹下包含annotations文件夹（存放生成的json文件）以及train2014和val2014文件夹（存放训练和测试数据集）。

3、配置文件：

建立experiments文件夹，存放cfgs（配置文件）与result（输出结果）

模型配置文件:提取configs/e2e_faster_rcnn_R_50_FPN_1x.yaml
e2e_faster_rcnn_R_50_FPN_1x.yaml文件：

MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
  BACKBONE:
    CONV_BODY: "R-50-FPN"
  RESNETS:
    BACKBONE_OUT_CHANNELS: 256
  RPN:
    USE_FPN: True    # whether to use FPN (feature pyramid network); when True, proposals are extracted from several feature maps
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)  # anchor strides
    PRE_NMS_TOP_N_TRAIN: 2000   # number of proposals before NMS during training
    PRE_NMS_TOP_N_TEST: 1000    # number of proposals before NMS at test time
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
# A '#' only starts a YAML comment when it is preceded by whitespace, so the
# comments below are separated from the values by spaces.
DATASETS:
  TRAIN: ("coco_2014_train", )  # training dataset(s)
  TEST: ("coco_2014_val",)  # test dataset(s); names correspond to DATASETS in paths_catalog.py
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.02  # initial learning rate; several schedules exist for adjusting it, the framework defines its own
  WEIGHT_DECAY: 0.0001
  STEPS: (5000, 8000)
  MAX_ITER: 10000
OUTPUT_DIR: /home/yb/maskrcnn-benchmark/experiments/result   # output directory

如果训练e2e_mask_rcnn_R_50_FPN_1x.yaml文件：

  MODEL:
    META_ARCHITECTURE: "GeneralizedRCNN"
    WEIGHT: "maskrcnn-benchmark-master/weights/R-50.pkl"  # path to the pretrained weights
    BACKBONE:
      CONV_BODY: "R-50-FPN"  # backbone architecture
      OUT_CHANNELS: 256
    RPN:
      USE_FPN: True  # whether to use FPN (feature pyramid network); when True, proposals are extracted from several feature maps
      ANCHOR_STRIDE: (4, 8, 16, 32, 64)  # anchor strides
      PRE_NMS_TOP_N_TRAIN: 2000  # number of proposals before NMS during training
      PRE_NMS_TOP_N_TEST: 1000  # number of proposals before NMS at test time
      POST_NMS_TOP_N_TEST: 1000
      FPN_POST_NMS_TOP_N_TEST: 1000
    ROI_HEADS:
      USE_FPN: True
    ROI_BOX_HEAD:
      POOLER_RESOLUTION: 7
      POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
      POOLER_SAMPLING_RATIO: 2
      FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
      PREDICTOR: "FPNPredictor"
    ROI_MASK_HEAD:
      POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
      FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
      PREDICTOR: "MaskRCNNC4Predictor"
      POOLER_RESOLUTION: 14
      POOLER_SAMPLING_RATIO: 2
      RESOLUTION: 28
      SHARE_BOX_FEATURE_EXTRACTOR: False
    MASK_ON: True  # whether to enable mask (segmentation) prediction
  DATASETS:  # datasets to use; names correspond to the entries in paths_catalog.py
    TRAIN: ("coco_2014_train", "coco_2014_val")
    # NOTE(review): "coco_2014_test" needs a matching entry in DatasetCatalog.DATASETS (paths_catalog.py) - confirm
    TEST: ("coco_2014_test",)
  DATALOADER:
    SIZE_DIVISIBILITY: 32
  SOLVER:
    IMS_PER_BATCH: 2  # number of images per training batch
    BASE_LR: 0.0025  # initial learning rate
    WEIGHT_DECAY: 0.0001  # weight decay coefficient
    STEPS: (30000, 40000)  # learning-rate decay schedule (presumably the iterations at which the rate is reduced - confirm); default values kept
    MAX_ITER: 60000  # maximum number of iterations
  INPUT:
    MIN_SIZE_TRAIN: 300  # minimum image size for training
    MAX_SIZE_TRAIN: 300  # maximum image size for training
  TEST:
    IMS_PER_BATCH: 1  
  OUTPUT_DIR: "output"  # output directory

MaskRCNN-Benchmark框架配置文件：maskrcnn_benchmark/config/defaults.py

import os
from yacs.config import CfgNode as CN
# Default configuration tree. Values given here are the defaults; the author's
# comments say some of them are set again in the model config file.
_C = CN()
_C.MODEL = CN()
_C.MODEL.RPN_ONLY = False
_C.MODEL.MASK_ON = False
_C.MODEL.DEVICE = "cuda" 
_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN"
_C.MODEL.WEIGHT = ""
# Input image options
_C.INPUT = CN()
_C.INPUT.MIN_SIZE_TRAIN = 800  # minimum image size for the training set
_C.INPUT.MAX_SIZE_TRAIN = 1333 # maximum image size for the training set
_C.INPUT.MIN_SIZE_TEST = 800
_C.INPUT.MAX_SIZE_TEST = 1333
_C.INPUT.PIXEL_MEAN = [102.9801, 115.9465, 122.7717]
_C.INPUT.PIXEL_STD = [1., 1., 1.]
_C.INPUT.TO_BGR255 = True
# Dataset names
_C.DATASETS = CN()
_C.DATASETS.TRAIN = () # given in the model config file
_C.DATASETS.TEST = ()
# Data loader options
_C.DATALOADER = CN()
_C.DATALOADER.NUM_WORKERS = 4 # number of workers started for data loading
_C.DATALOADER.SIZE_DIVISIBILITY = 0
_C.DATALOADER.ASPECT_RATIO_GROUPING = True
# Backbone options
_C.MODEL.BACKBONE = CN()
_C.MODEL.BACKBONE.CONV_BODY = "R-50-C4"
_C.MODEL.BACKBONE.FREEZE_CONV_BODY_AT = 2
_C.MODEL.BACKBONE.OUT_CHANNELS = 256 * 4
# RPN options
_C.MODEL.RPN = CN()
_C.MODEL.RPN.USE_FPN = False
_C.MODEL.RPN.ANCHOR_SIZES = (32, 64, 128, 256, 512)
_C.MODEL.RPN.ANCHOR_STRIDE = (16,)
_C.MODEL.RPN.ASPECT_RATIOS = (0.5, 1.0, 2.0)
_C.MODEL.RPN.STRADDLE_THRESH = 0
_C.MODEL.RPN.FG_IOU_THRESHOLD = 0.7
_C.MODEL.RPN.BG_IOU_THRESHOLD = 0.3
_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256
_C.MODEL.RPN.POSITIVE_FRACTION = 0.5
_C.MODEL.RPN.PRE_NMS_TOP_N_TRAIN = 12000
_C.MODEL.RPN.PRE_NMS_TOP_N_TEST = 6000
_C.MODEL.RPN.POST_NMS_TOP_N_TRAIN = 2000
_C.MODEL.RPN.POST_NMS_TOP_N_TEST = 1000
_C.MODEL.RPN.NMS_THRESH = 0.7
_C.MODEL.RPN.MIN_SIZE = 0
_C.MODEL.RPN.FPN_POST_NMS_TOP_N_TRAIN = 2000
_C.MODEL.RPN.FPN_POST_NMS_TOP_N_TEST = 2000
# ROI heads options
_C.MODEL.ROI_HEADS = CN()
_C.MODEL.ROI_HEADS.USE_FPN = False
_C.MODEL.ROI_HEADS.FG_IOU_THRESHOLD = 0.5
_C.MODEL.ROI_HEADS.BG_IOU_THRESHOLD = 0.5
_C.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)
_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25
_C.MODEL.ROI_HEADS.SCORE_THRESH = 0.05
_C.MODEL.ROI_HEADS.NMS = 0.5
_C.MODEL.ROI_HEADS.DETECTIONS_PER_IMG = 100
# Box head options
_C.MODEL.ROI_BOX_HEAD = CN()
_C.MODEL.ROI_BOX_HEAD.FEATURE_EXTRACTOR = "ResNet50Conv5ROIFeatureExtractor"
_C.MODEL.ROI_BOX_HEAD.PREDICTOR = "FastRCNNPredictor"
_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_BOX_HEAD.POOLER_SCALES = (1.0 / 16,)
# Number of dataset classes. The default is 81 because COCO has 80 classes + 1
# (background); the author's dataset has a single class, so 2 with background.
_C.MODEL.ROI_BOX_HEAD.NUM_CLASSES = 2
_C.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM = 1024
# Mask head options
_C.MODEL.ROI_MASK_HEAD = CN()
_C.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR = "ResNet50Conv5ROIFeatureExtractor"
_C.MODEL.ROI_MASK_HEAD.PREDICTOR = "MaskRCNNC4Predictor"
_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_MASK_HEAD.POOLER_SCALES = (1.0 / 16,)
_C.MODEL.ROI_MASK_HEAD.MLP_HEAD_DIM = 1024
_C.MODEL.ROI_MASK_HEAD.CONV_LAYERS = (256, 256, 256, 256)
_C.MODEL.ROI_MASK_HEAD.RESOLUTION = 14
_C.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR = True
# ResNet options
_C.MODEL.RESNETS = CN()
_C.MODEL.RESNETS.NUM_GROUPS = 1
_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64
_C.MODEL.RESNETS.STRIDE_IN_1X1 = True
_C.MODEL.RESNETS.TRANS_FUNC = "BottleneckWithFixedBatchNorm"
_C.MODEL.RESNETS.STEM_FUNC = "StemWithFixedBatchNorm"
_C.MODEL.RESNETS.RES5_DILATION = 1
_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256
_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64
# Solver options
_C.SOLVER = CN()
_C.SOLVER.MAX_ITER = 40000 # maximum number of iterations
_C.SOLVER.BASE_LR = 0.02 # initial learning rate; usually set in the model config file
_C.SOLVER.BIAS_LR_FACTOR = 2
_C.SOLVER.MOMENTUM = 0.9
_C.SOLVER.WEIGHT_DECAY = 0.0005
_C.SOLVER.WEIGHT_DECAY_BIAS = 0
_C.SOLVER.GAMMA = 0.1
_C.SOLVER.STEPS = (30000,)
_C.SOLVER.WARMUP_FACTOR = 1.0 / 3
_C.SOLVER.WARMUP_ITERS = 500 # number of warm-up iterations; the learning rate is lower while the iteration count is below this value
_C.SOLVER.WARMUP_METHOD = "constant" # warm-up strategy; either 'constant' or 'linear'
_C.SOLVER.CHECKPOINT_PERIOD = 2000 # interval (in iterations) between checkpoints
_C.SOLVER.IMS_PER_BATCH = 1 # number of images in one batch
# Test options
_C.TEST = CN()
_C.TEST.EXPECTED_RESULTS = []
_C.TEST.EXPECTED_RESULTS_SIGMA_TOL = 4
_C.TEST.IMS_PER_BATCH = 1
_C.OUTPUT_DIR = "output" # used mainly as the output directory for checkpoints and inference results
# paths_catalog.py is looked up next to this file.
_C.PATHS_CATALOG = os.path.join(os.path.dirname(__file__), "paths_catalog.py")

4、数据路径配置文件：maskrcnn_benchmark/config/paths_catalog.py

修改paths_catalog.py文件中的DatasetCatalog类

class DatasetCatalog(object):
    """Catalog of the datasets that config files can refer to by name.

    Every entry of ``DATASETS`` is an ``(image directory, annotation file)``
    pair; the paths are presumably relative to ``DATA_DIR`` (confirm against
    the code that reads this catalog).
    """

    DATA_DIR = "datasets"

    DATASETS = {
        # training split: image directory, annotation file
        "coco_2014_train": ("coco/train2014", "coco/annotations/instances_train2014.json"),
        # validation split: image directory, annotation file
        "coco_2014_val": ("coco/val2014", "coco/annotations/instances_val2014.json"),
    }

5、在demo文件夹中，将predictor.py里的类别修改为自己的类别（背景类加预测类）

6、开始训练：

# start training with the custom config file
cd maskrcnn-benchmark
python tools/train_net.py --config-file experiments/cfgs/e2e_faster_rcnn_R_50_FPN_1x.yaml

7、验证结果：

指定模型配置文件,执行测试启动脚本

# run the test script with the same config file that was passed to train_net.py
python tools/test_net.py --config-file experiments/cfgs/e2e_faster_rcnn_R_50_FPN_1x.yaml

8、测试图像,该脚本放到demo下：

#!--*-- coding:utf-8 --*--
# Run the model described by a config file on one image and show the input
# image next to the prediction overlay. Meant to live in the demo/ directory
# (it imports COCODemo from predictor.py).

import matplotlib.pyplot as plt
import matplotlib.pylab as pylab

import requests
from io import BytesIO
from PIL import Image
import numpy as np

pylab.rcParams['figure.figsize'] = 20, 12

from maskrcnn_benchmark.config import cfg
from predictor import COCODemo

# Custom config file (lower-case "1x", matching the name used for training).
config_file = "/home/maskrcnn-benchmark/experiments/cfgs/e2e_faster_rcnn_R_50_FPN_1x.yaml"

cfg.merge_from_file(config_file)
cfg.merge_from_list(["MODEL.DEVICE", "cpu"])
# NOTE(review): these are mask-rcnn weights while the config above is the
# faster-rcnn one - confirm that this pairing is intended.
cfg.MODEL.WEIGHT = '../pretrained/e2e_mask_rcnn_R_50_FPN_1x.pth'

coco_demo = COCODemo(cfg, min_image_size=800, confidence_threshold=0.7, )

# Set to True to download the sample image instead of reading a local file.
# (The original `if False:` branch contained only comments, which is a
# syntax error; the download code is now a real, selectable branch.)
USE_REMOTE_IMAGE = False

if USE_REMOTE_IMAGE:
    imgurl = "http://farm3.staticflickr.com/2469/3915380994_2e611b1779_z.jpg"
    response = requests.get(imgurl)
    pil_image = Image.open(BytesIO(response.content)).convert("RGB")
else:
    imgfile = './test/1.jpg'
    pil_image = Image.open(imgfile).convert("RGB")

# Reverse the channel order (RGB -> BGR) before calling run_on_opencv_image.
image = np.array(pil_image)[:, :, [2, 1, 0]]

# forward predict
predictions = coco_demo.run_on_opencv_image(image)

# vis: channels are flipped back ([:, :, ::-1]) for matplotlib
plt.subplot(1, 2, 1)
plt.imshow(image[:, :, ::-1])
plt.axis('off')

plt.subplot(1, 2, 2)
plt.imshow(predictions[:, :, ::-1])
plt.axis('off')
plt.show()

参考链接：https://blog.csdn.net/ChuiGeDaQiQiu/article/details/83868512

https://blog.csdn.net/x572722344/article/details/84934188