fix(mge/data): fix a NaN bug in the COCO dataset

GitOrigin-RevId: 689fbaafa5
5 years ago · d8d3f405bb
--- a/python_module/megengine/data/dataset/vision/coco.py
+++ b/python_module/megengine/data/dataset/vision/coco.py
@@ -14,7 +14,7 @@
 # ---------------------------------------------------------------------
 import json
 import os
 from collections import OrderedDict, defaultdict
 from collections import defaultdict

 import cv2
 import numpy as np
@@ -28,26 +28,21 @@ def _count_visible_keypoints(anno):
    return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)


 def _has_only_empty_bbox(anno):
    return all(any(o <= 0 for o in obj["bbox"][2:]) for obj in anno)


 def has_valid_annotation(anno):
 def has_valid_annotation(anno, order):
    # NOTE(review): this span appears to show both the pre-change and the
    # post-change lines of the diff hunk with the +/- markers stripped
    # (two signatures, two bodies) -- confirm against the repository.
    # if it's empty, there is no annotation
    if len(anno) == 0:
        return False
    # if every box has a non-positive entry in bbox[2:], there is no
    # usable annotation
    if _has_only_empty_bbox(anno):
        return False
    # keypoint tasks have slightly different criteria for deciding
    # whether an annotation is valid
    if "keypoints" not in anno[0]:
        return True
    # for keypoint detection tasks, only consider an image valid if it
    # contains at least min_keypoints_per_image visible keypoints
    if _count_visible_keypoints(anno) >= min_keypoints_per_image:
        return True
    return False
    # box targets requested: the first annotation must carry a "bbox" field
    if "boxes" in order or "boxes_category" in order:
        if "bbox" not in anno[0]:
            return False
    # keypoint targets requested: the first annotation must carry "keypoints"
    if "keypoints" in order:
        if "keypoints" not in anno[0]:
            return False
        # for keypoint detection tasks, only consider an image valid if it
        # contains at least min_keypoints_per_image visible keypoints
        if _count_visible_keypoints(anno) < min_keypoints_per_image:
            return False
    return True


 class COCO(VisionDataset):
@@ -58,8 +53,8 @@ class COCO(VisionDataset):
        "image",
        "boxes",
        "boxes_category",
        "keypoints",
        # TODO: need to check
        # "keypoints",
        # "polygons",
        "info",
    )
@@ -72,7 +67,7 @@ class COCO(VisionDataset):
        with open(ann_file, "r") as f:
            dataset = json.load(f)

        self.imgs = OrderedDict()
        self.imgs = dict()
        for img in dataset["images"]:
            # for saving memory
            if "license" in img:
@@ -98,7 +93,7 @@ class COCO(VisionDataset):
                del ann["segmentation"]
            self.img_to_anns[ann["image_id"]].append(ann)

        self.cats = OrderedDict()
        self.cats = dict()
        for cat in dataset["categories"]:
            self.cats[cat["id"]] = cat

@@ -109,8 +104,17 @@ class COCO(VisionDataset):
            ids = []
            for img_id in self.ids:
                anno = self.img_to_anns[img_id]
                if has_valid_annotation(anno):
                # filter crowd annotations
                anno = [obj for obj in anno if obj["iscrowd"] == 0]
                anno = [
                    obj for obj in anno if obj["bbox"][2] > 0 and obj["bbox"][3] > 0
                ]
                if has_valid_annotation(anno, order):
                    ids.append(img_id)
                    self.img_to_anns[img_id] = anno
                else:
                    del self.imgs[img_id]
                    del self.img_to_anns[img_id]
            self.ids = ids

        self.json_category_id_to_contiguous_id = {
@@ -125,11 +129,6 @@ class COCO(VisionDataset):
        img_id = self.ids[index]
        anno = self.img_to_anns[img_id]

        # filter crowd annotations
        anno = [obj for obj in anno if obj["iscrowd"] == 0]
        # filter empty annotations
        anno = [obj for obj in anno if obj["area"] > 0]

        target = []
        for k in self.order:
            if k == "image":
@@ -181,7 +180,6 @@ class COCO(VisionDataset):
        return img_info

    class_names = (
        "background",
        "person",
        "bicycle",
        "car",
--- a/python_module/megengine/data/dataset/vision/objects365.py
+++ b/python_module/megengine/data/dataset/vision/objects365.py
@@ -69,7 +69,9 @@ class Objects365(VisionDataset):
                anno = self.img_to_anns[img_id]
                # filter crowd annotations
                anno = [obj for obj in anno if obj["iscrowd"] == 0]
                anno = [obj for obj in anno if obj["bbox"][2] > 0 and obj["bbox"][3] > 0]
                anno = [
                    obj for obj in anno if obj["bbox"][2] > 0 and obj["bbox"][3] > 0
                ]
                if len(anno) > 0:
                    ids.append(img_id)
                    self.img_to_anns[img_id] = anno
--- a/python_module/megengine/data/dataset/vision/voc.py
+++ b/python_module/megengine/data/dataset/vision/voc.py
@@ -37,7 +37,9 @@ class PascalVOC(VisionDataset):

    def __init__(self, root, image_set, *, order=None):
        if ("boxes" in order or "boxes_category" in order) and "mask" in order:
            raise ValueError("PascalVOC only supports boxes & boxes_category or mask, not both.")
            raise ValueError(
                "PascalVOC only supports boxes & boxes_category or mask, not both."
            )

        super().__init__(root, order=order, supported_order=self.supported_order)

@@ -48,13 +50,15 @@ class PascalVOC(VisionDataset):
        image_dir = os.path.join(self.root, "JPEGImages")

        if "boxes" in order or "boxes_category" in order:
            annotation_dir = os.path.join(self.root, 'Annotations')
            annotation_dir = os.path.join(self.root, "Annotations")
            splitdet_dir = os.path.join(self.root, "ImageSets/Main")
            split_f = os.path.join(splitdet_dir, image_set.rstrip("\n") + ".txt")
            with open(os.path.join(split_f), "r") as f:
                self.file_names = [x.strip() for x in f.readlines()]
            self.images = [os.path.join(image_dir, x + ".jpg") for x in self.file_names]
            self.annotations = [os.path.join(annotation_dir, x + ".xml") for x in self.file_names]
            self.annotations = [
                os.path.join(annotation_dir, x + ".xml") for x in self.file_names
            ]
            assert len(self.images) == len(self.annotations)
        elif "mask" in order:
            if "aug" in image_set:
@@ -81,13 +85,17 @@ class PascalVOC(VisionDataset):
                anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot())
                boxes = [obj["bndbox"] for obj in anno["annotation"]["object"]]
                # boxes type xyxy
                boxes = [(bb['xmin'], bb['ymin'], bb['xmax'], bb['ymax']) for bb in boxes]
                boxes = [
                    (bb["xmin"], bb["ymin"], bb["xmax"], bb["ymax"]) for bb in boxes
                ]
                boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
                target.append(boxes)
            elif k == "boxes_category":
                anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot())
                boxes_category = [obj["name"] for obj in anno["annotation"]["object"]]
                boxes_category = [self.class_names.index(bc)-1 for bc in boxes_category]
                boxes_category = [
                    self.class_names.index(bc) - 1 for bc in boxes_category
                ]
                boxes_category = np.array(boxes_category, dtype=np.int32)
                target.append(boxes_category)
            elif k == "mask":