Browse Source

fix(mge/data): fix an nan bug in coco dataset

GitOrigin-RevId: 689fbaafa5
tags/v0.4.0
Megvii Engine Team Xinran Xu 5 years ago
parent
commit
d8d3f405bb
3 changed files with 42 additions and 34 deletions
  1. +26
    -28
      python_module/megengine/data/dataset/vision/coco.py
  2. +3
    -1
      python_module/megengine/data/dataset/vision/objects365.py
  3. +13
    -5
      python_module/megengine/data/dataset/vision/voc.py

+ 26
- 28
python_module/megengine/data/dataset/vision/coco.py View File

@@ -14,7 +14,7 @@
# ---------------------------------------------------------------------
import json
import os
from collections import OrderedDict, defaultdict
from collections import defaultdict

import cv2
import numpy as np
@@ -28,26 +28,21 @@ def _count_visible_keypoints(anno):
return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)


def _has_only_empty_bbox(anno):
return all(any(o <= 0 for o in obj["bbox"][2:]) for obj in anno)


def has_valid_annotation(anno):
def has_valid_annotation(anno, order):
# if it's empty, there is no annotation
if len(anno) == 0:
return False
# if all boxes have close to zero area, there is no annotation
if _has_only_empty_bbox(anno):
return False
# keypoint tasks have slightly different criteria for considering
# whether an annotation is valid
if "keypoints" not in anno[0]:
return True
# for keypoint detection tasks, only consider images valid if they
# contain at least min_keypoints_per_image visible keypoints
if _count_visible_keypoints(anno) >= min_keypoints_per_image:
return True
return False
if "boxes" in order or "boxes_category" in order:
if "bbox" not in anno[0]:
return False
if "keypoints" in order:
if "keypoints" not in anno[0]:
return False
# for keypoint detection tasks, only consider images valid if they
# contain at least min_keypoints_per_image visible keypoints
if _count_visible_keypoints(anno) < min_keypoints_per_image:
return False
return True


class COCO(VisionDataset):
@@ -58,8 +53,8 @@ class COCO(VisionDataset):
"image",
"boxes",
"boxes_category",
"keypoints",
# TODO: need to check
# "keypoints",
# "polygons",
"info",
)
@@ -72,7 +67,7 @@ class COCO(VisionDataset):
with open(ann_file, "r") as f:
dataset = json.load(f)

self.imgs = OrderedDict()
self.imgs = dict()
for img in dataset["images"]:
# for saving memory
if "license" in img:
@@ -98,7 +93,7 @@ class COCO(VisionDataset):
del ann["segmentation"]
self.img_to_anns[ann["image_id"]].append(ann)

self.cats = OrderedDict()
self.cats = dict()
for cat in dataset["categories"]:
self.cats[cat["id"]] = cat

@@ -109,8 +104,17 @@ class COCO(VisionDataset):
ids = []
for img_id in self.ids:
anno = self.img_to_anns[img_id]
if has_valid_annotation(anno):
# filter crowd annotations
anno = [obj for obj in anno if obj["iscrowd"] == 0]
anno = [
obj for obj in anno if obj["bbox"][2] > 0 and obj["bbox"][3] > 0
]
if has_valid_annotation(anno, order):
ids.append(img_id)
self.img_to_anns[img_id] = anno
else:
del self.imgs[img_id]
del self.img_to_anns[img_id]
self.ids = ids

self.json_category_id_to_contiguous_id = {
@@ -125,11 +129,6 @@ class COCO(VisionDataset):
img_id = self.ids[index]
anno = self.img_to_anns[img_id]

# filter crowd annotations
anno = [obj for obj in anno if obj["iscrowd"] == 0]
# filter empty annotations
anno = [obj for obj in anno if obj["area"] > 0]

target = []
for k in self.order:
if k == "image":
@@ -181,7 +180,6 @@ class COCO(VisionDataset):
return img_info

class_names = (
"background",
"person",
"bicycle",
"car",


+ 3
- 1
python_module/megengine/data/dataset/vision/objects365.py View File

@@ -69,7 +69,9 @@ class Objects365(VisionDataset):
anno = self.img_to_anns[img_id]
# filter crowd annotations
anno = [obj for obj in anno if obj["iscrowd"] == 0]
anno = [obj for obj in anno if obj["bbox"][2] > 0 and obj["bbox"][3] > 0]
anno = [
obj for obj in anno if obj["bbox"][2] > 0 and obj["bbox"][3] > 0
]
if len(anno) > 0:
ids.append(img_id)
self.img_to_anns[img_id] = anno


+ 13
- 5
python_module/megengine/data/dataset/vision/voc.py View File

@@ -37,7 +37,9 @@ class PascalVOC(VisionDataset):

def __init__(self, root, image_set, *, order=None):
if ("boxes" in order or "boxes_category" in order) and "mask" in order:
raise ValueError("PascalVOC only supports boxes & boxes_category or mask, not both.")
raise ValueError(
"PascalVOC only supports boxes & boxes_category or mask, not both."
)

super().__init__(root, order=order, supported_order=self.supported_order)

@@ -48,13 +50,15 @@ class PascalVOC(VisionDataset):
image_dir = os.path.join(self.root, "JPEGImages")

if "boxes" in order or "boxes_category" in order:
annotation_dir = os.path.join(self.root, 'Annotations')
annotation_dir = os.path.join(self.root, "Annotations")
splitdet_dir = os.path.join(self.root, "ImageSets/Main")
split_f = os.path.join(splitdet_dir, image_set.rstrip("\n") + ".txt")
with open(os.path.join(split_f), "r") as f:
self.file_names = [x.strip() for x in f.readlines()]
self.images = [os.path.join(image_dir, x + ".jpg") for x in self.file_names]
self.annotations = [os.path.join(annotation_dir, x + ".xml") for x in self.file_names]
self.annotations = [
os.path.join(annotation_dir, x + ".xml") for x in self.file_names
]
assert len(self.images) == len(self.annotations)
elif "mask" in order:
if "aug" in image_set:
@@ -81,13 +85,17 @@ class PascalVOC(VisionDataset):
anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot())
boxes = [obj["bndbox"] for obj in anno["annotation"]["object"]]
# boxes type xyxy
boxes = [(bb['xmin'], bb['ymin'], bb['xmax'], bb['ymax']) for bb in boxes]
boxes = [
(bb["xmin"], bb["ymin"], bb["xmax"], bb["ymax"]) for bb in boxes
]
boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
target.append(boxes)
elif k == "boxes_category":
anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot())
boxes_category = [obj["name"] for obj in anno["annotation"]["object"]]
boxes_category = [self.class_names.index(bc)-1 for bc in boxes_category]
boxes_category = [
self.class_names.index(bc) - 1 for bc in boxes_category
]
boxes_category = np.array(boxes_category, dtype=np.int32)
target.append(boxes_category)
elif k == "mask":


Loading…
Cancel
Save