@@ -14,7 +14,7 @@ | |||||
# --------------------------------------------------------------------- | # --------------------------------------------------------------------- | ||||
import json | import json | ||||
import os | import os | ||||
from collections import OrderedDict, defaultdict | |||||
from collections import defaultdict | |||||
import cv2 | import cv2 | ||||
import numpy as np | import numpy as np | ||||
@@ -28,26 +28,21 @@ def _count_visible_keypoints(anno): | |||||
return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) | ||||
def _has_only_empty_bbox(anno): | |||||
return all(any(o <= 0 for o in obj["bbox"][2:]) for obj in anno) | |||||
def has_valid_annotation(anno): | |||||
def has_valid_annotation(anno, order): | |||||
# if it"s empty, there is no annotation | # if it"s empty, there is no annotation | ||||
if len(anno) == 0: | if len(anno) == 0: | ||||
return False | return False | ||||
# if all boxes have close to zero area, there is no annotation | |||||
if _has_only_empty_bbox(anno): | |||||
return False | |||||
# keypoints task have a slight different critera for considering | |||||
# if an annotation is valid | |||||
if "keypoints" not in anno[0]: | |||||
return True | |||||
# for keypoint detection tasks, only consider valid images those | |||||
# containing at least min_keypoints_per_image | |||||
if _count_visible_keypoints(anno) >= min_keypoints_per_image: | |||||
return True | |||||
return False | |||||
if "boxes" in order or "boxes_category" in order: | |||||
if "bbox" not in anno[0]: | |||||
return False | |||||
if "keypoints" in order: | |||||
if "keypoints" not in anno[0]: | |||||
return False | |||||
# for keypoint detection tasks, only consider valid images those | |||||
# containing at least min_keypoints_per_image | |||||
if _count_visible_keypoints(anno) < min_keypoints_per_image: | |||||
return False | |||||
return True | |||||
class COCO(VisionDataset): | class COCO(VisionDataset): | ||||
@@ -58,8 +53,8 @@ class COCO(VisionDataset): | |||||
"image", | "image", | ||||
"boxes", | "boxes", | ||||
"boxes_category", | "boxes_category", | ||||
"keypoints", | |||||
# TODO: need to check | # TODO: need to check | ||||
# "keypoints", | |||||
# "polygons", | # "polygons", | ||||
"info", | "info", | ||||
) | ) | ||||
@@ -72,7 +67,7 @@ class COCO(VisionDataset): | |||||
with open(ann_file, "r") as f: | with open(ann_file, "r") as f: | ||||
dataset = json.load(f) | dataset = json.load(f) | ||||
self.imgs = OrderedDict() | |||||
self.imgs = dict() | |||||
for img in dataset["images"]: | for img in dataset["images"]: | ||||
# for saving memory | # for saving memory | ||||
if "license" in img: | if "license" in img: | ||||
@@ -98,7 +93,7 @@ class COCO(VisionDataset): | |||||
del ann["segmentation"] | del ann["segmentation"] | ||||
self.img_to_anns[ann["image_id"]].append(ann) | self.img_to_anns[ann["image_id"]].append(ann) | ||||
self.cats = OrderedDict() | |||||
self.cats = dict() | |||||
for cat in dataset["categories"]: | for cat in dataset["categories"]: | ||||
self.cats[cat["id"]] = cat | self.cats[cat["id"]] = cat | ||||
@@ -109,8 +104,17 @@ class COCO(VisionDataset): | |||||
ids = [] | ids = [] | ||||
for img_id in self.ids: | for img_id in self.ids: | ||||
anno = self.img_to_anns[img_id] | anno = self.img_to_anns[img_id] | ||||
if has_valid_annotation(anno): | |||||
# filter crowd annotations | |||||
anno = [obj for obj in anno if obj["iscrowd"] == 0] | |||||
anno = [ | |||||
obj for obj in anno if obj["bbox"][2] > 0 and obj["bbox"][3] > 0 | |||||
] | |||||
if has_valid_annotation(anno, order): | |||||
ids.append(img_id) | ids.append(img_id) | ||||
self.img_to_anns[img_id] = anno | |||||
else: | |||||
del self.imgs[img_id] | |||||
del self.img_to_anns[img_id] | |||||
self.ids = ids | self.ids = ids | ||||
self.json_category_id_to_contiguous_id = { | self.json_category_id_to_contiguous_id = { | ||||
@@ -125,11 +129,6 @@ class COCO(VisionDataset): | |||||
img_id = self.ids[index] | img_id = self.ids[index] | ||||
anno = self.img_to_anns[img_id] | anno = self.img_to_anns[img_id] | ||||
# filter crowd annotations | |||||
anno = [obj for obj in anno if obj["iscrowd"] == 0] | |||||
# filter empty annotations | |||||
anno = [obj for obj in anno if obj["area"] > 0] | |||||
target = [] | target = [] | ||||
for k in self.order: | for k in self.order: | ||||
if k == "image": | if k == "image": | ||||
@@ -181,7 +180,6 @@ class COCO(VisionDataset): | |||||
return img_info | return img_info | ||||
class_names = ( | class_names = ( | ||||
"background", | |||||
"person", | "person", | ||||
"bicycle", | "bicycle", | ||||
"car", | "car", | ||||
@@ -69,7 +69,9 @@ class Objects365(VisionDataset): | |||||
anno = self.img_to_anns[img_id] | anno = self.img_to_anns[img_id] | ||||
# filter crowd annotations | # filter crowd annotations | ||||
anno = [obj for obj in anno if obj["iscrowd"] == 0] | anno = [obj for obj in anno if obj["iscrowd"] == 0] | ||||
anno = [obj for obj in anno if obj["bbox"][2] > 0 and obj["bbox"][3] > 0] | |||||
anno = [ | |||||
obj for obj in anno if obj["bbox"][2] > 0 and obj["bbox"][3] > 0 | |||||
] | |||||
if len(anno) > 0: | if len(anno) > 0: | ||||
ids.append(img_id) | ids.append(img_id) | ||||
self.img_to_anns[img_id] = anno | self.img_to_anns[img_id] = anno | ||||
@@ -37,7 +37,9 @@ class PascalVOC(VisionDataset): | |||||
def __init__(self, root, image_set, *, order=None): | def __init__(self, root, image_set, *, order=None): | ||||
if ("boxes" in order or "boxes_category" in order) and "mask" in order: | if ("boxes" in order or "boxes_category" in order) and "mask" in order: | ||||
raise ValueError("PascalVOC only supports boxes & boxes_category or mask, not both.") | |||||
raise ValueError( | |||||
"PascalVOC only supports boxes & boxes_category or mask, not both." | |||||
) | |||||
super().__init__(root, order=order, supported_order=self.supported_order) | super().__init__(root, order=order, supported_order=self.supported_order) | ||||
@@ -48,13 +50,15 @@ class PascalVOC(VisionDataset): | |||||
image_dir = os.path.join(self.root, "JPEGImages") | image_dir = os.path.join(self.root, "JPEGImages") | ||||
if "boxes" in order or "boxes_category" in order: | if "boxes" in order or "boxes_category" in order: | ||||
annotation_dir = os.path.join(self.root, 'Annotations') | |||||
annotation_dir = os.path.join(self.root, "Annotations") | |||||
splitdet_dir = os.path.join(self.root, "ImageSets/Main") | splitdet_dir = os.path.join(self.root, "ImageSets/Main") | ||||
split_f = os.path.join(splitdet_dir, image_set.rstrip("\n") + ".txt") | split_f = os.path.join(splitdet_dir, image_set.rstrip("\n") + ".txt") | ||||
with open(os.path.join(split_f), "r") as f: | with open(os.path.join(split_f), "r") as f: | ||||
self.file_names = [x.strip() for x in f.readlines()] | self.file_names = [x.strip() for x in f.readlines()] | ||||
self.images = [os.path.join(image_dir, x + ".jpg") for x in self.file_names] | self.images = [os.path.join(image_dir, x + ".jpg") for x in self.file_names] | ||||
self.annotations = [os.path.join(annotation_dir, x + ".xml") for x in self.file_names] | |||||
self.annotations = [ | |||||
os.path.join(annotation_dir, x + ".xml") for x in self.file_names | |||||
] | |||||
assert len(self.images) == len(self.annotations) | assert len(self.images) == len(self.annotations) | ||||
elif "mask" in order: | elif "mask" in order: | ||||
if "aug" in image_set: | if "aug" in image_set: | ||||
@@ -81,13 +85,17 @@ class PascalVOC(VisionDataset): | |||||
anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot()) | anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot()) | ||||
boxes = [obj["bndbox"] for obj in anno["annotation"]["object"]] | boxes = [obj["bndbox"] for obj in anno["annotation"]["object"]] | ||||
# boxes type xyxy | # boxes type xyxy | ||||
boxes = [(bb['xmin'], bb['ymin'], bb['xmax'], bb['ymax']) for bb in boxes] | |||||
boxes = [ | |||||
(bb["xmin"], bb["ymin"], bb["xmax"], bb["ymax"]) for bb in boxes | |||||
] | |||||
boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4) | boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4) | ||||
target.append(boxes) | target.append(boxes) | ||||
elif k == "boxes_category": | elif k == "boxes_category": | ||||
anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot()) | anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot()) | ||||
boxes_category = [obj["name"] for obj in anno["annotation"]["object"]] | boxes_category = [obj["name"] for obj in anno["annotation"]["object"]] | ||||
boxes_category = [self.class_names.index(bc)-1 for bc in boxes_category] | |||||
boxes_category = [ | |||||
self.class_names.index(bc) - 1 for bc in boxes_category | |||||
] | |||||
boxes_category = np.array(boxes_category, dtype=np.int32) | boxes_category = np.array(boxes_category, dtype=np.int32) | ||||
target.append(boxes_category) | target.append(boxes_category) | ||||
elif k == "mask": | elif k == "mask": | ||||