|
|
@@ -29,43 +29,47 @@ class PascalVOC(VisionDataset): |
|
|
|
|
|
|
|
supported_order = ( |
|
|
|
"image", |
|
|
|
# "boxes", |
|
|
|
# "boxes_category", |
|
|
|
"boxes", |
|
|
|
"boxes_category", |
|
|
|
"mask", |
|
|
|
"info", |
|
|
|
) |
|
|
|
|
|
|
|
def __init__(self, root, image_set, *, order=None): |
|
|
|
if ("boxes" in order or "boxes_category" in order) and "mask" in order: |
|
|
|
raise ValueError("PascalVOC only supports boxes & boxes_category or mask, not both.") |
|
|
|
|
|
|
|
super().__init__(root, order=order, supported_order=self.supported_order) |
|
|
|
|
|
|
|
voc_root = self.root |
|
|
|
if not os.path.isdir(voc_root): |
|
|
|
if not os.path.isdir(self.root): |
|
|
|
raise RuntimeError("Dataset not found or corrupted.") |
|
|
|
|
|
|
|
self.image_set = image_set |
|
|
|
image_dir = os.path.join(voc_root, "JPEGImages") |
|
|
|
|
|
|
|
# for segmentation |
|
|
|
if "aug" in image_set: |
|
|
|
mask_dir = os.path.join(voc_root, "SegmentationClass_aug") |
|
|
|
image_dir = os.path.join(self.root, "JPEGImages") |
|
|
|
|
|
|
|
if "boxes" in order or "boxes_category" in order: |
|
|
|
annotation_dir = os.path.join(self.root, 'Annotations') |
|
|
|
splitdet_dir = os.path.join(self.root, "ImageSets/Main") |
|
|
|
split_f = os.path.join(splitdet_dir, image_set.rstrip("\n") + ".txt") |
|
|
|
with open(os.path.join(split_f), "r") as f: |
|
|
|
self.file_names = [x.strip() for x in f.readlines()] |
|
|
|
self.images = [os.path.join(image_dir, x + ".jpg") for x in self.file_names] |
|
|
|
self.annotations = [os.path.join(annotation_dir, x + ".xml") for x in self.file_names] |
|
|
|
assert len(self.images) == len(self.annotations) |
|
|
|
elif "mask" in order: |
|
|
|
if "aug" in image_set: |
|
|
|
mask_dir = os.path.join(self.root, "SegmentationClass_aug") |
|
|
|
else: |
|
|
|
mask_dir = os.path.join(self.root, "SegmentationClass") |
|
|
|
splitmask_dir = os.path.join(self.root, "ImageSets/Segmentation") |
|
|
|
split_f = os.path.join(splitmask_dir, image_set.rstrip("\n") + ".txt") |
|
|
|
with open(os.path.join(split_f), "r") as f: |
|
|
|
self.file_names = [x.strip() for x in f.readlines()] |
|
|
|
self.images = [os.path.join(image_dir, x + ".jpg") for x in self.file_names] |
|
|
|
self.masks = [os.path.join(mask_dir, x + ".png") for x in self.file_names] |
|
|
|
assert len(self.images) == len(self.masks) |
|
|
|
else: |
|
|
|
mask_dir = os.path.join(voc_root, "SegmentationClass") |
|
|
|
splitmask_dir = os.path.join(voc_root, "ImageSets/Segmentation") |
|
|
|
split_f = os.path.join(splitmask_dir, image_set.rstrip("\n") + ".txt") |
|
|
|
with open(os.path.join(split_f), "r") as f: |
|
|
|
self.file_names = [x.strip() for x in f.readlines()] |
|
|
|
self.images = [os.path.join(image_dir, x + ".jpg") for x in self.file_names] |
|
|
|
self.masks = [os.path.join(mask_dir, x + ".png") for x in self.file_names] |
|
|
|
|
|
|
|
# TODO: for detection |
|
|
|
# splitdet_dir = os.path.join(voc_root, "ImageSets/Main") |
|
|
|
# split_f = os.path.join(splitdet_dir, image_set.rstrip("\n") + ".txt") |
|
|
|
# with open(os.path.join(split_f), "r") as f: |
|
|
|
# self.file_names = [x.strip() for x in f.readlines()] |
|
|
|
# self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names] |
|
|
|
# self.annotations = [os.path.join(annotation_dir, x + ".xml") for x in self.file_names] |
|
|
|
|
|
|
|
# assert (len(self.images) == len(self.masks)) and (len(self.images) == len(self.annotations)) |
|
|
|
raise NotImplementedError |
|
|
|
|
|
|
|
def __getitem__(self, index): |
|
|
|
target = [] |
|
|
@@ -73,6 +77,19 @@ class PascalVOC(VisionDataset): |
|
|
|
if k == "image": |
|
|
|
image = cv2.imread(self.images[index], cv2.IMREAD_COLOR) |
|
|
|
target.append(image) |
|
|
|
elif k == "boxes": |
|
|
|
anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot()) |
|
|
|
boxes = [obj["bndbox"] for obj in anno["annotation"]["object"]] |
|
|
|
# boxes type xyxy |
|
|
|
boxes = [(bb['xmin'], bb['ymin'], bb['xmax'], bb['ymax']) for bb in boxes] |
|
|
|
boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4) |
|
|
|
target.append(boxes) |
|
|
|
elif k == "boxes_category": |
|
|
|
anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot()) |
|
|
|
boxes_category = [obj["name"] for obj in anno["annotation"]["object"]] |
|
|
|
boxes_category = [self.class_names.index(bc)-1 for bc in boxes_category] |
|
|
|
boxes_category = np.array(boxes_category, dtype=np.int32) |
|
|
|
target.append(boxes_category) |
|
|
|
elif k == "mask": |
|
|
|
if "aug" in self.image_set: |
|
|
|
mask = cv2.imread(self.masks[index], cv2.IMREAD_GRAYSCALE) |
|
|
@@ -81,9 +98,6 @@ class PascalVOC(VisionDataset): |
|
|
|
mask = self._trans_mask(mask) |
|
|
|
mask = mask[:, :, np.newaxis] |
|
|
|
target.append(mask) |
|
|
|
elif k == "boxes": |
|
|
|
boxes = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot()) |
|
|
|
target.append(boxes) |
|
|
|
elif k == "info": |
|
|
|
if image is None: |
|
|
|
image = cv2.imread(self.images[index], cv2.IMREAD_COLOR) |
|
|
@@ -128,7 +142,6 @@ class PascalVOC(VisionDataset): |
|
|
|
return voc_dict |
|
|
|
|
|
|
|
class_names = ( |
|
|
|
"background", |
|
|
|
"aeroplane", |
|
|
|
"bicycle", |
|
|
|
"bird", |
|
|
@@ -151,7 +164,6 @@ class PascalVOC(VisionDataset): |
|
|
|
"tvmonitor", |
|
|
|
) |
|
|
|
class_colors = [ |
|
|
|
[0, 0, 0], |
|
|
|
[0, 0, 128], |
|
|
|
[0, 128, 0], |
|
|
|
[0, 128, 128], |
|
|
|