Incorrect bounding box placement in dashboard display and downloaded files
See original GitHub issue
System:
- OS:
Arch Linux x86_64
- Kernel:
5.8.14-arch1-1
- Python:
3.8.6
- wandb:
0.10.4
Description:
Some image samples have incorrectly displayed bounding boxes, as displayed in the images below:
Correct bounding boxes, drawn using PIL
and ImageDraw
and uploaded to wandb
:
Bounding boxes generated by wandb
, after downloading the logged image:
Bounding boxes displayed in the wandb
dashboard UI, obtained via screenshot in fullscreen/expanded mode:
Interestingly, in both wandb
samples, some bounding boxes are correct, but some are misplaced. Moreover, the downloaded image seems to have mainly boxes with correct x coordinates but incorrect y coordinates, and the dashboard screenshot shows the converse.
Steps to reproduce:
- Log an image and its bounding boxes e.g. from COCO or GQA to
wandb
- Compare to the known bounding box positions from the dataset.
The following code snippets were used to log the boxes:
"""Tools for visualising images, bounding boxes and attention maps."""
from typing import Dict, Optional, Sequence, List, Tuple
import random
import wandb
from torch import Tensor
from PIL import Image, ImageDraw
import torchvision.transforms as T
class BoundingBox:
    """Class wrapper for storing bounding box information.

    The four corner coordinates are kept exactly as passed in (no scaling,
    clamping or ordering checks are applied), together with an optional
    class label and an optional score.
    """

    def __init__(
        self,
        min_x: int,
        min_y: int,
        max_x: int,
        max_y: int,
        label: Optional[int] = None,
        score: Optional[float] = None,
    ) -> None:
        """Initialise a bounding box instance.

        Args:
            min_x: Minimum x value, measured from the left of the image.
            min_y: Minimum y value, measured from the top of the image.
            max_x: Maximum x value, measured from the left of the image.
            max_y: Maximum y value, measured from the top of the image.
            label: Optional integer class label of the box.
            score: Optional score attached to the box.
        """
        # Stored as one tuple in (min_x, min_y, max_x, max_y) order; the
        # properties below index into it.
        self._coords = (min_x, min_y, max_x, max_y)
        self._label = label
        self._score = score

    def __repr__(self) -> str:
        """Return a constructor-style representation for debugging."""
        return (
            f"{type(self).__name__}("
            f"min_x={self.min_x!r}, min_y={self.min_y!r}, "
            f"max_x={self.max_x!r}, max_y={self.max_y!r}, "
            f"label={self.label!r}, score={self.score!r})"
        )

    @property
    def min_x(self) -> int:
        """Get the minimum x value of the box, measured from the left of the image."""
        return self._coords[0]

    @property
    def min_y(self) -> int:
        """Get the minimum y value of the box, measured from the top of the image."""
        return self._coords[1]

    @property
    def max_x(self) -> int:
        """Get the maximum x value of the box, measured from the left of the image."""
        return self._coords[2]

    @property
    def max_y(self) -> int:
        """Get the maximum y value of the box, measured from the top of the image."""
        return self._coords[3]

    @property
    def label(self) -> Optional[int]:
        """Get the bounding box's label."""
        return self._label

    @property
    def score(self) -> Optional[float]:
        """Get the bounding box's score."""
        return self._score
def plot_image(
    image: Tensor,
    boxes: Optional[Dict[str, Sequence[BoundingBox]]] = None,
) -> wandb.Image:
    """Draw optional bounding boxes onto an image and wrap it for wandb.

    The tensor is converted to a PIL image, every supplied box is outlined
    with PIL's ``ImageDraw`` in a randomly chosen RGB colour, and the result
    is returned as a ``wandb.Image``. Nothing is written to disk here.

    Args:
        image: Image tensor, converted with torchvision's ``ToPILImage``.
        boxes: Optional mapping from a group name to the boxes in that group.
            The group names are not used for drawing; all boxes are drawn.
            Box coordinates are passed to ``ImageDraw.rectangle`` unchanged.

    Returns:
        A ``wandb.Image`` wrapping the PIL image with the boxes drawn on it.
    """
    img = T.ToPILImage()(image)
    draw = ImageDraw.Draw(img)
    if boxes is not None:
        for bbox_seq in boxes.values():
            for bbox in bbox_seq:
                # A fresh random colour per box so neighbouring boxes can be
                # told apart; the colour carries no class information.
                color = (
                    random.randint(0, 255),
                    random.randint(0, 255),
                    random.randint(0, 255),
                )
                draw.rectangle(
                    (bbox.min_x, bbox.min_y, bbox.max_x, bbox.max_y),
                    outline=color,
                )
    return wandb.Image(img)
def wandb_image(
    image: Tensor,
    caption: Optional[str] = None,
    boxes: Optional[Dict[str, Sequence[BoundingBox]]] = None,
    object_to_index: Optional[Dict[str, int]] = None,
) -> wandb.Image:
    """Create a single wandb image for logging with optional bounding boxes.

    Box coordinates are divided by the image extent so that wandb receives
    fractional ``minX``/``maxX``/``minY``/``maxY`` positions.

    Args:
        image: Image tensor. x coordinates are divided by ``image.size(2)``
            and y coordinates by ``image.size(1)``.
        caption: Optional caption attached to the image.
        boxes: Optional mapping from a box-group name to the boxes in that
            group. Each group becomes one entry of the wandb ``boxes`` dict.
        object_to_index: Optional mapping from class name to class index. It
            is inverted to give wandb its ``class_labels`` and to caption
            each box with its class name.

    Returns:
        A ``wandb.Image`` carrying the image, caption and box overlays.

    Raises:
        KeyError: If a box label is missing from ``object_to_index``'s values.
    """
    class_labels = (
        {idx: key for key, idx in object_to_index.items()}
        if object_to_index is not None
        else None
    )
    if boxes is None:
        return wandb.Image(image, boxes=None, caption=caption)

    # assumes image is laid out as (channels, height, width) — TODO confirm
    height = image.size(1)
    width = image.size(2)

    def _to_box_data(box: BoundingBox) -> dict:
        """Convert one BoundingBox into a wandb ``box_data`` entry."""
        entry = {
            "position": {
                "minX": box.min_x / width,
                "maxX": box.max_x / width,
                "minY": box.min_y / height,
                "maxY": box.max_y / height,
            },
        }
        # NOTE(review): wandb's box validation appears to require an integer
        # ``class_id`` and a string ``box_caption`` whenever those keys are
        # present, so they are left out for unlabeled boxes and the fallback
        # caption is stringified — confirm against the installed wandb version.
        if box.label is not None:
            entry["class_id"] = box.label
            entry["box_caption"] = (
                class_labels[box.label]
                if class_labels is not None
                else str(box.label)
            )
        return entry

    wandb_boxes = {
        key: {
            "box_data": [_to_box_data(box) for box in keyed_boxes],
            "class_labels": class_labels,
        }
        for key, keyed_boxes in boxes.items()
    }
    return wandb.Image(image, boxes=wandb_boxes, caption=caption)
The above functions were called like so: Please excuse the unreferenced functions and classes. I hope this is enough to give some context.
# Collect, for the first ten validation samples, one wandb-rendered image
# (boxes passed to wandb) and one custom image (boxes drawn with PIL).
visualisations: Dict[str, Any] = {"images": [], "custom": []}
val_subset = torch.utils.data.Subset(self.datasets.val, range(0, 10))
dataloader = DataLoader(
    val_subset,
    batch_size=1,
    num_workers=0,
    collate_fn=VariableSizeTensorCollator(),
)
with torch.no_grad():
    for sample in dataloader:
        # Load data
        dependencies = sample["question"]["dependencies"].to(self.device)
        scene_graph = sample["scene_graph"]
        graph = scene_graph["graph"].to(self.device)
        image_id = scene_graph["imageId"][0]
        image = self.datasets.images[self.datasets.images.key_to_index(image_id)]

        # Propagate data forward
        self.model(question_graph=dependencies, scene_graph=graph)

        # Debug output: box tensor size, number of boxes, and image size.
        print(f'{sample["scene_graph"]["boxes"][0].size()=}')
        print(f'{len(sample["scene_graph"]["boxes"][0].tolist())=}')
        print(f'{image.size()=}')

        # Build the labeled ground truth boxes once; both visualisations
        # below only read them.
        object_to_index = self.preprocessors.scene_graphs.object_to_index
        label_indices = [
            object_to_index[obj_lbl[0]] for obj_lbl in scene_graph["labels"]
        ]
        ground_truth = [
            BoundingBox(box[0], box[1], box[2], box[3], label=lbl)
            for box, lbl in zip(scene_graph["boxes"][0].tolist(), label_indices)
        ]

        # Add image with labeled ground truth bounding boxes
        visualisations["images"].append(
            wandb_image(
                image,
                caption=image_id,
                boxes={"ground_truth": ground_truth},
                object_to_index=object_to_index,
            )
        )
        # Same boxes drawn directly onto the image with PIL for comparison.
        visualisations["custom"].append(
            plot_image(image, boxes={"ground_truth": ground_truth})
        )
Issue Analytics
- State:
- Created 3 years ago
- Comments:5 (1 by maintainers)
Top GitHub Comments
Hi @ariG23498, The problem seems to be fixed now when using
wandb==0.10.12
, so I will close the issue 😃
Hi @ariG23498,
I’ve been working on other portions of my code base lately but when I get back to data visualisations I will let you know. Hopefully, I’ll have a solid answer by the end of the week