Stuck on an issue?

Lightrun Answers was designed to reduce the constant googling that comes with debugging third-party libraries. It collects links to all the places you might be looking at while hunting down a tough bug.

And, if you’re still stuck at the end, we’re happy to hop on a call to see how we can help out.

Incredibly low CMC scores - possible test set config issue

See original GitHub issue

So I’m using my own dataset (configuration below). The dataset is structured like MARS and consists of pig identities (hence the name). I have tried both training on my dataset from scratch and training on MARS first, then transferring to PigMARS. Either way, the results are so low they don’t seem plausible.

After 20 epochs of training SENet on MARS I get a Rank-1 CMC of 64%. After a further 20 epochs on PigMARS, the Rank-1, Rank-3, and Rank-5 CMC are all 0%, and mAP is below 20%.

Training accuracy on PigMARS is in the high 90s, which made me think I might be overfitting (even though I still would have expected a higher test-set score). To check, I plotted the features generated for my test set (query left, gallery right) using t-SNE, and the embedding looks well separated, so the features seem fine in that regard. Yet the CMC and mAP scores are terrible.

[Screenshot from 2019-07-25: t-SNE of the test-set features, query (left) and gallery (right)]
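
A plot like this can be produced with scikit-learn’s t-SNE. This is only a minimal sketch: the random arrays below stand in for the extracted (N, D) feature matrices and pig IDs.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

def plot_tsne(feats, pids, title, ax):
    # Project high-dimensional features to 2-D for visualisation only;
    # t-SNE distances are not what CMC/mAP rank on.
    emb = TSNE(n_components=2, random_state=0).fit_transform(feats)
    ax.scatter(emb[:, 0], emb[:, 1], c=pids, cmap="tab20", s=8)
    ax.set_title(title)

# Placeholder data standing in for the real extracted features.
rng = np.random.default_rng(0)
query_feats = rng.normal(size=(200, 512))
query_pids = rng.integers(0, 10, size=200)
gallery_feats = rng.normal(size=(800, 512))
gallery_pids = rng.integers(0, 10, size=800)

fig, (ax_q, ax_g) = plt.subplots(1, 2, figsize=(10, 4))
plot_tsne(query_feats, query_pids, "query", ax_q)
plot_tsne(gallery_feats, gallery_pids, "gallery", ax_g)
plt.show()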

The only thing I can think of is that I haven’t configured the query/gallery test set correctly, but I can’t find any issues with it. Any help is hugely appreciated.
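
For context, this is how the dataset is laid out on disk, mirroring MARS (the filename fragment below is illustrative):

pig_mars/
├── bbox_train/
│   └── 0001/              # one directory per identity, zero-padded pid
│       └── 0001C1...jpg   # filename encodes "<pid>C<camid>..."
├── bbox_test/
│   └── ...
└── info/
    └── query_idx.npy      # indices of the test tracklets used as queries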

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import logging
from os import path, listdir

import numpy as np
from torchreid.data import VideoDataset

class PigMars(VideoDataset):
    dataset_dir = "pig_mars"
    def __init__(self, root='', **kwargs):
        self.root = root
        self.dataset_dir = path.join(self.root, self.dataset_dir)
        # self.cached_files = path.join(self.dataset_dir, "cache")

        train = self.load_data("train")
        query = self.load_data("query")
        gallery = self.load_data("gallery")

        print("Validating train")
        self.validate_dataset(train)
        print("Validating query")
        self.validate_dataset(query)
        print("Validating gallery")
        self.validate_dataset(gallery)

        super(PigMars, self).__init__(train, query, gallery, **kwargs)

    def validate_dataset(self, dataset):
        # Sanity-check that each frame's filename agrees with the tracklet's
        # pid and camera id, and report mismatches rather than silently
        # swallowing them. Note that pid here is the pid2label-remapped
        # label, so it only matches the raw pid in the filename when the
        # raw pids are already 0-based and contiguous.
        mismatches = 0
        for imgs, pid, cid in dataset:
            for img in imgs:
                img_fn = path.basename(img)
                if (self._get_cam_id_from_name(img_fn) != cid
                        or self._get_object_id_from_name(img_fn) != pid):
                    mismatches += 1
        if mismatches:
            print(f"Found {mismatches} frames with inconsistent pid/camid")

    def load_data(self, dataset):
        return self.build_data(dataset)

    def build_data(self, dataset):
        if dataset == "train":
            return self._build_block("train")
        elif dataset == "query":
            # The test block is rebuilt separately for query and gallery;
            # this only stays consistent because _build_block uses sorted
            # listings, so query_idx points at the same tracklets each time.
            # dtype=object because each row mixes a list of paths with ints.
            test_block = np.array(self._build_block("test"), dtype=object)
            return self._build_query(test_block)
        elif dataset == "gallery":
            test_block = np.array(self._build_block("test"), dtype=object)
            return self._build_gallery(test_block)
        else:
            raise ValueError(f"Dataset {dataset} not defined")

    def _get_cam_id_from_name(self, filename):
        # Filenames encode "<pid>C<camid>..."; take the basename first so
        # that directory components cannot interfere with the split on "C".
        return int(path.basename(filename).split("C")[1][0])

    def _get_object_id_from_name(self, filename):
        return int(path.basename(filename).split("C")[0])

    def _all_same_cam(self, images):
        camid = self._get_cam_id_from_name(images[0])
        for image in images:
            if self._get_cam_id_from_name(image) != camid:
                return False
        return True

    def _build_block(self, dataset):
        # Build dense, overlapping tracklets (stride 1) of fixed length from
        # each identity's frames, keeping a tracklet only when every frame
        # comes from the same camera.
        tracklet_size = 10
        bbox = path.join(self.dataset_dir, f"bbox_{dataset}")
        tracklets = []

        pids = sorted(int(x) for x in listdir(bbox))
        pid2label = {pid: label for label, pid in enumerate(pids)}

        for pid in pids:
            logging.debug(f"Processing pid {pid}")
            pid_dir = path.join(bbox, str(pid).zfill(4))
            frames = sorted(listdir(pid_dir))
            # Slicing never raises IndexError, so bound the loop explicitly
            # to avoid emitting tracklets shorter than tracklet_size at the
            # end of each identity's frame sequence.
            for i in range(len(frames) - tracklet_size + 1):
                selected_images = [path.join(pid_dir, img_fp)
                                   for img_fp in frames[i:i + tracklet_size]]
                camid = self._get_cam_id_from_name(selected_images[0])
                if self._all_same_cam(selected_images):
                    tracklets.append((selected_images, pid2label[pid], camid))

        return tracklets

    def _load_query_idx(self):
        # Indices (into the test tracklet list) marking the query samples.
        return np.load(path.join(self.dataset_dir, "info", "query_idx.npy"))

    def _build_query(self, test_block):
        query_idx = self._load_query_idx()
        return test_block[query_idx]

    def _build_gallery(self, test_block):
        # Every test tracklet that is not a query belongs to the gallery.
        query_idx = self._load_query_idx()
        gallery_idx = np.setdiff1d(np.arange(test_block.shape[0]), query_idx)
        return test_block[gallery_idx]
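
For completeness, this is roughly how the class gets plugged into torchreid; a minimal sketch following torchreid's custom-dataset registration pattern, with all keyword arguments beyond root and sources left at their defaults:

import torchreid

# Register the custom dataset under a key torchreid can resolve,
# then build a video data manager from it.
torchreid.data.register_video_dataset('pig_mars', PigMars)

datamanager = torchreid.data.VideoDataManager(
    root='/path/to/data',  # the parent directory of pig_mars/
    sources='pig_mars',
)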

Issue Analytics

  • State: closed
  • Created: 4 years ago
  • Comments: 7 (4 by maintainers)

Top GitHub Comments

3 reactions
JakeCowton commented, Aug 12, 2019

Early access version of the paper is out now 😃 https://ieeexplore.ieee.org/document/8787792

1 reaction
KaiyangZhou commented, Jul 30, 2019

Good, you can put a paper link here if it gets published; I believe it would be of interest to some people.

Rank-1 = 100% is not equivalent to mAP = 100%.

CMC rank-k only cares about whether a true match appears within the top k, whereas mAP takes the ranking order into account. For example, consider a retrieval result [1, 0, 0, 0, 1] where the first and last items are correct: the CMC rank-1 is 100%, but the mAP is definitely not.

To some extent, mAP is the better metric for measuring how good the learned features are.
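
A tiny sketch makes the distinction concrete. These are the standard definitions written out by hand, not torchreid's evaluation code:

import numpy as np

def rank_k_cmc(matches, k):
    # CMC at rank k: 1 if any true match appears in the top k, else 0.
    return int(np.any(np.asarray(matches)[:k]))

def average_precision(matches):
    # AP averages the precision at each position holding a true match.
    hits = np.flatnonzero(np.asarray(matches))
    precisions = [(i + 1) / (rank + 1) for i, rank in enumerate(hits)]
    return float(np.mean(precisions))

ranked = [1, 0, 0, 0, 1]          # the example from the comment above
print(rank_k_cmc(ranked, 1))      # -> 1    (rank-1 CMC is 100%)
print(average_precision(ranked))  # -> 0.7  (AP well below 100%)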

