If multiple predictions share the same score, the current implementation of the metric scores them differently depending on the order in which the tied predictions are submitted.
https://github.com/facebookresearch/isc2021/blob/dab82c0381a3198270a8fa3a8ab722c3eba81b58/vcd/vcd/metrics.py#L351
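To see why, here is a minimal, self-contained sketch with toy numbers (not the library's VideoPairEvaluator machinery, and ignoring the per-axis geometric mean for brevity). Two tied predictions, one true positive and one false positive, are applied one at a time, as in the current per-prediction update; processing the true positive first produces a different AP than processing it second, even though both orders describe the same submission:

def ap_per_prediction(deltas, gt_total):
    # deltas: list of (intersection_delta, total_delta), one entry per prediction,
    # applied one at a time as in the current per-prediction update.
    intersection = total = recall = ap = 0.0
    for d_int, d_tot in deltas:
        intersection += d_int
        total += d_tot
        new_recall = intersection / gt_total
        precision = intersection / total
        ap += precision * (new_recall - recall)
        recall = new_recall
    return ap

tp = (10.0, 10.0)  # overlaps 10 units of gt, adds 10 units of predicted length
fp = (0.0, 10.0)   # adds 10 units of predicted length, overlaps no gt
print(ap_per_prediction([tp, fp], gt_total=20.0))  # 0.5
print(ap_per_prediction([fp, tp], gt_total=20.0))  # 0.25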
We have fixed this by grouping predictions by score and updating precision and recall for each score threshold as a whole, rather than once per individual prediction:
from collections import defaultdict
from itertools import groupby
from math import sqrt
from typing import Collection
...
def match_metric(gts: Collection[Match], predictions: Collection[Match]):
    r"""V2 metric:
    Computes the AP based on the VCSL approach for the
    calculation of Precision and Recall.
    AP = \sum_{i=1}^N P(i) ΔR(i)
    where P(i) = sqrt(P_q * P_r) and R(i) = sqrt(R_q * R_r),
    calculated as in VCSL.
    """
    predictions = sorted(predictions, key=lambda x: x.score, reverse=True)
    # Initialize video pairs and load their gt bboxes
    video_pairs = defaultdict(VideoPairEvaluator)
    for gt in gts:
        video_pairs[gt.pair_id].add_gt(gt)
    # Get the total gt length for each axis
    gt_total_lengths = {axis: 0 for axis in Axis}
    for _, v in video_pairs.items():
        for axis in Axis:
            gt_total_lengths[axis] += v.total_gt_length(axis)
    # Running totals while looping through the predictions
    recall = 0.0
    metric = 0.0
    intersections = {axis: 0 for axis in Axis}
    totals = {axis: 0 for axis in Axis}
    # Group predictions by score to break ties consistently.
    # groupby relies on the descending sort done above.
    for _, preds in groupby(predictions, key=lambda x: x.score):
        # Accumulate the contribution of the whole group before updating the metric
        for pred in preds:
            pair_id = pred.pair_id
            # Given a new prediction, we only need the differences in the intersection with
            # gt and total video length covered for both query and reference axes.
            intersection_deltas, total_deltas = video_pairs[pair_id].add_prediction(pred)
            for axis in Axis:
                # Accumulate the differences to the corresponding values
                intersections[axis] += intersection_deltas[axis]
                totals[axis] += total_deltas[axis]
        # Compute precision and recall once per score threshold (group)
        recalls = {}
        precisions = {}
        for axis in Axis:
            recalls[axis] = intersections[axis] / gt_total_lengths[axis]
            precisions[axis] = intersections[axis] / totals[axis]
        new_recall = sqrt(recalls[Axis.QUERY] * recalls[Axis.REF])
        precision = sqrt(precisions[Axis.QUERY] * precisions[Axis.REF])
        # Update the metric with the area added at this threshold
        delta_recall = new_recall - recall
        metric += precision * delta_recall
        recall = new_recall
    return metric
...
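With the grouped update, the tied predictions collapse into a single precision/recall point, so the result no longer depends on submission order. Continuing the earlier toy sketch (again an illustrative helper, not the library code, ignoring the per-axis geometric mean), both orders now give the same value:

from itertools import groupby

def ap_grouped(scored_deltas, gt_total):
    # scored_deltas: list of (score, intersection_delta, total_delta),
    # already sorted by score in descending order.
    intersection = total = recall = ap = 0.0
    for _, group in groupby(scored_deltas, key=lambda s: s[0]):
        # Accumulate all predictions that share the same score
        for _, d_int, d_tot in group:
            intersection += d_int
            total += d_tot
        new_recall = intersection / gt_total
        precision = intersection / total
        ap += precision * (new_recall - recall)
        recall = new_recall
    return ap

print(ap_grouped([(1.0, 10.0, 10.0), (1.0, 0.0, 10.0)], gt_total=20.0))  # 0.25
print(ap_grouped([(1.0, 0.0, 10.0), (1.0, 10.0, 10.0)], gt_total=20.0))  # 0.25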