2 changes: 2 additions & 0 deletions deeplabcut/compat.py
@@ -488,6 +488,8 @@ def evaluate_network(
trainingsetindex=trainingsetindex,
plotting=plotting,
show_errors=show_errors,
comparison_bodyparts=comparisonbodyparts,
per_keypoint_evaluation=per_keypoint_evaluation,
modelprefix=modelprefix,
**torch_kwargs,
)
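For context, a minimal usage sketch of how these forwarded options might be passed through the public API; the config path and shuffle are placeholders, and `comparisonbodyparts` / `per_keypoint_evaluation` are the compat-level names shown in the call above:

```python
import deeplabcut

# Hypothetical evaluation call exercising the newly forwarded options;
# "/path/to/my/project/config.yaml" is a placeholder project config.
deeplabcut.evaluate_network(
    "/path/to/my/project/config.yaml",
    Shuffles=[1],
    plotting=False,
    comparisonbodyparts="all",       # evaluate all bodyparts (or pass a list of names)
    per_keypoint_evaluation=True,    # also report per-keypoint RMSE values
)
```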
34 changes: 23 additions & 11 deletions deeplabcut/core/metrics/api.py
@@ -25,6 +25,7 @@ def compute_metrics(
pcutoff: float = -1,
oks_bbox_margin: int = 0,
oks_sigma: float = 0.1,
per_keypoint_rmse: bool = False,
) -> dict:
"""Computes pose estimation performance metrics

@@ -70,6 +71,7 @@ def compute_metrics(
oks_bbox_margin: The margin to add around keypoints to compute the area for OKS
computation.
oks_sigma: The sigma to use when computing OKS scores.
per_keypoint_rmse: Compute per-keypoint RMSE values.

Returns:
A dictionary containing keys "rmse", "rmse_pcutoff", "mAP" and "mAR" mapping
@@ -79,6 +81,10 @@
"rmse_pcutoff_unique_bodyparts" are also returned, containing the metrics for
the unique bodyparts head.

If `per_keypoint_rmse=True`, per-keypoint keys of the form "rmse_keypoint_X" (and
"rmse_unique_keypoint_X" when unique bodyparts are present) are also added, where
X is the index of the bodypart and each value is the RMSE for that keypoint.

Examples:
>>> # Define the p-cutoff, prediction, and target DataFrames
>>> pcutoff = 0.5
@@ -94,27 +100,33 @@
} # Sample output scores
"""
data = prepare_evaluation_data(ground_truth, predictions)
rmse, rmse_pcutoff = distance_metrics.compute_rmse(data, single_animal, pcutoff)
oks_scores = distance_metrics.compute_oks(
data=data,
oks_sigma=oks_sigma,
oks_bbox_margin=oks_bbox_margin,
)
results = dict(rmse=rmse, rmse_pcutoff=rmse_pcutoff, **oks_scores)

data_unique = None
if unique_bodypart_gt is not None:
assert unique_bodypart_poses is not None
data_unique = prepare_evaluation_data(unique_bodypart_gt, unique_bodypart_poses)

rmse_scores = distance_metrics.compute_rmse(
data,
single_animal,
pcutoff,
data_unique=data_unique,
per_keypoint_results=per_keypoint_rmse,
)
results = dict(**rmse_scores, **oks_scores)

if not single_animal:
det_rmse, det_rmse_p = distance_metrics.compute_detection_rmse(data, pcutoff)
det_rmse, det_rmse_p = distance_metrics.compute_detection_rmse(
data, pcutoff, data_unique=data_unique,
)
results["rmse_detections"] = det_rmse
results["rmse_detections_pcutoff"] = det_rmse_p

if unique_bodypart_gt is not None:
# TODO: We should integrate unique bodyparts to main RMSE computation
assert unique_bodypart_poses is not None
unique_bpt = prepare_evaluation_data(unique_bodypart_gt, unique_bodypart_poses)
unique_bpt_metrics = distance_metrics.compute_rmse(unique_bpt, True, pcutoff)
results["rmse_unique_bpts"] = unique_bpt_metrics[0]
results["rmse_unique_bpts_pcutoff"] = unique_bpt_metrics[1]

return results


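A short sketch of how the per-keypoint entries could be read back out of the dictionary returned by `compute_metrics`, assuming keys of the form `rmse_keypoint_<idx>` as produced by `distance_metrics.compute_rmse`; the `results` values and bodypart names below are illustrative:

```python
# Illustrative output of compute_metrics(..., per_keypoint_rmse=True)
results = {
    "rmse": 5.1,
    "rmse_pcutoff": 4.2,
    "rmse_keypoint_0": 4.8,
    "rmse_keypoint_1": 5.4,
}
bodyparts = ["nose", "tail_base"]  # project bodyparts, in dataset order

for idx, bodypart in enumerate(bodyparts):
    keypoint_rmse = results.get(f"rmse_keypoint_{idx}", float("nan"))
    print(f"{bodypart}: RMSE = {keypoint_rmse:.2f} px")
```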
162 changes: 145 additions & 17 deletions deeplabcut/core/metrics/distance_metrics.py
@@ -152,13 +152,12 @@ def compute_oks(
}


def compute_rmse(
def match_predictions_for_rmse(
data: list[tuple[np.ndarray, np.ndarray]],
single_animal: bool,
pcutoff: float,
oks_bbox_margin: float = 0.0,
) -> tuple[float, float]:
"""Computes the RMSE for pose predictions.
) -> list[matching.PotentialMatch]:
"""Matches GT keypoints to predictions to compute RMSE.

Single animal RMSE is computed by simply calculating the distance between each
ground truth keypoint and the corresponding prediction.
@@ -176,16 +175,16 @@ def compute_rmse(
num_bpts, 3). For the GT, the 3 coordinates are (x, y, visibility) while for
the pose they are (x, y, confidence score).
single_animal: Whether this is a single animal dataset.
pcutoff: The p-cutoff to use to compute RMSE.
oks_bbox_margin: When single_animal is False, predictions are matched to GT
using OKS. This is the margin to apply when computing the bounding box from
the pose for the OKS computation.

Returns:
The RMSE and RMSE after removing all detections with a score below the pcutoff.
A list containing the predictions matched to ground truth.

Raises:
AssertionError
ValueError: If `single_animal=True` but more than one ground truth/predicted
keypoint is found for an entry
"""
matches = []
for gt, pred in data:
@@ -217,25 +216,106 @@

matches.extend(image_matches)

rmse, rmse_cutoff = float("nan"), float("nan")
if len(matches) == 0:
return rmse, rmse_cutoff
return matches


def compute_rmse(
data: list[tuple[np.ndarray, np.ndarray]],
single_animal: bool,
pcutoff: float,
data_unique: list[tuple[np.ndarray, np.ndarray]] | None = None,
per_keypoint_results: bool = False,
oks_bbox_margin: float = 0.0,
) -> dict[str, float]:
"""Computes the RMSE for pose predictions.

pixel_errors = np.stack([m.pixel_errors() for m in matches])
if np.any(~np.isnan(pixel_errors)):
rmse = np.nanmean(pixel_errors).item()
Single animal RMSE is computed by simply calculating the distance between each
ground truth keypoint and the corresponding prediction.

Multi-animal RMSE is computed differently: predictions are first matched to ground
truth individuals using greedy OKS matching. RMSE is then computed only between
predictions and the ground truth pose they are matched to, only when the OKS is
non-zero (greater than a small threshold). Predictions that cannot be matched to
any ground truth with non-zero OKS are not used to compute RMSE.

Args:
data: The data for which to compute RMSE. This is a list containing (gt_poses,
predicted_poses), where gt_pose is an array of shape (num_gt_individuals,
num_bpts, 3) and predicted_poses is an array of shape (num_predictions,
num_bpts, 3). For the GT, the 3 coordinates are (x, y, visibility) while for
the pose they are (x, y, confidence score).
single_animal: Whether this is a single animal dataset.
pcutoff: The p-cutoff to use to compute RMSE.
data_unique: Unique bodypart ground truth and predictions to include in RMSE
computations, if there are any such bodyparts.
per_keypoint_results: Whether to compute the RMSE for each individual keypoint.
oks_bbox_margin: When single_animal is False, predictions are matched to GT
using OKS. This is the margin to apply when computing the bounding box from
the pose for the OKS computation.

Returns:
A dictionary mapping metric names to values. It will at least have "rmse" and
"rmse_pcutoff" keys. If `per_keypoint_results=True`, it will also contain an
"rmse_keypoint_X" key for each bodypart (and "rmse_unique_keypoint_X" keys when
unique bodypart data is given), where X is the index of the bodypart.

Raises:
ValueError: If `single_animal=True` but more than one ground truth/predicted
keypoint is found for an entry
"""
matches = match_predictions_for_rmse(data, single_animal, oks_bbox_margin)
pixel_errors, keypoint_scores = None, None
if len(matches) > 0:
pixel_errors = np.stack([m.pixel_errors() for m in matches])
keypoint_scores = np.stack([m.keypoint_scores() for m in matches])
pixel_errors_cutoff = pixel_errors[keypoint_scores >= pcutoff]
if np.any(~np.isnan(pixel_errors_cutoff)):
rmse_cutoff = np.nanmean(pixel_errors_cutoff).item()

return rmse, rmse_cutoff
error, support, cutoff_error, cutoff_support = 0, 0, 0, 0
if pixel_errors is not None:
error, support, cutoff_error, cutoff_support = collect_pixel_errors(
pixel_errors, keypoint_scores, pcutoff,
)

unique_pixel_errors, unique_keypoint_scores = None, None
if data_unique is not None:
u_matches = match_predictions_for_rmse(data_unique, single_animal=True)
if len(u_matches) > 0:
unique_pixel_errors = np.stack([m.pixel_errors() for m in u_matches])
unique_keypoint_scores = np.stack([m.keypoint_scores() for m in u_matches])

u_error, u_support, u_cutoff_error, u_cutoff_support = collect_pixel_errors(
unique_pixel_errors, unique_keypoint_scores, pcutoff,
)
error += u_error
support += u_support
cutoff_error += u_cutoff_error
cutoff_support += u_cutoff_support

results = dict(rmse=float("nan"), rmse_pcutoff=float("nan"))
if support > 0:
results["rmse"] = error / support
if cutoff_support > 0:
results["rmse_pcutoff"] = cutoff_error / cutoff_support

if per_keypoint_results:
bodypart_errors = []
if pixel_errors is not None:
bodypart_errors.append(("rmse_keypoint", pixel_errors))
if unique_pixel_errors is not None:
bodypart_errors.append(("rmse_unique_keypoint", unique_pixel_errors))

for key_prefix, bpt_errors in bodypart_errors:
for idx, keypoint_error in enumerate(bpt_errors.T):
rmse = float("nan")
if np.any(~np.isnan(keypoint_error)):
rmse = np.nanmean(keypoint_error)
results[f"{key_prefix}_{idx}"] = rmse

return results
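
A minimal sketch of calling the refactored `compute_rmse` on synthetic multi-animal data; the array shapes follow the docstring above and the import path follows the module location shown in this diff, but all values are made up:

```python
import numpy as np

from deeplabcut.core.metrics import distance_metrics

# One image, two GT individuals with two bodyparts each: (x, y, visibility)
gt = np.array([
    [[10.0, 10.0, 2.0], [20.0, 20.0, 2.0]],
    [[50.0, 50.0, 2.0], [60.0, 60.0, 2.0]],
])
# Two predicted individuals: (x, y, confidence)
pred = np.array([
    [[11.0, 10.0, 0.9], [21.0, 20.0, 0.8]],
    [[52.0, 50.0, 0.7], [61.0, 61.0, 0.4]],
])

results = distance_metrics.compute_rmse(
    data=[(gt, pred)],
    single_animal=False,
    pcutoff=0.6,
    per_keypoint_results=True,
)
print(results["rmse"], results["rmse_pcutoff"], results["rmse_keypoint_0"])
```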


def compute_detection_rmse(
data: list[tuple[np.ndarray, np.ndarray]],
pcutoff: float,
data_unique: list[tuple[np.ndarray, np.ndarray]] | None = None,
) -> tuple[float, float]:
"""Computes the detection RMSE for pose predictions.

@@ -252,6 +332,8 @@ def compute_detection_rmse(
num_bpts, 3). For the GT, the 3 coordinates are (x, y, visibility) while for
the pose they are (x, y, confidence score).
pcutoff: The p-cutoff to use to compute RMSE.
data_unique: Unique bodypart ground truth and predictions to include in RMSE
computations, if there are any such bodyparts.

Returns:
The detection RMSE and detection RMSE after removing all detections with a
@@ -279,6 +361,17 @@
distances.append(np.linalg.norm(gt[:2] - pred[:2]))
scores.append(bpt_pred[pred_index, 2])

if data_unique is not None:
for image_gt, image_pred in data_unique:
assert len(image_gt) == len(image_pred) == 1, (
f"Unique GT an predictions must have length 1! Found {image_gt.shape}, "
f"{image_pred.shape}."
)
unique_gt, unique_pred = image_gt[0], image_pred[0]
for gt, pred in zip(unique_gt, unique_pred):
distances.append(np.linalg.norm(gt[:2] - pred[:2]))
scores.append(pred[2])

rmse, rmse_cutoff = float("nan"), float("nan")
if len(distances) == 0:
return rmse, rmse_cutoff
@@ -293,3 +386,38 @@
rmse_cutoff = np.nanmean(pixel_errors_cutoff).item()

return rmse, rmse_cutoff
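
Similarly, a sketch of the detection-RMSE call with unique-bodypart data included; the array conventions are the same as above and all values are illustrative:

```python
import numpy as np

from deeplabcut.core.metrics import distance_metrics

gt = np.array([[[10.0, 10.0, 2.0], [20.0, 20.0, 2.0]]])     # (1 individual, 2 bpts, 3)
pred = np.array([[[11.0, 10.0, 0.9], [22.0, 20.0, 0.5]]])
# Unique bodyparts: exactly one "individual" per image, as asserted above
unique_gt = np.array([[[100.0, 40.0, 2.0]]])
unique_pred = np.array([[[101.0, 41.0, 0.8]]])

det_rmse, det_rmse_pcutoff = distance_metrics.compute_detection_rmse(
    data=[(gt, pred)],
    pcutoff=0.6,
    data_unique=[(unique_gt, unique_pred)],
)
```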


def collect_pixel_errors(
pixel_errors: np.ndarray,
keypoint_scores: np.ndarray,
pcutoff: float,
) -> tuple[float, int, float, int]:
"""Collects pixel errors for RMSE computation

Args:
pixel_errors: The pixel errors to collect, of shape (num_matches, num_bodyparts)
keypoint_scores: The scores corresponding to the pixel errors, of shape
(num_matches, num_bodyparts).
pcutoff: The pcutoff to use when computing cutoff RMSE.

Returns: error, support, cutoff_error, support_cutoff
error: The sum of all pixel errors.
support: The number of valid pixel errors.
cutoff_error: The sum of all pixel errors with score >= pcutoff.
support_cutoff: The number of valid pixel errors with score >= pcutoff.
"""
error = 0.0
cutoff_error = 0.0
support = np.sum(~np.isnan(pixel_errors)).item()
support_cutoff = 0
if support > 0:
error += np.nansum(pixel_errors).item()

cutoff_mask = keypoint_scores >= pcutoff
cutoff_pixel_errors = pixel_errors[cutoff_mask]
support_cutoff = np.sum(~np.isnan(cutoff_pixel_errors)).item()
if support_cutoff > 0:
cutoff_error = np.nansum(cutoff_pixel_errors).item()

return error, support, cutoff_error, support_cutoff
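
Finally, a small sketch of how the sums and supports returned by `collect_pixel_errors` fold into a single RMSE value, mirroring the aggregation in `compute_rmse`; the arrays are illustrative:

```python
import numpy as np

from deeplabcut.core.metrics.distance_metrics import collect_pixel_errors

# (num_matches, num_bodyparts) pixel errors and matching confidence scores
pixel_errors = np.array([[1.0, np.nan], [3.0, 5.0]])
keypoint_scores = np.array([[0.9, 0.2], [0.7, 0.4]])

error, support, cutoff_error, cutoff_support = collect_pixel_errors(
    pixel_errors, keypoint_scores, pcutoff=0.6
)
rmse = error / support if support > 0 else float("nan")  # mean of 1, 3, 5 -> 3.0
rmse_pcutoff = (
    cutoff_error / cutoff_support if cutoff_support > 0 else float("nan")
)  # mean of 1, 3 -> 2.0
```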
10 changes: 4 additions & 6 deletions deeplabcut/pose_estimation_pytorch/README.md
@@ -269,7 +269,6 @@ from deeplabcut.pose_estimation_pytorch.data import (
build_transforms,
DLCLoader,
)
from deeplabcut.pose_estimation_pytorch.task import Task

loader = DLCLoader(
config="/path/to/my/project/config.yaml",
@@ -279,12 +278,12 @@ loader = DLCLoader(
train_dataset = loader.create_dataset(
transform=build_transforms(loader.model_cfg["data"]["train"]),
mode="train",
task=Task.BOTTOM_UP,
task=loader.pose_task,
)
valid_dataset = loader.create_dataset(
transform=build_transforms(loader.model_cfg["data"]["train"]),
mode="test",
task=Task.BOTTOM_UP,
task=loader.pose_task,
)
```

@@ -358,7 +357,6 @@ model_cfg = make_pytorch_pose_config(
top_down=True,
)
write_config(config_path=model_cfg_path, config=model_cfg)
task = Task(model_cfg["method"])

# Create the loader for the COCO dataset
loader = COCOLoader(
@@ -370,12 +368,12 @@
train_dataset = loader.create_dataset(
transform=build_transforms(loader.model_cfg["data"]["train"]),
mode="train",
task=task,
task=loader.pose_task,
)
valid_dataset = loader.create_dataset(
transform=build_transforms(loader.model_cfg["data"]["train"]),
mode="test",
task=task,
task=loader.pose_task,
)
```
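
Note that `loader.pose_task` replaces the hard-coded `Task.BOTTOM_UP` and `Task(model_cfg["method"])` used previously; continuing the snippet above, a quick check of which task the loader resolves to (the printed value is illustrative):

```python
# pose_task is presumably derived from the loader's model configuration
# (previously Task(model_cfg["method"])), so the datasets are built for the
# same task (e.g. bottom-up vs. top-down) as the model.
print(loader.pose_task)  # e.g. Task.BOTTOM_UP for a bottom-up model
```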
