Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 69 additions & 2 deletions deeplabcut/pose_estimation_tensorflow/core/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import argparse
import os
from pathlib import Path
from typing import List
import numpy as np
import pandas as pd
from tqdm import tqdm
Expand Down Expand Up @@ -259,7 +260,7 @@ def return_evaluate_network_data(
>>> deeplabcut._evaluate_network_data('/analysis/project/reaching-task/config.yaml', shuffle=[1])
--------
If you want to plot
>>> deeplabcut.evaluate_network('/analysis/project/reaching-task/config.yaml',shuffle=[1],True)
>>> deeplabcut.evaluate_network('/analysis/project/reaching-task/config.yaml',shuffle=[1],plotting=True)
"""

import os
Expand Down Expand Up @@ -494,6 +495,55 @@ def return_evaluate_network_data(
return results


def keypoint_error(
    df_error: pd.DataFrame,
    df_error_p_cutoff: pd.DataFrame,
    train_indices: List[int],
    test_indices: List[int],
) -> pd.DataFrame:
    """Computes the mean RMSE error for each bodypart

    The error dataframes can be in single animal format (non-hierarchical columns, one
    column for each bodypart) or multi-animal format (hierarchical columns with 3
    levels: "scorer", "individuals", "bodyparts"). In the multi-animal case, the
    errors of all individuals are pooled before averaging. NaN entries are ignored
    by the mean.

    Args:
        df_error: dataframe containing the RMSE error for each image, individual and
            bodypart
        df_error_p_cutoff: dataframe containing the RMSE error with p-cutoff for each
            image, individual and bodypart
        train_indices: the positional indices of rows in the dataframes that are in
            the train set
        test_indices: the positional indices of rows in the dataframes that are in
            the test set

    Returns:
        A dataframe containing 4 rows (train and test error, with and without
        p-cutoff; the index is named "Error Type") and one float column for each
        bodypart.
    """
    subsets = [
        ("Train error (px)", df_error, train_indices),
        ("Test error (px)", df_error, test_indices),
        ("Train error (px) with p-cutoff", df_error_p_cutoff, train_indices),
        ("Test error (px) with p-cutoff", df_error_p_cutoff, test_indices),
    ]

    row_names = []
    error_rows = []
    for row_name, df, indices in subsets:
        # Selecting rows with a list of positions yields a new object and nothing
        # below writes into it, so the inputs are left untouched without copying
        df_flat = df.iloc[indices, :]
        if isinstance(df_flat.columns, pd.MultiIndex):
            # MA projects have column indices "scorer", "individuals" and "bodyparts"
            # Drop the scorer level, and put individuals in rows
            df_flat = df_flat.droplevel("scorer", axis=1).stack(level="individuals")

        row_names.append(row_name)
        error_rows.append(df_flat.mean())

    # The error rows are series; label them through `keys` (rather than appending a
    # string entry to each series) so the values keep their float dtype, then
    # transpose to get one row per error type
    keypoint_error_df = pd.concat(error_rows, axis=1, keys=row_names).T
    keypoint_error_df.index.name = "Error Type"
    return keypoint_error_df


def evaluate_network(
config,
Shuffles=[1],
Expand All @@ -504,6 +554,7 @@ def evaluate_network(
gputouse=None,
rescale=False,
modelprefix="",
per_keypoint_evaluation: bool = False,
):
"""Evaluates the network.

Expand Down Expand Up @@ -557,6 +608,10 @@ def evaluate_network(
Directory containing the deeplabcut models to use when evaluating the network.
By default, the models are assumed to exist in the project folder.

per_keypoint_evaluation: bool, default=False
Compute the train and test RMSE for each keypoint, and save the results to
a {model_name}-keypoint-results.csv in the evaluation-results folder

Returns
-------
None
Expand Down Expand Up @@ -609,6 +664,7 @@ def evaluate_network(
comparisonbodyparts=comparisonbodyparts,
gputouse=gputouse,
modelprefix=modelprefix,
per_keypoint_evaluation=per_keypoint_evaluation,
)
else:
from deeplabcut.utils.auxfun_videos import imread, imresize
Expand Down Expand Up @@ -720,7 +776,9 @@ def evaluate_network(
)
),
)
auxiliaryfunctions.attempt_to_make_folder(evaluationfolder, recursive=True)
auxiliaryfunctions.attempt_to_make_folder(
evaluationfolder, recursive=True
)
# path_train_config = modelfolder / 'train' / 'pose_cfg.yaml'

# Check which snapshots are available and sort them by # iterations
Expand Down Expand Up @@ -900,6 +958,15 @@ def evaluate_network(
]
final_result.append(results)

if per_keypoint_evaluation:
df_keypoint_error = keypoint_error(
RMSE, RMSEpcutoff, trainIndices, testIndices
)
kpt_filename = DLCscorer + "-keypoint-results.csv"
df_keypoint_error.to_csv(
Path(evaluationfolder) / kpt_filename
)

if show_errors:
print(
"Results for",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
from scipy.spatial import cKDTree
from tqdm import tqdm

from deeplabcut.pose_estimation_tensorflow.core.evaluate import make_results_file
from deeplabcut.pose_estimation_tensorflow.core.evaluate import (
make_results_file,
keypoint_error,
)
from deeplabcut.pose_estimation_tensorflow.config import load_config
from deeplabcut.pose_estimation_tensorflow.lib import crossvalutils
from deeplabcut.utils import visualization
Expand Down Expand Up @@ -106,6 +109,7 @@ def evaluate_multianimal_full(
comparisonbodyparts="all",
gputouse=None,
modelprefix="",
per_keypoint_evaluation: bool = False,
):
from deeplabcut.pose_estimation_tensorflow.core import (
predict,
Expand Down Expand Up @@ -495,6 +499,18 @@ def evaluate_multianimal_full(
]
final_result.append(results)

if per_keypoint_evaluation:
df_keypoint_error = keypoint_error(
error,
error[mask],
trainIndices,
testIndices,
)
kpt_filename = DLCscorer + "-keypoint-results.csv"
df_keypoint_error.to_csv(
Path(evaluationfolder) / kpt_filename
)

if show_errors:
string = (
"Results for {} training iterations, training fraction of {}, and shuffle {}:\n"
Expand Down
4 changes: 3 additions & 1 deletion examples/testscript.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,9 @@
deeplabcut.train_network(path_config_file)

print("EVALUATE")
deeplabcut.evaluate_network(path_config_file, plotting=True)
deeplabcut.evaluate_network(
path_config_file, plotting=True, per_keypoint_evaluation=True
)
# deeplabcut.evaluate_network(path_config_file,plotting=True,trainingsetindex=33)
print("CUT SHORT VIDEO AND ANALYZE (with dynamic cropping!)")

Expand Down
8 changes: 6 additions & 2 deletions examples/testscript_multianimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,9 @@
print("Network trained.")

print("Evaluating network...")
deeplabcut.evaluate_network(config_path, plotting=True)
deeplabcut.evaluate_network(
config_path, plotting=True, per_keypoint_evaluation=True
)

print("Network evaluated....")

Expand Down Expand Up @@ -296,7 +298,9 @@
print("Network trained.")

print("Evaluating network...")
deeplabcut.evaluate_network(config_path, plotting=True)
deeplabcut.evaluate_network(
config_path, plotting=True, per_keypoint_evaluation=True
)

print("Network evaluated....")

Expand Down
154 changes: 154 additions & 0 deletions tests/test_evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
#
# DeepLabCut Toolbox (deeplabcut.org)
# © A. & M.W. Mathis Labs
# https://github.com/DeepLabCut/DeepLabCut
#
# Please see AUTHORS for contributors.
# https://github.com/DeepLabCut/DeepLabCut/blob/master/AUTHORS
#
# Licensed under GNU Lesser General Public License v3.0
#
import numpy as np
import pandas as pd
import pytest

import deeplabcut.pose_estimation_tensorflow as pet


def make_single_animal_rmse_df(
    bodyparts,
    train_indices,
    test_indices,
    error_data=None,
) -> pd.DataFrame:
    """Builds a single-animal error dataframe with one column per bodypart.

    When no ``error_data`` is given, every error is set to 1.0 and the dataframe
    gets one row per train index plus one row per test index.
    """
    if error_data is not None:
        return pd.DataFrame(error_data, columns=bodyparts)

    num_images = len(train_indices) + len(test_indices)
    default_errors = np.ones((num_images, len(bodyparts)))
    return pd.DataFrame(default_errors, columns=bodyparts)


def make_multi_animal_rmse_df(
    scorer,
    individuals,
    bodyparts,
    train_indices,
    test_indices,
    error_data=None,
) -> pd.DataFrame:
    """Builds a multi-animal error dataframe with 3-level hierarchical columns.

    The column levels are "scorer", "individuals" and "bodyparts" (the product of
    the single scorer, all individuals and all bodyparts). When no ``error_data``
    is given, every error is set to 1.0 and the dataframe gets one row per train
    index plus one row per test index.
    """
    if error_data is None:
        num_images = len(train_indices) + len(test_indices)
        num_columns = len(individuals) * len(bodyparts)
        error_data = np.ones((num_images, num_columns))

    column_index = pd.MultiIndex.from_product(
        [[scorer], individuals, bodyparts],
        names=["scorer", "individuals", "bodyparts"],
    )
    return pd.DataFrame(error_data, columns=column_index)


# Row labels looked up in the dataframe returned by `keypoint_error`; the test
# below decides whether a label is a train or a test row by checking whether it
# contains "train" (case-insensitive).
KEYPOINT_ERROR_NAMES = [
    "Train error (px)",
    "Test error (px)",
    "Train error (px) with p-cutoff",
    "Test error (px) with p-cutoff",
]

# Each case is a pair (inputs, expected_values):
#   - inputs: the error dataframe and the train/test row indices given to
#     `keypoint_error`
#   - expected_values: for each bodypart, the expected [train, test] mean error
KEYPOINT_ERROR_TEST_DATA = [
    # Single animal, default data: every error is 1.0
    (
        {
            "df_error": make_single_animal_rmse_df(
                bodyparts=["leg", "arm", "head"],
                train_indices=[0, 1, 3],
                test_indices=[2, 4],
            ),
            "train_indices": [0, 1, 3],
            "test_indices": [2, 4],
        },
        {
            "leg": [1.0, 1.0],  # train, test
            "arm": [1.0, 1.0],  # train, test
            "head": [1.0, 1.0],  # train, test
        },
    ),
    # Single animal, explicit data containing a NaN (ignored by the mean)
    (
        {
            "df_error": make_single_animal_rmse_df(
                bodyparts=["leftHand", "rightHand"],
                train_indices=[0, 2],
                test_indices=[1, 3],
                error_data=[
                    [1.0, np.nan],
                    [1.0, 0.0],
                    [0.0, 10.0],
                    [5.0, 5.0],
                ],
            ),
            "train_indices": [0, 2],
            "test_indices": [1, 3],
        },
        {
            "leftHand": [0.5, 3.0],  # train, test
            "rightHand": [10.0, 2.5],  # train, test
        },
    ),
    # Multi-animal, default data: every error is 1.0 for every individual
    # (replaces a case that was an exact duplicate of the first one)
    (
        {
            "df_error": make_multi_animal_rmse_df(
                scorer="john",
                individuals=["individual_1", "individual_2"],
                bodyparts=["leg", "arm", "head"],
                train_indices=[0, 1, 3],
                test_indices=[2, 4],
            ),
            "train_indices": [0, 1, 3],
            "test_indices": [2, 4],
        },
        {
            "leg": [1.0, 1.0],  # train, test
            "arm": [1.0, 1.0],  # train, test
            "head": [1.0, 1.0],  # train, test
        },
    ),
    # Multi-animal, explicit data containing NaNs; individuals are pooled
    (
        {
            "df_error": make_multi_animal_rmse_df(
                scorer="john",
                individuals=["individual_1", "individual_2"],
                bodyparts=["leftArm", "rightArm"],
                train_indices=[0, 1, 3],
                test_indices=[2],
                error_data=[
                    # individual_1,      individual_2
                    # leftArm, rightArm, leftArm, rightArm
                    [1.0, np.nan, 1.0, 2.0],
                    [2.0, 0.0, 1.0, np.nan],
                    [3.0, 10.0, 1.0, np.nan],
                    [10.0, 4.0, np.nan, np.nan],
                ],
            ),
            "train_indices": [0, 1, 3],
            "test_indices": [2],
        },
        {
            "leftArm": [3.0, 2.0],  # train, test
            "rightArm": [2.0, 10.0],  # train, test
        },
    ),
]


@pytest.mark.parametrize("inputs, expected_values", KEYPOINT_ERROR_TEST_DATA)
def test_evaluate_keypoint_error(inputs, expected_values):
    """Checks the per-bodypart train/test mean errors returned by keypoint_error.

    Args:
        inputs: the error dataframe ("df_error") and the "train_indices" and
            "test_indices" to evaluate it on
        expected_values: for each bodypart, the expected [train, test] mean error
    """
    # The same dataframe is used for the errors with and without p-cutoff, so
    # both variants of a split are expected to give the same mean
    keypoint_error = pet.keypoint_error(
        inputs["df_error"],
        inputs["df_error"],
        inputs["train_indices"],
        inputs["test_indices"],
    )
    assert list(keypoint_error.index) == KEYPOINT_ERROR_NAMES

    for bodypart, (train_error, test_error) in expected_values.items():
        for error_name in KEYPOINT_ERROR_NAMES:
            expected = train_error if "train" in error_name.lower() else test_error
            # The means are computed floats: compare with a tolerance instead of
            # requiring bit-exact equality
            actual = keypoint_error.loc[error_name, bodypart]
            assert actual == pytest.approx(expected)