Skip to content

Commit dd8f231

Browse files
authored
fix 3 failed test cases for video_llama_3 model on Intel XPU (#41931)
* fix 3 failed test cases for video_llama_3 model on Intel XPU
  Signed-off-by: Liu, Kaixuan <kaixuan.liu@intel.com>
* update
  Signed-off-by: Liu, Kaixuan <kaixuan.liu@intel.com>
* adjust format
  Signed-off-by: Liu, Kaixuan <kaixuan.liu@intel.com>
* update code
  Signed-off-by: Liu, Kaixuan <kaixuan.liu@intel.com>
---------
Signed-off-by: Liu, Kaixuan <kaixuan.liu@intel.com>
1 parent 1619a34 commit dd8f231

File tree

1 file changed

+23
-5
lines changed

1 file changed

+23
-5
lines changed

tests/models/video_llama_3/test_modeling_video_llama_3.py

Lines changed: 23 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@
3636
is_torch_available,
3737
)
3838
from transformers.testing_utils import (
39+
Expectations,
3940
backend_empty_cache,
4041
require_flash_attn,
4142
require_torch,
@@ -831,7 +832,14 @@ def test_small_model_integration_test(self):
831832
torch.testing.assert_close(expected_pixel_slice, inputs.pixel_values[:6, :3], atol=1e-4, rtol=1e-4)
832833

833834
output = model.generate(**inputs, max_new_tokens=20, do_sample=False, repetition_penalty=None)
834-
EXPECTED_DECODED_TEXT = "user\n\nDescribe the image.\nassistant\nThe image captures a vibrant nighttime scene on a bustling city street. A woman in a striking red dress"
835+
# fmt: off
836+
EXPECTED_DECODED_TEXT = Expectations(
837+
{
838+
("cuda", None): "user\n\nDescribe the image.\nassistant\nThe image captures a vibrant nighttime scene on a bustling city street. A woman in a striking red dress",
839+
("xpu", None): "user\n\nDescribe the image.\nassistant\nThe image captures a vibrant night scene in a bustling Japanese city. A woman in a striking red dress",
840+
}
841+
).get_expectation()
842+
# fmt: on
835843

836844
self.assertEqual(
837845
self.processor.decode(output[0], skip_special_tokens=True),
@@ -874,11 +882,21 @@ def test_small_model_integration_test_batch_wo_image(self):
874882

875883
# it should not matter whether two images are the same size or not
876884
output = model.generate(**inputs, max_new_tokens=20, do_sample=False, repetition_penalty=None)
885+
# fmt: off
886+
EXPECTED_DECODED_TEXT = Expectations(
887+
{
888+
("cuda", None): [
889+
"user\n\nDescribe the image.\nassistant\nThe image captures a vibrant nighttime scene on a bustling city street. A woman in a striking red dress",
890+
"user\nWhat is relativity?\nassistant\nRelativity is a scientific theory that describes the relationship between space and time. It was first proposed by",
891+
],
892+
("xpu", None): [
893+
"user\n\nDescribe the image.\nassistant\nThe image captures a vibrant night scene in a bustling Japanese city. A woman in a striking red dress",
894+
"user\nWhat is relativity?\nassistant\nRelativity is a scientific theory that describes the relationship between space and time. It was first proposed by",
895+
],
896+
}
897+
).get_expectation()
898+
# fmt: on
877899

878-
EXPECTED_DECODED_TEXT = [
879-
"user\n\nDescribe the image.\nassistant\nThe image captures a vibrant nighttime scene on a bustling city street. A woman in a striking red dress",
880-
"user\nWhat is relativity?\nassistant\nRelativity is a scientific theory that describes the relationship between space and time. It was first proposed by",
881-
] # fmt: skip
882900
self.assertEqual(
883901
self.processor.batch_decode(output, skip_special_tokens=True),
884902
EXPECTED_DECODED_TEXT,

0 commit comments

Comments
 (0)