|
36 | 36 | is_torch_available, |
37 | 37 | ) |
38 | 38 | from transformers.testing_utils import ( |
| 39 | + Expectations, |
39 | 40 | backend_empty_cache, |
40 | 41 | require_flash_attn, |
41 | 42 | require_torch, |
@@ -831,7 +832,14 @@ def test_small_model_integration_test(self): |
831 | 832 | torch.testing.assert_close(expected_pixel_slice, inputs.pixel_values[:6, :3], atol=1e-4, rtol=1e-4) |
832 | 833 |
|
833 | 834 | output = model.generate(**inputs, max_new_tokens=20, do_sample=False, repetition_penalty=None) |
834 | | - EXPECTED_DECODED_TEXT = "user\n\nDescribe the image.\nassistant\nThe image captures a vibrant nighttime scene on a bustling city street. A woman in a striking red dress" |
| 835 | + # fmt: off |
| 836 | + EXPECTED_DECODED_TEXT = Expectations( |
| 837 | + { |
| 838 | + ("cuda", None): "user\n\nDescribe the image.\nassistant\nThe image captures a vibrant nighttime scene on a bustling city street. A woman in a striking red dress", |
| 839 | + ("xpu", None): "user\n\nDescribe the image.\nassistant\nThe image captures a vibrant night scene in a bustling Japanese city. A woman in a striking red dress", |
| 840 | + } |
| 841 | + ).get_expectation() |
| 842 | + # fmt: on |
835 | 843 |
|
836 | 844 | self.assertEqual( |
837 | 845 | self.processor.decode(output[0], skip_special_tokens=True), |
@@ -874,11 +882,21 @@ def test_small_model_integration_test_batch_wo_image(self): |
874 | 882 |
|
875 | 883 | # it should not matter whether two images are the same size or not |
876 | 884 | output = model.generate(**inputs, max_new_tokens=20, do_sample=False, repetition_penalty=None) |
| 885 | + # fmt: off |
| 886 | + EXPECTED_DECODED_TEXT = Expectations( |
| 887 | + { |
| 888 | + ("cuda", None): [ |
| 889 | + "user\n\nDescribe the image.\nassistant\nThe image captures a vibrant nighttime scene on a bustling city street. A woman in a striking red dress", |
| 890 | + "user\nWhat is relativity?\nassistant\nRelativity is a scientific theory that describes the relationship between space and time. It was first proposed by", |
| 891 | + ], |
| 892 | + ("xpu", None): [ |
| 893 | + "user\n\nDescribe the image.\nassistant\nThe image captures a vibrant night scene in a bustling Japanese city. A woman in a striking red dress", |
| 894 | + "user\nWhat is relativity?\nassistant\nRelativity is a scientific theory that describes the relationship between space and time. It was first proposed by", |
| 895 | + ], |
| 896 | + } |
| 897 | + ).get_expectation() |
| 898 | + # fmt: on |
877 | 899 |
|
878 | | - EXPECTED_DECODED_TEXT = [ |
879 | | - "user\n\nDescribe the image.\nassistant\nThe image captures a vibrant nighttime scene on a bustling city street. A woman in a striking red dress", |
880 | | - "user\nWhat is relativity?\nassistant\nRelativity is a scientific theory that describes the relationship between space and time. It was first proposed by", |
881 | | - ] # fmt: skip |
882 | 900 | self.assertEqual( |
883 | 901 | self.processor.batch_decode(output, skip_special_tokens=True), |
884 | 902 | EXPECTED_DECODED_TEXT, |
|
0 commit comments