Tensor size does not match batch size

Question

I have used this code below to train an unet model. My dataset at first consisted of 10000 images, shape=(256,256,3), what was working fine. However, ofter enlargening it to 14000 images i encountered this error

 (1) INVALID_ARGUMENT:  Cannot add tensor to the batch: number of elements does not match. Shapes are: [tensor]: [761,720,3], [batch]: [256,256,3]
     [[{{node IteratorGetNext}}]]




import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Conv2DTranspose, Concatenate, Input
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger

os.environ['PYTHONHASHSEED'] = str(42)
np.random.seed(42)
tf.random.set_seed(42)

batch_size = 256
lr = 0.0001
epochs = 150
height = 256
width = 256

dataset_path = os.path.join(drive_path, 'tmp', 'tmp')

files_dir = os.path.join(drive_path, 'tmp', 'tmp', 'tmp')
model_file = os.path.join(files_dir, 'tmp')
log_file = os.path.join(files_dir, 'tmp')

def create_dir(path):
  if not os.path.exists(path):
    os.makedirs(path)

create_dir(files_dir)

def conv_block(inputs, num_filters):
  x = Conv2D(num_filters, 3, padding= 'same')(inputs)
  x = BatchNormalization()(x)
  x = Activation('relu')(x)

  x = Conv2D(num_filters, 3, padding= 'same')(x)
  x = BatchNormalization()(x)
  x = Activation('relu')(x)

  return x

def encoder_block(inputs, num_filters):
  x = conv_block(inputs, num_filters)
  p = MaxPool2D((2, 2))(x)
  return x, p

def decoder_block(inputs, skip, num_filters):
  x = Conv2DTranspose(num_filters, (2,2), strides=2, padding='same')(inputs)
  x = Concatenate()([x, skip])
  x = conv_block(x, num_filters)
  return x

def built_unet(input_shape):
  inputs = Input(input_shape)
  s1, p1 = encoder_block(inputs, 64)
  s2, p2 = encoder_block(p1, 128)
  s3, p3 = encoder_block(p2, 256)
  s4, p4 = encoder_block(p3, 512)

  b1 = conv_block(p4, 1024)

  d1 = decoder_block(b1, s4, 512)
  d2 = decoder_block(d1, s3, 256)
  d3 = decoder_block(d2, s2, 128)
  d4 = decoder_block(d3, s1, 64)

  outputs = Conv2D(1, 1, padding='same', activation='sigmoid')(d4)
  model = Model(inputs, outputs, name='UNET')
  return model

from glob import glob

def load_data(path):
  train_x = sorted(glob(os.path.join(path, 'train', 'image', '*')))
  train_y = sorted(glob(os.path.join(path, 'train', 'mask', '*')))

  valid_x = sorted(glob(os.path.join(path, 'val', 'image', '*')))
  valid_y = sorted(glob(os.path.join(path, 'val', 'mask', '*')))

  return (train_x, train_y), (valid_x, valid_y)

import cv2

def read_image(path):
  path = path.decode()
  x = cv2.imread(path, cv2.IMREAD_COLOR)
  x = x/255.0
  return x

def read_mask(path):
  path = path.decode()
  x = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
  x = x/255.0
  x = np.expand_dims(x, axis = -1)
  return x

def tf_parse(x, y):
  def _parse(x, y):
    x = read_image(x)
    y = read_mask(y)
    return x, y

  x, y = tf.numpy_function(_parse, [x, y], [tf.float64, tf.float64])

  x.set_shape([height, width, 3])
  y.set_shape([height, width, 1])

  return x, y

def tf_dataset(x, y, batch=8):
  dataset = tf.data.Dataset.from_tensor_slices((x, y))
  dataset = dataset.map(tf_parse, num_parallel_calls=tf.data.AUTOTUNE)
  dataset = dataset.batch(batch)
  dataset = dataset.prefetch(tf.data.AUTOTUNE)
  return dataset

(train_x, train_y), (valid_x, valid_y) = load_data(dataset_path)
print(f'Train: {len(train_x)} - {len(train_y)}')
print(f'val: {len(valid_x)} - {len(valid_y)}')


train_dataset = tf_dataset(train_x, train_y, batch=batch_size)
valid_dataset = tf_dataset(valid_x, valid_y, batch=batch_size)

for x, y in valid_dataset:
  print(x.shape, y.shape)

input_shape = (height, width, 3)
model = built_unet(input_shape)

model.summary()

ost = tf.keras.optimizers.Adam(lr)
model.compile(loss='binary_crossentropy', optimizer=ost, metrics=['acc'])

callbacks = [
    ModelCheckpoint(model_file, verbose = 1, save_best_only=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4),
    CSVLogger(log_file),
    EarlyStopping(monitor='val_loss', patience=12, restore_best_weights=False)
]

model.fit(train_dataset, validation_data=valid_dataset, epochs=epochs, callbacks=callbacks)

!pip install onnx
!pip install tf2onnx

import onnx
import os
import tf2onnx



from tensorflow.keras.models import load_model
loaded_keras_model = load_model('tmp')

onnx_model, _ = tf2onnx.convert.from_keras(loaded_keras_model)

onnx.save(onnx_model, 'tmp')

i resized the images to (761,720,3) but now i encounter this error

 A `Concatenate` layer requires inputs with matching shapes except for the concatenation axis. Received: input_shape=[(None, 94, 90, 512), (None, 95, 90, 512)]

The unet architektur is relatively new to me i do not know how to solve this (please also take this into account in answers)

mhenning · Accepted Answer · 2024-05-21 10:28:15Z

What you got is a rounding missmatch between the upsampling part x and the passed through part skip in

x = Conv2DTranspose(num_filters, (2,2), strides=2, padding='same')(inputs)
x = Concatenate()([x, skip])  # <- here

You start with the shape (761, 720, 3). In every encoder_block, the size of your images is halved by the MaxPooling2D layer (rounding down, this is important later). So the output shapes for s1, s2,.. are:

s1: (761, 720)
s2: (380, 360)
s3: (190, 180)
s4: (95, 90)
b1: (47, 45)

Note again that if you have uneven numbers like 95, MaxPool2D rounds it down to 47 (from 47.5). If you then call

x = Conv2DTranspose(num_filters, (2,2), strides=2, padding='same')(inputs)

on the input shape b1=(47, 45), the output shape of that will be (94, 90) (double its input shape).
This output shape can not be concatenated with the throughput image part from s4, because now you have 2 different shapes s4=(95, 90) and x=(94, 90). The solution to that would be to use shapes that are powers of 2, because as long as they are big enough powers of 2, they are always even numbers when you divide them by 2. For example if you go back to your image input shapes (256, 256), and you have 4 encoder parts with 4 MaxPooling layers, you get:

s1: (256, 256)
s2: (128, 128)
s3: (64, 64)
s4: (32, 32)
b1: (16, 16)

These are all nice even numbers, which are not rounded. If you upscale e.g. (16, 16) with your Conv2DTranspose layer, you get (32, 32), which is concatenatable with s4.

So I'd reshape/cut your input images to a proper shape as to not have these uneven shapes and rounding inaccuracies. It seems from the first part of your question, that the first 10k images are shape (256, 256, 3), but some of the next 4k are not. Instead if reshaping all to (761, 720, 3), I would reshape all to (256, 256, 3). Consider to cut your (761, 720, 3) images to (720, 720, 3) before downscaling, otherwise you would have a minor warping effect. Of course it depends on the images if cutting parts of it makes sense.

Edit: If you have images of different size that you load (to which the first error gives a strong indication), you can change the following functions in your code:

def read_image(path, im_shape):
  path = path.decode()
  x = cv2.imread(path, cv2.IMREAD_COLOR)
  x = x/255.0
  x = cv2.resize(x, im_shape, cv2.INTER_CUBIC)
  return x

def read_mask(path, im_shape):
  path = path.decode()
  x = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
  x = x/255.0
  x = np.expand_dims(x, axis = -1)
  x = cv2.resize(x, im_shape, cv2.INTER_CUBIC)
  return x

def tf_parse(x, y):
  def _parse(x, y, im_shape):
    x = read_image(x, im_shape)
    y = read_mask(y, im_shape)
    return x, y

  x, y = tf.numpy_function(_parse, [x, y, (height, width)], [tf.float64, tf.float64])
  return x, y

This adds proper resizing to your images.

Hi,thanks for the answer, but every image in my 14k dataset has the shape 256 by 256 but this raises the error I mentioned first in the question.
There certainly is an image with shape (761, 720). I can replicate the exact error ("(1) INVALID_ARGUMENT: Cannot add tensor...") with your code and 2 dummy images of different shapes. I think the error comes from your use of tf.set_shape, which only updates the .shape value of the tensor (which is "unknown" for a symbolic tensor in my tests). It does not properly reshape the underlying image!

Collectives™ on Stack Overflow

Tensor size does not match batch size

1 Answer 1

2 Comments

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

1 Answer 1

2 Comments

Your Answer

Sign up or log in

Post as a guest

Related