I am getting an input-shape-related ValueError: the input shape is incompatible with a layer. Any help would be highly appreciated.

class TransformerBlock(layers.Layer):
    """Transformer encoder block: multi-head self-attention followed by a feed-forward network.

    Each of the two sub-layers is followed by dropout, a residual addition and
    layer normalization.
    """

    def __init__(self, transformer_output_dim: int, num_heads: int, transformer_dense_dim: int,
                 rate: float = 0.1, normalize_epsilon: float = 1e-6, **kwargs):
        """
        transformer_output_dim: Number of neurons in final dense layer of transformer
            (also used as ``key_dim`` of the attention layer)
        num_heads: Number of heads for multi head attention
        transformer_dense_dim: Number of neurons in the first dense layer of transformer
        rate: Dropout rate
        normalize_epsilon: Epsilon value for the LayerNormalization layers
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...); accepted so
            that the dictionary produced by ``get_config`` can be fed back to the constructor
        """
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.transformer_output_dim = transformer_output_dim
        self.num_heads = num_heads
        self.transformer_dense_dim = transformer_dense_dim
        self.rate = rate
        self.normalize_epsilon = normalize_epsilon

        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=transformer_output_dim)
        self.ffn = keras.Sequential(
            [
                layers.Dense(transformer_dense_dim, activation="relu"),
                layers.Dense(transformer_output_dim),
            ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=normalize_epsilon)
        self.layernorm2 = layers.LayerNormalization(epsilon=normalize_epsilon)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def get_config(self):
        """Used for saving the trained model.

        Returns the base layer config extended with this block's constructor
        arguments. The sub-layers are rebuilt by ``__init__`` from these values,
        so the layer objects themselves are not placed in the config.
        """
        config = super().get_config().copy()
        config.update({
            'transformer_output_dim': self.transformer_output_dim,
            'num_heads': self.num_heads,
            'transformer_dense_dim': self.transformer_dense_dim,
            'rate': self.rate,
            'normalize_epsilon': self.normalize_epsilon,
        })
        return config

    def call(self, inputs, training=None):
        """Apply the block to ``inputs``.

        inputs: Tensor used as both query and value of the attention layer.
        training: Forwarded to the dropout layers; ``None`` lets Keras decide.

        NOTE: ``out1 + ffn_output`` adds the block input (after attention) to the
        output of the final dense layer, so the last dimension of ``inputs`` has
        to be compatible with ``transformer_output_dim``.
        """
        attn_output = self.att(inputs, inputs)  # self-attention: query == value
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a learned token embedding and a learned position embedding."""

    def __init__(self, max_len, vocab_size, embed_dim, **kwargs):
        """
        max_len: Number of positions embedded; ``call`` always generates positions
            ``0 .. max_len - 1``
        vocab_size: ``input_dim`` of the token embedding
        embed_dim: Output dimension of both embeddings
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...); accepted so
            that the dictionary produced by ``get_config`` can be fed back to the constructor
        """
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.max_len = max_len
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=max_len, output_dim=embed_dim)

    def call(self, x):
        """Return token embeddings of ``x`` plus the position embeddings.

        The position range is built from ``self.max_len`` rather than from the
        runtime length of ``x``, so the addition below expects the sequence axis
        of ``x`` to be compatible with ``max_len``.
        """
        maxlen = self.max_len  # tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the base layer config extended with this layer's constructor arguments.

        The two embedding sub-layers are rebuilt by ``__init__`` from these
        values, so the layer objects themselves are not placed in the config.
        """
        config = super().get_config().copy()
        config.update({
            'max_len': self.max_len,
            'vocab_size': self.vocab_size,
            'embed_dim': self.embed_dim,
        })
        return config

def _load_embeddings_index(embeddings_path):
    """Read a text file of ``word v1 v2 ...`` lines into a ``{word: vector}`` dict."""
    embeddings_index = {}
    with open(embeddings_path, encoding="utf-8") as f:
        for line in f:
            parts = line.split(maxsplit=1)
            if len(parts) != 2:
                # Blank line or a word without coefficients: nothing to store.
                continue
            word, coefs = parts
            embeddings_index[word] = np.fromstring(coefs, "f", sep=" ")
    return embeddings_index


def _build_embedding_matrix(vocab, embeddings_index, num_tokens, embedding_dim):
    """Build the ``(num_tokens, embedding_dim)`` matrix; return ``(matrix, hits, misses)``."""
    hits = 0
    misses = 0
    embedding_matrix = np.zeros((num_tokens, embedding_dim))
    for i, word in enumerate(vocab):
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
            hits += 1
        else:
            # Words not found in embedding index will be all-zeros.
            # This includes the representation for "padding" and "OOV"
            misses += 1
    return embedding_matrix, hits, misses


def transformer_model(vocab, max_len: int, transformer_output_dim: int, transformer_dense_dim: int,
                      num_heads: int, transformer_dropout_rate: float, normalize_epsilon: float,
                      base_model_output_dense_dim: int,
                      embeddings_path: str = "/content/drive/MyDrive/latex_similarity_update/text_models/transformer_latex/5lakhswithsize10.txt"):
    """Build the base model: pre-trained embedding -> transformer block -> dense -> L2 normalize.

    vocab: Sequence of vocabulary words; the position of a word is its token index.
        The embedding table has ``len(vocab) + 2`` rows.
    max_len: Max len of the input
    transformer_output_dim: Number of neurons in final dense layer of transformer
    num_heads: Number of heads for multi head attention
    transformer_dense_dim: Number of neurons in the first dense layer of transformer
    transformer_dropout_rate: Dropout rate
    normalize_epsilon: Epsilon value for the LayerNormalization layers
    base_model_output_dense_dim: Number of neurons in the dense layer applied after flattening
    embeddings_path: Text file with one ``word v1 v2 ...`` entry per line

    NOTE: the sequence output is flattened before the dense layer, so that dense
    layer is sized for exactly ``max_len * embedding_dim`` features. Every input
    given to the model (training AND prediction) must therefore be padded or
    truncated to ``max_len`` tokens; a different length fails with
    "expected axis -1 of input shape to have value ...".
    """
    inputs = layers.Input(shape=(max_len,))
    print(inputs.shape)

    embeddings_index = _load_embeddings_index(embeddings_path)
    print("Found %s word vectors." % len(embeddings_index))

    num_tokens = len(vocab) + 2
    embedding_dim = 10  # size of the vectors stored in the embeddings file

    # Prepare embedding matrix
    embedding_matrix, hits, misses = _build_embedding_matrix(
        vocab, embeddings_index, num_tokens, embedding_dim)
    print("Converted %d words (%d misses)" % (hits, misses))

    # NOTE(review): the original snippet used `embedding_layer` without defining it.
    # It is built here from the matrix prepared above; `trainable=False` keeps the
    # pre-trained vectors frozen -- confirm this matches the intended training setup.
    embedding_layer = layers.Embedding(
        num_tokens,
        embedding_dim,
        embeddings_initializer=keras.initializers.Constant(embedding_matrix),
        trainable=False,
    )
    x = embedding_layer(inputs)

    transformer_block = TransformerBlock(transformer_output_dim, num_heads, transformer_dense_dim,
                                         transformer_dropout_rate, normalize_epsilon)
    x = transformer_block(x)
    # Flatten ties the model to a fixed sequence length (see NOTE in the docstring).
    x = Flatten()(x)
    x = Dense(base_model_output_dense_dim)(x)
    # Normalize using Lambda here or later within Distance Calculating Function
    out = Lambda(lambda vector: K.l2_normalize(vector, axis=-1))(x)
    model = keras.Model(inputs=inputs, outputs=out)
    return model

class ModelBuild:
    """Assembles the two-input distance model on top of a base model."""

    @staticmethod
    def _input_shape_of(model):
        """Return ``model``'s per-sample input shape, or ``(None,)`` when it is not known."""
        shape = getattr(model, "input_shape", None)
        if isinstance(shape, tuple) and len(shape) > 1:
            return tuple(shape[1:])
        return (None,)

    def build_network(self, base_model, out_dim: int):
        """
        base_model: Base transformer model
        out_dim: Output dimension for passing in the final dense layer

        Returns a model that applies ``base_model`` followed by a dense layer.
        """
        # Mirror the base model's declared input shape so that a fixed sequence
        # length is visible at this model's input instead of surfacing as a
        # shape error inside a dense layer.
        input_1 = Input(self._input_shape_of(base_model))
        x = base_model(input_1)
        x = Dense(out_dim)(x)
        network = Model(input_1, x)
        return network

    def model_build(self, network):
        """
        network: Model applied (with shared weights) to both inputs

        Returns a two-input model whose output is ``euclidean_distance`` of the
        two network outputs.
        """
        input_shape = self._input_shape_of(network)
        input_1 = Input(input_shape)
        input_2 = Input(input_shape)
        x1 = network(input_1)
        x2 = network(input_2)
        distance = Lambda(euclidean_distance)([x1, x2])
        model = Model(inputs=[input_1, input_2], outputs=distance)
        return model

The model always trains successfully, but during testing it throws a ValueError:

ValueError: Input 0 of layer dense_2 is incompatible with the layer: expected axis -1 of input shape to have value 1000 but received input with shape (None, 10)