Why does my neural network give the same prediction for every input?

I am trying to create a neural network that takes 294 inputs and predicts which of the inputs is most likely to be the output. I also wanted to regress how large the difference between the actual and predicted values is, so I added two regression nodes at the output layer. Before I added the regression outputs, the model was predicting decently enough, but after the addition it started producing the same value no matter what I do. I then decided to check the weights, and I found something like this:

    [[ 0.19589818  0.45867598 -0.1103735  -0.11739671  0.3524462   0.3615998
      -0.11838996]
     [-0.37149632  0.29049385  0.27328718  0.39140654 -0.22933161  0.07160628
       0.33962536]
     [ 0.21745765  0.19408011 -0.28868628 -0.0097748   0.06756687 -0.40600073
       0.0485481 ]
     [-0.4144268   0.4770614  -0.1586262   0.06003821  0.01309896  0.47136605
      -0.41377842]
     [-0.25865722 -0.3038118   0.2767954   0.33988214 -0.48508477  0.33661437
      -0.20484531]
     [ 0.4246924  -0.4958439   0.2031511   0.4845667   0.18330884 -0.1708759
       0.28903925]
     [-0.4602847  -0.02263796  0.27997506 -0.33072484 -0.44759667 -0.14221525
       0.2714281 ]
     [-0.3839649  -0.13256657 -0.03424132 -0.36362755 -0.4561025  -0.12396967
       0.15885079]
     [-0.273561   -0.09750211 -0.4644209   0.4556396  -0.3021226   0.26363683
      -0.43606043]
     [ 0.2392633  -0.1741817   0.48888505 -0.43252754  0.101964    0.02732563
      -0.28655064]
     [ 0.41151023 -0.16941857 -0.48709846  0.23205352 -0.22945309  0.2136854]
     ...
     [-0.01252615 -0.19594312  0.26858175 -0.07100904  0.16546512  0.11748069
       0.36638904]]

Above are the weights for layer 294 before any updates. After some updates, the weights look like this:

    weights for layer294:[[[ 0.19589818  0.19589818  0.19589818 ...  0.19589818  0.19589818
        0.19589818]
      [ 0.45867598  0.45867598  0.45867598 ...  0.45867598  0.45867598
        0.45867598]
      [-0.1103735  -0.1103735  -0.1103735  ... -0.1103735  -0.1103735
       -0.1103735 ]
      ...
      [ 0.3524462   0.3524462   0.3524462  ...  0.3524462   0.3524462
        0.3524462 ]
      [ 0.3615998   0.3615998   0.3615998  ...  0.3615998   0.3615998
        0.3615998 ]
      [-0.11838996 -0.11838996 -0.11838996 ... -0.11838996 -0.11838996
       -0.11838996]]

     [[-0.37149632 -0.37149632 -0.37149632 ... -0.37149632 -0.37149632
       -0.37149632]
      [ 0.29049385  0.29049385  0.29049385 ...  0.29049385  0.29049385
        0.29049385]
      [ 0.27328718  0.27328718  0.27328718 ...  0.27328718  0.27328718
        0.27328718]
      ...
      [-0.22933161 -0.22933161 -0.22933161 ... -0.22933161 -0.22933161
       -0.22933161]
      [ 0.07160628  0.07160628  0.07160628 ...  0.07160628  0.07160628
        0.07160628]
      [ 0.33962536  0.33962536  0.33962536 ...  0.33962536  0.33962536
        0.33962536]]

     [[ 0.21745765  0.21745765  0.21745765 ...  0.21745765  0.21745765
        0.21745765]
      [ 0.19408011  0.19408011  0.19408011 ...  0.19408011  0.19408011
        0.19408011]
      [-0.28868628 -0.28868628 -0.28868628 ... -0.28868628 -0.28868628
       -0.28868628]
      ...
      [ 0.06756687  0.06756687  0.06756687 ...  0.06756687  0.06756687
        0.06756687]
      [-0.40600073 -0.40600073 -0.40600073 ... -0.40600073 -0.40600073
       -0.40600073]
      [ 0.0485481   0.0485481   0.0485481  ...  0.0485481   0.0485481
        0.0485481 ]]
     ...
     [ 0.36638904  0.36638904  0.36638904 ...  0.36638904  0.36638904
        0.36638904]]]
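
To rule out the logging callback itself, the raw weight snapshots from model.get_weights() can be compared directly before and after a short fit; a minimal sketch (get_weights returns one 2-D kernel and one 1-D bias per Dense layer, so no extra dimension should ever appear):

    import numpy as np

    before = [w.copy() for w in model.get_weights()]
    model.fit(train_list, trainY, epochs=2, batch_size=16)
    after = model.get_weights()
    # largest absolute change per weight array; ~0.0 means that array never moved
    for b, a in zip(before, after):
        print(b.shape, np.max(np.abs(a - b)))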

The weight values themselves do not seem to change; the logged array just grows in dimension. Is this how it is supposed to be? This is how I constructed my model:

    import warnings
    import pandas as pd
    pd.options.mode.chained_assignment = None  # default='warn'


    import os
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    import tensorflow as tf
    # use tensorflow.keras consistently; mixing the standalone keras package
    # with tensorflow.keras in one model is a common source of silent breakage
    from tensorflow.keras import Input
    from tensorflow.keras.models import Sequential, Model
    from tensorflow.keras.layers import Dense, ELU, Activation, concatenate
    from tensorflow.keras.optimizers import SGD, Adam
    from sklearn.metrics import classification_report
    from sklearn.preprocessing import LabelBinarizer
    from sklearn.model_selection import train_test_split
    import matplotlib.pyplot as plt
    import numpy as np
    from getWeights import GetWeights  # custom callback that records weights during training
    
    def build(layer_str):
        # take the layer structure string, e.g. "17-7-1", and convert it
        # into a list of layer widths: [17, 7, 1]
        layers = list(map(int, layer_str.split("-")))

        # input layer
        inputs = Input(shape=(layers[0],))
        x = inputs

        # hidden layers: one Dense followed by an ELU activation per entry
        for width in layers[1:-1]:
            x = ELU(alpha=1.0)(Dense(width)(x))

        # final layer
        classifier = Dense(layers[-1], activation="sigmoid")(x)

        return Model(inputs=inputs, outputs=classifier)
        
    def split(data, label, split_ratio):
        # split every data array and every label array at the same ratio;
        # the label arrays end up as the last len(label) entries of each list
        train_list = []
        test_list = []
        for a in data:
            cut = round(len(a) * (1 - split_ratio))
            train_list.append(a[:cut])
            test_list.append(a[cut:])
        for l in label:
            cut = round(len(l) * (1 - split_ratio))
            train_list.append(l[:cut])
            test_list.append(l[cut:])
        return train_list, test_list
    def train_eval(data, label, model, lr=0.01, epochs_in=100, batch_size_in=16):
        warnings.filterwarnings("ignore", category=FutureWarning)

        # split data and labels into train and test sets; 25% is held out for testing
        initial_learning_rate = lr
        split_ratio = 0.25
        train_list, test_list = split(data, label, split_ratio)

        # the labels are the last three entries of each list
        trainY = train_list[-3:]
        del train_list[-3:]
        testY = test_list[-3:]
        del test_list[-3:]

        # train the network
        print("[INFO] Training the network....")
        decay_steps = 1000
        lr_decayed_fn = tf.keras.optimizers.schedules.CosineDecay(initial_learning_rate, decay_steps)
        sgd = SGD(learning_rate=lr_decayed_fn, momentum=0.8)
        model.compile(loss=["categorical_crossentropy", "mean_squared_error", "mean_squared_error"],
                      optimizer=sgd, metrics=["accuracy"])
        checkpoint_filepath = 'checkpoint1'
        model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_filepath,
                                                                       save_weights_only=True,
                                                                       monitor='val_pred_accuracy',
                                                                       mode='max', save_best_only=True)
        gw = GetWeights()
        # callbacks must be passed as one flat list, not a list of lists
        H = model.fit(train_list, trainY, validation_data=(test_list, testY),
                      epochs=epochs_in, batch_size=batch_size_in,
                      callbacks=[model_checkpoint_callback, gw])

        # evaluate the network
        print("[INFO] Evaluating the network....")
        predictions = model.predict(test_list, batch_size=batch_size_in)

        return predictions


    def Merge_model(layer, nbx, regress=False):
        # build one small tower per input, then merge all tower outputs
        model_list = [build(layer) for _ in range(nbx)]
        # model.output is already a Keras tensor; wrapping it in tf.convert_to_tensor
        # strips the Keras graph metadata, so the outputs are passed directly
        merged_layers = concatenate([model_list[i].output for i in range(nbx)])
        x = Dense(nbx, activation="relu")(merged_layers)
        out = Dense(nbx, activation="softmax", name="pred")(x)
        if regress:
            adj1 = Dense(1, activation='linear', name="x")(x)
            adj2 = Dense(1, activation='linear', name="y")(x)
            merged_model = Model([m.input for m in model_list], [out, adj1, adj2])
        else:
            merged_model = Model([m.input for m in model_list], [out])

        return merged_model
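
For reference, build expands the layer string left to right, so the "17-7-1" string used below gives every one of the 294 towers the same small shape. A quick sanity check, separate from the pipeline:

    m = build("17-7-1")
    m.summary()  # InputLayer (17) -> Dense (7) -> ELU -> Dense (1, sigmoid)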

This is how I implemented it:

with open("dataframe.pkl","rb") as vector_file:
    vect_df=pickle.load(vector_file) 
    input_list=[np.stack(vect_df[str(i)]) for i in range(294) ]


#hyperparameters 
nbx=294  
lr=1e-8 
epochs=100 
batch_size=16

#input data 
data=input_list 
label_path=glob.glob("test_image/*.pkl") 
label=lb. read_label_file(label_path)

#if regressing uncomment the following 
label1=np.array([a[0] for a in label])
label2=np.array([a[1] for a in label]) 
label3=np.array([a[2] for a in label])
input_label=[label1,label2,label3] 


model=nn.Merge_model("17-7-1",nbx,regress=True)
plot_model(model, to_file='model.png',rankdir='LR')
prediction=nn.train_eval(data,input_label,model,lr,epochs,batch_size) 
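
The returned prediction is the list of the three outputs; a minimal way to confirm the symptom is to count the unique rows of the softmax head (it comes out as one row repeated for every test sample):

    preds = prediction[0]  # softmax output of the "pred" head
    # if every input gets the same prediction there is exactly one unique row
    print(np.unique(np.round(preds, 6), axis=0).shape[0])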

The plot for my neural network: https://drive.google.com/file/d/1w_Obek1fzyrUBRfXilEBD4LH5urP0kal/view?usp=sharing
