Tensorflow error: ValueError: Shapes (128, 100) and (128, 100, 139) are incompatible
I am trying to use the Functional API for my model, but I don't understand why I get this error:
ValueError: Shapes (128, 100) and (128, 100, 139) are incompatible
My code:
input_tensor = Input(batch_input_shape=(batch_size,None))
x = Embedding(vocab_size, embed_dim)(input_tensor)
x = LSTM(rnn_neurons4, return_sequences=True, stateful=True)(x)
output_tensor = Dense(vocab_size, activation='softmax')(x)
model = Model(input_tensor, output_tensor)
model.summary()
Adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=Adam, loss="categorical_crossentropy", metrics=['accuracy'])
fit code:
epochs = 1000
early_stop = EarlyStopping(monitor='loss', patience=25)
try:
model.fit(dataset,epochs=epochs, callbacks=[early_stop])
model.save('train.h5')
except KeyboardInterrupt:
model.save('train.h5')
1 answer
-
answered 2022-05-04 11:03
Артем Голуб
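I don't know whether this is the correct way or not.
I created my own loss function using sparse_categorical_crossentropy and passed it to model.compile. Presumably this works because the targets have shape (128, 100) (integer class ids), whereas categorical_crossentropy expects one-hot targets matching the (128, 100, 139) model output.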
do you know?
how many words do you know
See also questions close to this topic
-
how to print all parameters of a keras model
I am trying to print all 1290 parameters of the
dense_1
layer, but model.get_weights()[7]
only shows 10 parameters. How can I print all 1290 parameters of the dense_1
layer? What is the difference between model.get_weights()
and model.layer.get_weights()?
>model.get_weights()[7] array([-2.8552295e-04, -4.3254648e-03, -1.8752701e-04, 2.3482188e-03, -3.4848123e-04, 7.6121779e-04, -2.7494309e-06, -1.9068648e-03, 6.0777756e-04, 1.9550985e-03], dtype=float32) >model.summary() Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d (Conv2D) (None, 26, 26, 32) 320 conv2d_1 (Conv2D) (None, 24, 24, 64) 18496 max_pooling2d (MaxPooling2D (None, 12, 12, 64) 0 ) dropout (Dropout) (None, 12, 12, 64) 0 flatten (Flatten) (None, 9216) 0 dense (Dense) (None, 128) 1179776 dropout_1 (Dropout) (None, 128) 0 dense_1 (Dense) (None, 10) 1290 _________________________________________________________________ ================================================================= Total params: 1,199,882 Trainable params: 1,199,882 Non-trainable params: 0 _________________________________________________________________
-
Can't use Keras MeanIoU to train semantic segmentation model
I'm working on a binary semantic segmentation problem. I built an UNet model with MobileNetV2 backbone. Here is my model code:
def upsample(filters, size, apply_dropout=False): initializer = tf.random_normal_initializer(0., 0.02) layer = Sequential() layer.add(layers.Conv2DTranspose(filters, size, strides=2, padding='same', kernel_initializer=initializer, use_bias=False)) layer.add(layers.BatchNormalization()) if apply_dropout: layer.add(layers.Dropout(0.5)) layer.add(layers.ReLU()) return layer def UNet(image_size, num_classes): inputs = Input(shape=image_size + (3,)) base_model = applications.MobileNetV2(input_shape=image_size + (3,), include_top=False) layer_names = [ 'block_1_expand_relu', 'block_3_expand_relu', 'block_6_expand_relu', 'block_13_expand_relu', 'block_16_project', ] base_model_outputs = [base_model.get_layer(name).output for name in layer_names] down_stack = Model(inputs=base_model.input, outputs=base_model_outputs) down_stack.trainable = False up_stack = [ upsample(512, 3), upsample(256, 3), upsample(128, 3), upsample(64, 3) ] skips = down_stack(inputs) x = skips[-1] skips = reversed(skips[:-1]) for up, skip in zip(up_stack, skips): x = up(x) x = layers.Concatenate()([x, skip]) outputs = layers.Conv2DTranspose(filters=num_classes, kernel_size=3, strides=2, padding='same')(x) return Model(inputs, outputs)
To load the images and masks for training, I built an image loader that inherits from
keras.utils.Sequence
.class ImageLoader(utils.Sequence): def __init__(self, batch_size, img_size, img_paths, mask_paths): self.batch_size = batch_size self.img_size = img_size self.img_paths = img_paths self.mask_paths = mask_paths def __len__(self): return len(self.mask_paths) // self.batch_size def __getitem__(self, idx): i = idx * self.batch_size batch_img_paths = self.img_paths[i:i + self.batch_size] batch_mask_paths = self.mask_paths[i:i + self.batch_size] x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype='float32') for j, path in enumerate(batch_img_paths): img = utils.load_img(path, target_size=self.img_size) img = utils.img_to_array(img) x[j] = img y = np.zeros((self.batch_size,) + self.img_size + (1,), dtype='uint8') for j, path in enumerate(batch_mask_paths): img = utils.load_img(path, target_size=self.img_size, color_mode='grayscale') img = utils.img_to_array(img) # [0, 255] -> [0, 1] img //= 255 y[j] = img return x, y
In my segmentation problem, all the labels are in the range [0, 1]. However, when I try to compile and then fit the model using the Adam optimizer, sparse categorical cross-entropy loss, and the metric
tf.keras.metrics.MeanIoU
, I encountered with the following problem:Node: 'confusion_matrix/assert_non_negative_1/assert_less_equal/Assert/AssertGuard/Assert' 2 root error(s) found. (0) INVALID_ARGUMENT: assertion failed: [`predictions` contains negative values. ] [Condition x >= 0 did not hold element-wise:] [x (confusion_matrix/Cast:0) = ] [-1 -1 -1...] [[{{node confusion_matrix/assert_non_negative_1/assert_less_equal/Assert/AssertGuard/Assert}}]] [[confusion_matrix/assert_less_1/Assert/AssertGuard/pivot_f/_31/_67]] (1) INVALID_ARGUMENT: assertion failed: [`predictions` contains negative values. ] [Condition x >= 0 did not hold element-wise:] [x (confusion_matrix/Cast:0) = ] [-1 -1 -1...] [[{{node confusion_matrix/assert_non_negative_1/assert_less_equal/Assert/AssertGuard/Assert}}]]
At first, I used accuracy as the training metric and didn't encounter this problem; however, when I changed to MeanIoU, the problem appeared. Does anyone know how to fix it? Thank you very much!
UPDATE: I've searched on StackOverflow and found this question about a similar error; however, the fix mentioned in that link (reducing the learning rate) doesn't work in my case.
-
Training plot is not appearing properly for keras model
I have data that I need to train a model on, with X and Y. The training part is done, but when I plot the predictions against the actual data, the plot shows many lines instead of a single non-linear regression line.
model= Sequential() model.add(Dense(7,input_dim=1, activation="tanh")) model.add(Dense(1)) model.compile(loss="mse", optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics= ["mae"]) history=model.fit(X,Y,epochs=1000) predict=model.predict(X) plt.scatter(X, Y,edgecolors='g') plt.plot(X, predict,'r') plt.legend([ 'Predictated Y' ,'Actual Y']) plt.show()
Please see the attached image: plotting image
-
"Module not Found Error : No module named albumentations"
I'm writing code that uses the albumentations package, and it gives me this error:
ModuleNotFoundError: No module named 'albumentations'
I also installed this package.
This is my code:
import albumentations as alb
-
Dividing a large file into smaller ones for training
I have a very large file and I want to divide it into smaller ones for training. I've read about pickle files, so I split the large file into training-validation. Then, I divided the training file (about 1000000 datapoints) into ~60 pickle files. The testing file was divided into 5 pickles files.
Now I am confused about how to train on these 60 files and then test with all 5 testing files.
My strategy was to run the network with the first training pickle file and the first testing pickle file, then save the weights after finishing that first training part. Next, load the weights and go through all the remaining training pickle files with the first testing file, then move on to the next testing pickle file and repeat the same training process, saving the weights after each run, and so on.
I don't think this strategy is correct, because I would be training and testing again repeatedly, which will produce wrong, unstable results.
Some recommend doing the training in parallel. For example, train on the first training file for one epoch without validation, then move on to the second training file in the second epoch, starting from the weights that were saved from the first one, and so on until all the training files have been used. Then do the validation afterwards on the trained network. I don't think I know how to do this.
Note, I use Siamese network here to train images to find similar pairs.
here is my code so far,
First the pickle files part. I change the number when the training is done.
#train data pickle_in = open(path + "TrainPairs1.pickle", "rb") trainPixel = pickle.load(pickle_in) trainPixel = np.array(trainPixel) print(trainPixel.shape) tr_pairs = trainPixel.reshape(40000,2,71,71,1) #train labels pickle_in = open(path + "TrainLabels1.pickle", "rb") tr_y = pickle.load(pickle_in) tr_y = np.array(tr_y) pickle_in = open(path + "TestPairs1.pickle", "rb") testPixel = pickle.load(pickle_in) testPixel = np.array(testPixel) print(testPixel.shape) te_pairs = testPixel.reshape(40000,2,71,71,1) print(te_pairs.shape) pickle_in = open(path + "TestLabels1.pickle", "rb") te_y = pickle.load(pickle_in) te_y = np.array(te_y) print(te_y.shape)
Build the network:
# network definition base_network = create_base_network(input_shape) input_a = Input(shape=input_shape) input_b = Input(shape=input_shape) processed_a = base_network(input_a) processed_b = base_network(input_b) distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([processed_a, processed_b]) model = Model([input_a, input_b], distance)
Here to load the saved weights:
#model.load_weights("/SavedWeights/weights.ckpt")
Train the network:
rms = RMSprop() model.compile(loss=contrastive_loss, optimizer='adam', metrics=[accuracy]) history = model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y, batch_size=128, epochs=epochs, validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y)) #Training prediction y_pred_tr = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]]) #validation prediction y_pred_te = model.predict([te_pairs[:, 0], te_pairs[:, 1]]) #save weights.. base_network.save("/my_model") model.save_weights("/Saved_Weights/weights.ckpt")
Please help me in running these files correctly with my network.
-
Define flatten layer in neural network using pytorch
I'm trying to define a flatten layer before the fully connected layers. My input is a tensor with shape
(512, 2, 2)
, so I want to flatten this tensor before the FC layers. I get this error:
empty(): argument 'size' must be tuple of ints, but found element of type Flatten at pos 2
import torch.nn as nn class Network(nn.Module): def __init__(self): super(Network,self).__init__() self.flatten=nn.Flatten() self.fc1=nn.Linear(self.flatten,512) self.fc2=nn.Linear(512,256) self.fc3=nn.Linear(256,3) def forward(self,x): x=self.flatten(x) # Flatten layer x=torch.ReLU(self.fc1(x)) x=torch.ReLU(self.fc2(x)) x=torch.softmax(self.fc3(x)) return x
-
Preparing input data for LSTM layer with conditions
I have a data frame that looks like the one below:
DF.head(20): time var1 var2 prob 12:30 10 12 85 12:31 15 45 85 12:32 18 12 85 12:33 17 26 85 12:34 11 14 85 12:35 14 65 85 12:36 19 29 92 12:37 15 32 92 12:38 13 44 92 12:39 15 33 92 12:40 11 15 92 12:41 15 45 92 12:42 13 44 94 12:43 15 33 94 12:44 11 15 94 12:45 15 45 94 12:46 13 44 92 12:47 15 33 92 12:48 11 15 92 12:49 15 45 92
I want to predict the value of prob from a sequence of the 6 previous values. So for the given example, I will take two time series -> var1 and var2 from time 12:30 to 12:35 to predict prob for 12:35. As far as I know, the input shape that goes into the LSTM will be (df.shape[0], 6, 1), but I do not know how to convert my input from 2 dimensions to 3 dimensions. I also have a condition: I should use the previous 6 time steps only if they all share the same prob value. So in the given example, I won't be able to take the previous 6 values for prob = 94, as 94 occurs only 4 times and I cannot make 6 timesteps from that.
My pseudo code looks like this:
for i in range(df.shape[0]): #loop across all rows if final_df[i,'prob'] == final_df[i+1,'prob']: #go until the value of prob change make multiple non overlaping dataframes of shape (6,2) else: continue
I need help building the logic and preparing the input data for my LSTM.
-
Replace bidirectional LSTM with GRU in coref?
I am training the coarse-to-fine coreference model (for a language other than English) from AllenNLP with the template config from bert_lstm.jsonnet. When I replace the type “lstm” of the context layer with “gru”, it works, but it seems to have very little impact on training. The same 63 GB of RAM are consumed each epoch, and the validation F1 score hovers around the same value. Does this change in the config actually replace the Bi-LSTM layer with a Bi-GRU layer, or am I missing something?
"context_layer": { "type": "gru", "bidirectional": true, "hidden_size": gru_dim, "input_size": bert_dim, "num_layers": 1 },
-
NaNs in predictions with LSTM
I have an LSTM model that I have trained and tested with one dataset. Now I want to test it on another dataset, and I use the following snippet:
from keras.models import load_model import pandas as pd import numpy as np import matplotlib.pyplot as plt from sklearn.preprocessing import MinMaxScaler from sklearn.metrics import mean_squared_error from sklearn.metrics import mean_absolute_percentage_error model = load_model('lstm.h5') df = pd.read_csv('datasets/Residential_4.csv') data = df['energy_kWh'].values data = data.reshape((-1,1)) scaler = MinMaxScaler(feature_range=(0,1)) data = scaler.fit_transform(data) lookback = 7 * 24 prediction_horizon = 24 X_test, Y_test = Create_Dataset(data, lookback, prediction_horizon) X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1)) test_predict = model.predict(X_test) test_predict = scaler.inverse_transform(test_predict) Y_test = scaler.inverse_transform(Y_test) def Create_Dataset(df, lookback=1, prediction_horizon=1): X, Y = [], [] for i in range(lookback, len(df)-lookback): X.append(df[i-lookback : i, 0]) Y.append(df[i : i + prediction_horizon, 0]) return np.array(X), np.array(Y)
The problem, however, is that
test_predict
has NaN
values after row 96. Any idea why this is happening?
-
Keras-tuning can't find callback
I am using keras-tuner in order to obtain the best set of hyperparameters for my model. Here is the training script:
tuner = kt.Hyperband( hypermodel=build_model, objective="val_accuracy", max_epochs=1000, factor=3, hyperband_iterations=1, directory=TrainingSpecific.SAVE_DIR, project_name="cnn_tunning" ) early_stop_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=10, min_delta=0.0002) train_filenames = get_filenames(f'/*train_fold.csv') val_filenames = get_filenames(f'/*val_fold.csv') x_train_list, y_train_list = data_reader(train_filenames) training_generator = custom_generator(x_train_list, y_train_list) x_val_list, y_val_list = data_reader(val_filenames) validation_generator = custom_generator(x_val_list, y_val_list) print("********** Begin search **********") tuner.search( training_generator, steps_per_epoch=len(train_filenames), validation_data=validation_generator, validation_steps=len(val_filenames), callbacks=[early_stop_callback], workers=1 ) # grab the best hyperparameters print("********** End of search **********") bestHP = tuner.get_best_hyperparameters(num_trials=1)[0]
Now what I have found is that after the hyperband starts using a decent number of iterations and the callback I set up should come into play I get this error:
W tensorflow/core/framework/op_kernel.cc:1733] INVALID_ARGUMENT: ValueError: Could not find callback with key=pyfunc_11900 in the registry.
However it just proceeds to the next trial so I'm not sure what is going on, can someone explain why it can't find the callback?
I'm using
tensorflow 2.8
and keras-tuner 1.1.2
-
File system for s3 already registered when importing tensorflow_io
I installed tensorflow-io with
pip install tensorflow-io
, when I import it I get:tensorflow.python.framework.errors_impl.AlreadyExistsError: File system for s3 already registered
.
The trace is this.import tensorflow_io as tfio File "/opt/miniconda/lib/python3.7/site-packages/tensorflow_io/__init__.py", line 17, in <module> from tensorflow_io.python.api import * # pylint: disable=wildcard-import File "/opt/miniconda/lib/python3.7/site-packages/tensorflow_io/python/api/__init__.py", line 19, in <module> from tensorflow_io.python.ops.io_dataset import IODataset File "/opt/miniconda/lib/python3.7/site-packages/tensorflow_io/python/ops/__init__.py", line 96, in <module> plugin_ops = _load_library("libtensorflow_io_plugins.so", "fs") File "/opt/miniconda/lib/python3.7/site-packages/tensorflow_io/python/ops/__init__.py", line 64, in _load_library l = load_fn(f) File "/opt/miniconda/lib/python3.7/site-packages/tensorflow_io/python/ops/__init__.py", line 56, in <lambda> load_fn = lambda f: tf.experimental.register_filesystem_plugin(f) is None File "/opt/miniconda/lib/python3.7/site-packages/tensorflow/python/framework/load_library.py", line 178, in register_filesystem_plugin py_tf.TF_RegisterFilesystemPlugin(plugin_location)
Can't get away from this problem, any ideas?
-
Group By and Sort a Tensorflow Dataset
I would like to group rows in a TensorFlow dataset by a key and select the top k rows in each group by some value. This is easily doable in, e.g., Pandas or SQL, but not so obvious in TF.
I found group_by_window and group_by_reducer in tf.data.experimental, but I can't figure out how to sort a dataset by a specific column.
My dataset has a dict structure for the rows. What I am looking for is something like:
from tensorflow.data.experimental import group_by_window def key_f(row): return row['id'] def reduce_func(key, ds): # sort by a value - except there is no method like this... ds=ds.sort(by='value') return ds.take(5) t = group_by_window(key_func = key_f, reduce_func = reduce_func, window_size=100) ds = dataset.apply(t)
-
Is there a way to get the real sequence_length in the model description of a RNN/LSTM in Keras?
I would like to get to know the real sequence_length in Keras for a LSTM/RNN. Unfortunately, when I print the model I only get
None
all the time as a value. Here is a part of the code:model = keras.models.Sequential([ keras.layers.SimpleRNN(iteration_NN_L1, return_sequences=True, input_shape=[None, numberOfInputFeatures]), keras.layers.SimpleRNN(iteration_NN_L2, return_sequences=True), keras.layers.Conv1D(160, kernel_size=3, strides=2), keras.layers.Dense(numberOfOutputNeurons) ]) print(model.summary())
This leads to the following output:
Layer (type) Output Shape Param # ================================================================= simple_rnn_48 (SimpleRNN) (None, None, 5) 45 _________________________________________________________________ simple_rnn_49 (SimpleRNN) (None, None, 5) 55 _________________________________________________________________ conv1d_23 (Conv1D) (None, None, 160) 2560 _________________________________________________________________ dense_22 (Dense) (None, None, 1) 161 ================================================================= Total params: 2,821 Trainable params: 2,821 Non-trainable params: 0
So both for the batchsize and the sequence length I always get None and I would like to know if there is a way to get the real output of layer in a Sequential RNN/LSTM model.
-
tensorflow model saving JSON serializable error
Why does model.save() work fine on the model without the scalars, but raise a serialization error when I add the scalars? (Both models train fine.) The input sequences in this case have batch size 1 and sequence length 10.
class my_rnn(CustomBaseModel): def __init__(self, in_shape, in_types, out_types, activations=None, losses=None, *args, **kwargs): super().__init__(in_shape=in_shape, in_types=in_types, out_types=out_types, activations=activations, losses=losses) if use_scalars: in_shape_imgs = in_shape[0] else: in_shape_imgs = np.array(in_shape)[0] input_imgs = tf.keras.Input(in_shape_imgs[1:], batch_size=1) self.l1 = tf.keras.layers.ConvLSTM2D(32, kernel_size=(7, 7), padding='valid', return_sequences=True, stateful=True)(input_imgs) x = tf.keras.layers.Activation('relu')(self.l1) x = tf.keras.layers.MaxPooling3D(pool_size=(1, 2, 2))(x) x = tf.keras.layers.Dense(320)(x) x = tf.keras.layers.Activation('relu')(x) x = tf.keras.layers.Dropout(0.5)(x) img_output = tf.keras.layers.Flatten()(x) # self.l6 = tf.keras.layers.LSTM(64)(x, stateful=True)(x) # Scalars if use_scalars: in_shape_scalars = np.array(in_shape[1]) input_scalars = tf.keras.Input(in_shape_scalars[1:], batch_size=1) self.lpose1 = tf.keras.layers.LSTM(32, return_sequences=True, stateful=True)(input_scalars) x = tf.keras.layers.Activation('relu')(self.lpose1) x = tf.keras.layers.Dense(150)(x) x = tf.keras.layers.Activation('relu')(x) x = tf.keras.layers.Dropout(0.5)(x) pose_output = tf.keras.layers.Flatten()(x) x = tf.keras.layers.concatenate([img_output, pose_output]) inputs = [input_imgs, input_scalars] else: x = img_output inputs = [input_imgs] x = tf.keras.layers.Dense(150)(x) x = tf.keras.layers.Activation('relu')(x) x = tf.keras.layers.Dropout(0.5)(x) out = tf.keras.layers.Dense(12, activation='linear')(x) self.model = Model(inputs, out, name="pose_model")
Traceback (most recent call last): File "/home/alberto/Developer/RouxNN/tracking_error/rnn_experimentation/run_rnn_model.py", line 494, in model = compile_and_fit(model, windows, windows_test) File "/home/alberto/Developer/RouxNN/tracking_error/rnn_experimentation/run_rnn_model.py", line 310, in compile_and_fit model.save(model_path,save_format='tf') File "/home/alberto/anaconda3/envs/tf-roux/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py", line 2001, in save save.save_model(self, filepath, overwrite, include_optimizer, save_format, File "/home/alberto/anaconda3/envs/tf-roux/lib/python3.8/site-packages/tensorflow/python/keras/saving/save.py", line 156, in save_model saved_model_save.save(model, filepath, overwrite, include_optimizer, File "/home/alberto/anaconda3/envs/tf-roux/lib/python3.8/site-packages/tensorflow/python/keras/saving/saved_model/save.py", line 89, in save save_lib.save(model, filepath, signatures, options) File "/home/alberto/anaconda3/envs/tf-roux/lib/python3.8/site-packages/tensorflow/python/saved_model/save.py", line 1032, in save _, exported_graph, object_saver, asset_info = _build_meta_graph( File "/home/alberto/anaconda3/envs/tf-roux/lib/python3.8/site-packages/tensorflow/python/saved_model/save.py", line 1198, in _build_meta_graph return _build_meta_graph_impl(obj, signatures, options, meta_graph_def) File "/home/alberto/anaconda3/envs/tf-roux/lib/python3.8/site-packages/tensorflow/python/saved_model/save.py", line 1162, in _build_meta_graph_impl object_graph_proto = _serialize_object_graph(saveable_view, File "/home/alberto/anaconda3/envs/tf-roux/lib/python3.8/site-packages/tensorflow/python/saved_model/save.py", line 754, in _serialize_object_graph _write_object_proto(obj, obj_proto, asset_file_def_index, File "/home/alberto/anaconda3/envs/tf-roux/lib/python3.8/site-packages/tensorflow/python/saved_model/save.py", line 800, in _write_object_proto metadata=obj._tracking_metadata) File 
"/home/alberto/anaconda3/envs/tf-roux/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py", line 3079, in _tracking_metadata return self._trackable_saved_model_saver.tracking_metadata File "/home/alberto/anaconda3/envs/tf-roux/lib/python3.8/site-packages/tensorflow/python/keras/saving/saved_model/base_serialization.py", line 55, in tracking_metadata return json_utils.Encoder().encode(self.python_properties) File "/home/alberto/anaconda3/envs/tf-roux/lib/python3.8/site-packages/tensorflow/python/keras/saving/saved_model/json_utils.py", line 53, in encode return super(Encoder, self).encode(_encode_tuple(obj)) File "/home/alberto/anaconda3/envs/tf-roux/lib/python3.8/json/encoder.py", line 199, in encode chunks = self.iterencode(o, _one_shot=True) File "/home/alberto/anaconda3/envs/tf-roux/lib/python3.8/json/encoder.py", line 257, in iterencode return _iterencode(o, 0) File "/home/alberto/anaconda3/envs/tf-roux/lib/python3.8/site-packages/tensorflow/python/keras/saving/saved_model/json_utils.py", line 50, in default return serialization.get_json_type(obj) File "/home/alberto/anaconda3/envs/tf-roux/lib/python3.8/site-packages/tensorflow/python/util/serialization.py", line 79, in get_json_type raise TypeError('Not JSON Serializable:', obj) TypeError: ('Not JSON Serializable:', <tf.Tensor: shape=(), dtype=int32, numpy=10>)
-
How to setup LSTM to use n-grams instead of sequence length?
I currently have an LSTM that takes a fixed sequence length as input, but this only allows the LSTM to predict when the input length equals that sequence length. I want the LSTM to use n-grams instead, so that I can predict full words.
Examples:
So for this input (sequence length = 10):
Input: "no sweet t" Output (5 options): "['he ', 'o ', 'aste ', 'ime ', 'errible ']"
What I want is that input can be:
Input: "No sweet" Output: ['tea', 'taste', 'but', 'the', 'and']
That way I can predict full words and am not constrained by the sequence length.
My current code:
#Setup import numpy as np import tensorflow as tf from numpy.core.multiarray import dtype from tensorflow.keras.models import Sequential, load_model from tensorflow.keras.layers import Dense, Activation from tensorflow.keras.layers import LSTM, Dropout from tensorflow.keras.layers import TimeDistributed from tensorflow.keras.layers import Dense, Activation, Dropout, RepeatVector from tensorflow.keras.optimizers import RMSprop import matplotlib matplotlib.use('agg') import matplotlib.pyplot as plt import pickle import sys import heapq import seaborn as sns from pylab import rcParams #Loading the data path = 'text_2.txt' text = open(path, encoding='utf8').read().lower() # print ('Corpus length: ',len(text)) #Preprocessing #Finding all the unique characters in the corpus chars = sorted(list(set(text))) char_indices = dict((c, i) for i, c in enumerate(chars)) indices_char = dict((i, c) for i, c in enumerate(chars)) print ("unique chars: ",len(chars)) #Cutting the corpus into chunks of 10 chars, spacing the sequences by 3 characters #We will additionally store the next character (the one we need to predict) for every sequence SEQUENCE_LENGTH = 10 step = 3 sentences = [] next_chars = [] for i in range(0, len(text) - SEQUENCE_LENGTH, step): sentences.append(text[i:i+SEQUENCE_LENGTH]) next_chars.append(text[i+SEQUENCE_LENGTH]) print ('num training examples: ',len(sentences)) #Generating features and labels. 
#Using previously generated sequences and characters that need to be predicted to create one-hot encoded vectors X = np.zeros((len(sentences), SEQUENCE_LENGTH, len(chars)), dtype=np.bool) y = np.zeros((len(sentences), len(chars)), dtype=np.bool) for i, sentence in enumerate(sentences): for t, char in enumerate(sentence): X[i, t, char_indices[char]] = 1 y[i, char_indices[next_chars[i]]] = 1 #Building the model model = Sequential(); model.add(LSTM(128, input_shape=(SEQUENCE_LENGTH, len(chars)))) model.add(Dense(len(chars))) model.add(Activation('softmax')) #Training optimizer = RMSprop(lr= 0.01) model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) history = model.fit(X, y, validation_split=0.05, batch_size=128, epochs=1, shuffle=True).history #Predicting #Testing def prepare_input(text): x = np.zeros((1, SEQUENCE_LENGTH, len(chars))) for t, char in enumerate(text): x[0, t, char_indices[char]] = 1 return x #The sequences must be 40 chars long and the tensor is of the shape (1, 40, 57) #The sample function #This function allows us to ask our model what are the next probable characters (The heap simplifies the job) def sample(preds, top_n = 3): preds = np.asarray(preds).astype('float64') preds = np.log(preds) exp_preds = np.exp(preds) preds = exp_preds / np.sum(exp_preds) return heapq.nlargest(top_n, range(len(preds)), preds.take) #Prediction function def predict_completion(text): original_text = text generalised = text completion = '' while True: x = prepare_input(text) preds = model.predict(x, verbose=0)[0] next_index = sample(preds, top_n=1)[0] next_char = indices_char[next_index] text = text[1:] + next_char completion += next_char if len(original_text + completion) + 2 > len(original_text) and next_char == ' ': return completion #This methods wraps everything and allows us to predict multiple completions def predict_completions(text, n = 3): x = prepare_input(text) preds = model.predict(x, verbose=0)[0] next_indices = 
sample(preds, n) return [indices_char[idx] + predict_completion(text[1:] + indices_char[idx]) for idx in next_indices]