M2M100Tokenizer.from_pretrained 'NoneType' object is not callable
I have the following chunk of code from this link:
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
hi_text = "जीवन एक चॉकलेट बॉक्स की तरह है।"
chinese_text = "生活就像一盒巧克力。"
model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
Which gives me the following error:
'NoneType' object is not callable
I'm using Google Colab, but funnily enough it works perfectly fine in VS Code. Does anyone know why?
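A minimal diagnostic sketch, assuming the usual cause of this symptom on Colab: M2M100Tokenizer depends on the sentencepiece package, and when that dependency is missing the imported name can end up as None, so calling .from_pretrained on it raises 'NoneType' object is not callable. This is an assumption to verify, not a confirmed diagnosis:

import importlib.util

# Hypothesis (unconfirmed): sentencepiece is missing in the Colab runtime.
if importlib.util.find_spec("sentencepiece") is None:
    print("sentencepiece is not installed; in Colab run `!pip install sentencepiece`")
    print("and then restart the runtime before importing transformers again.")
else:
    from transformers import M2M100Tokenizer
    print(type(M2M100Tokenizer))  # should print a class, not NoneType
    tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")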
See also questions close to this topic
-
Pytorch model object has no attribute 'predict' BERT
I trained a BertClassifier model using PyTorch. After creating my best.pt, I would like to put the model into production and use it to predict and classify a sample, so I restore it from the checkpoint. Then, after putting it in evaluation mode and freezing the model, I use .predict to run it on my sample, but I encounter this AttributeError. I had also initialized the model before loading the checkpoint. Where am I going wrong? Thank you for your help!
import shutil
import time
import logging

import numpy as np
import torch
import torch.nn as nn
from transformers import AdamW, get_linear_schedule_with_warmup


def save_ckp(state, is_best, checkpoint_path, best_model_path):
    """
    Function created to save checkpoint, the latest one and the best one.
    This creates flexibility: either you are interested in the state of the
    latest checkpoint or the best checkpoint.
    state: checkpoint we want to save
    is_best: is this the best checkpoint; min validation loss
    checkpoint_path: path to save checkpoint
    best_model_path: path to save best model
    """
    f_path = checkpoint_path
    # save checkpoint data to the path given, checkpoint_path
    torch.save(state, f_path)
    # if it is a best model, min validation loss
    if is_best:
        best_fpath = best_model_path
        # copy that checkpoint file to best path given, best_model_path
        shutil.copyfile(f_path, best_fpath)


def load_ckp(checkpoint_fpath, model, optimizer):
    """
    checkpoint_fpath: path of the checkpoint to load
    model: model that we want to load checkpoint parameters into
    optimizer: optimizer we defined in previous training
    """
    # load checkpoint
    checkpoint = torch.load(checkpoint_fpath)
    # initialize state_dict from checkpoint to model
    model.load_state_dict(checkpoint['state_dict'])
    # initialize optimizer from checkpoint to optimizer
    optimizer.load_state_dict(checkpoint['optimizer'])
    # initialize valid_loss_min from checkpoint to valid_loss_min
    valid_loss_min = checkpoint['valid_loss_min']
    # return model, optimizer, epoch value, min validation loss
    return model, optimizer, checkpoint['epoch'], valid_loss_min.item()


# Create the BertClassifier class
class BertClassifier(nn.Module):
    """Bert Model for Classification Tasks."""

    def __init__(self, freeze_bert=True):
        """
        @param bert: a BertModel object
        @param classifier: a torch.nn.Module classifier
        @param freeze_bert (bool): Set `False` to fine-tune the BERT model
        """
        super(BertClassifier, self).__init__()
        .......

    def forward(self, input_ids, attention_mask):
        """
        Feed input to BERT and the classifier to compute logits.
        @param input_ids (torch.Tensor): an input tensor with shape (batch_size, max_length)
        @param attention_mask (torch.Tensor): a tensor that holds attention mask
            information with shape (batch_size, max_length)
        @return logits (torch.Tensor): an output tensor with shape (batch_size, num_labels)
        """
        # Feed input to BERT
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Extract the last hidden state of the token `[CLS]` for the classification task
        last_hidden_state_cls = outputs[0][:, 0, :]
        # Feed input to classifier to compute logits
        logits = self.classifier(last_hidden_state_cls)
        return logits


def initialize_model(epochs):
    """Initialize the Bert Classifier, the optimizer and the learning rate scheduler."""
    # Instantiate Bert Classifier
    bert_classifier = BertClassifier(freeze_bert=False)
    # Tell PyTorch to run the model on GPU
    bert_classifier = bert_classifier.to(device)
    # Create the optimizer
    optimizer = AdamW(bert_classifier.parameters(),
                      lr=lr,    # Default learning rate
                      eps=1e-8  # Default epsilon value
                      )
    # Total number of training steps
    total_steps = len(train_dataloader) * epochs
    # Set up the learning rate scheduler
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0,  # Default value
                                                num_training_steps=total_steps)
    return bert_classifier, optimizer, scheduler


def train(model, train_dataloader, val_dataloader, valid_loss_min_input,
          checkpoint_path, best_model_path, start_epochs, epochs, evaluation=True):
    """Train the BertClassifier model."""
    # Start training loop
    logging.info("--Start training...\n")
    # Initialize tracker for minimum validation loss
    valid_loss_min = valid_loss_min_input
    for epoch_i in range(start_epochs, epochs):
        # =======================================
        #               Training
        # =======================================
        # Print the header of the result table
        logging.info((f"{'Epoch':^7} | {'Batch':^7} | {'Train Loss':^12} | "
                      f"{'Val Loss':^10} | {'Val Acc':^9} | {'Elapsed':^9}"))
        # Measure the elapsed time of each epoch
        t0_epoch, t0_batch = time.time(), time.time()
        # Reset tracking variables at the beginning of each epoch
        total_loss, batch_loss, batch_counts = 0, 0, 0
        # Put the model into training mode
        model.train()
        # For each batch of training data...
        for step, batch in enumerate(train_dataloader):
            batch_counts += 1
            # Load batch to GPU
            b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)
            # Zero out any previously calculated gradients
            model.zero_grad()
            # Perform a forward pass. This will return logits.
            logits = model(b_input_ids, b_attn_mask)
            # Compute loss and accumulate the loss values
            loss = loss_fn(logits, b_labels)
            batch_loss += loss.item()
            total_loss += loss.item()
            # Perform a backward pass to calculate gradients
            loss.backward()
            # Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            # Update parameters and the learning rate
            optimizer.step()
            scheduler.step()
            # Print the loss values and time elapsed for every 500 batches
            if (step % 500 == 0 and step != 0) or (step == len(train_dataloader) - 1):
                # Calculate time elapsed
                time_elapsed = time.time() - t0_batch
                # Print training results
                logging.info(f"{epoch_i + 1:^7} | {step:^7} | "
                             f"{batch_loss / batch_counts:^12.6f} | "
                             f"{'-':^10} | {'-':^9} | {time_elapsed:^9.2f}")
                # Reset batch tracking variables
                batch_loss, batch_counts = 0, 0
                t0_batch = time.time()
        # Calculate the average loss over the entire training data
        avg_train_loss = total_loss / len(train_dataloader)
        logging.info("-" * 70)
        # =======================================
        #               Evaluation
        # =======================================
        if evaluation == True:
            # After the completion of each training epoch, measure the model's
            # performance on our validation set.
            val_loss, val_accuracy = evaluate(model, val_dataloader)
            # Print performance over the entire training data
            time_elapsed = time.time() - t0_epoch
            logging.info(f"{epoch_i + 1:^7} | {'-':^7} | {avg_train_loss:^12.6f} | "
                         f"{val_loss:^10.6f} | {val_accuracy:^10.6f} | {time_elapsed:^9.2f}")
            logging.info("-" * 70)
            logging.info("\n")
        # create checkpoint variable and add important data
        checkpoint = {
            'epoch': epoch_i + 1,
            'valid_loss_min': val_loss,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        # save checkpoint
        save_ckp(checkpoint, False, checkpoint_path, best_model_path)
        ## TODO: save the model if validation loss has decreased
        if val_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(valid_loss_min, val_loss))
            # save checkpoint as best model
            save_ckp(checkpoint, True, checkpoint_path, best_model_path)
            valid_loss_min = val_loss
    logging.info("-----------------Training complete--------------------------")


def evaluate(model, val_dataloader):
    """After the completion of each training epoch, measure the model's
    performance on our validation set."""
    # Put the model into evaluation mode. The dropout layers are disabled during test time.
    model.eval()
    # Tracking variables
    val_accuracy = []
    val_loss = []
    # For each batch in our validation set...
    for batch in val_dataloader:
        # Load batch to GPU
        b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)
        # Compute logits
        with torch.no_grad():
            logits = model(b_input_ids, b_attn_mask)
        # Compute loss
        loss = loss_fn(logits, b_labels)
        val_loss.append(loss.item())
        # Get the predictions
        preds = torch.argmax(logits, dim=1).flatten()
        # Calculate the accuracy rate
        accuracy = (preds == b_labels).cpu().numpy().mean() * 100
        val_accuracy.append(accuracy)
    # Compute the average accuracy and loss over the validation set.
    val_loss = np.mean(val_loss)
    val_accuracy = np.mean(val_accuracy)
    return val_loss, val_accuracy


bert_classifier, optimizer, scheduler = initialize_model(epochs=n_epochs)
train(model = bert_classifier ......)
bert_classifier, optimizer, scheduler = initialize_model(epochs=n_epochs)
model, optimizer, start_epoch, valid_loss_min = load_ckp(
    r"./best_model/best_model.pt", bert_classifier, optimizer)
model.eval()
model.freeze()

sample = {"seq": "ABCDE",}

predictions = model.predict(sample)
AttributeError: 'BertClassifier' object has no attribute 'predict'
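For context on why this fails: a plain torch.nn.Module only has the methods its class defines, and BertClassifier defines forward but no predict (predict and freeze come from frameworks such as PyTorch Lightning, not from nn.Module). A minimal sketch of running inference with the class as written, assuming a tokenizer and device are already set up (predict_sample is a hypothetical helper, not part of the original code):

import torch

def predict_sample(model, tokenizer, text, device):
    # nn.Module has no .predict(); call the model directly, which invokes
    # forward(input_ids, attention_mask) as defined on BertClassifier.
    model.eval()  # nn.Module also has no .freeze(); eval() disables dropout
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    input_ids = encoded["input_ids"].to(device)
    attention_mask = encoded["attention_mask"].to(device)
    with torch.no_grad():  # no gradients needed at inference time
        logits = model(input_ids, attention_mask)
    return torch.argmax(logits, dim=1)  # predicted class index per sample

# usage, assuming model, tokenizer and device exist:
# preds = predict_sample(model, tokenizer, "some input text", device)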
-
Where does transformers save microsoft/DialoGPT-small?
When I run the code
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

SML = input("small, medium or large: ").lower()  # I pick small
model_name = "microsoft/DialoGPT-" + SML
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
It downloads microsoft/DialoGPT-small from somewhere on the internet to my computer, but I don't know where the saved file is. Does anyone have any idea where this file is saved?
#EDIT#
I found the below code in
\Lib\site-packages\transformers\models\dialogpt\convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
but have no idea what it means.

def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folder_path: str):
    d = torch.load(checkpoint_path)
    d[NEW_KEY] = d.pop(OLD_KEY)
    os.makedirs(pytorch_dump_folder_path, exist_ok=True)
    torch.save(d, os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--dialogpt_path", default=".", type=str)
    args = parser.parse_args()
    for MODEL in DIALOGPT_MODELS:
        checkpoint_path = os.path.join(args.dialogpt_path, f"{MODEL}_ft.pkl")
        pytorch_dump_folder_path = f"./DialoGPT-{MODEL}"
        convert_dialogpt_checkpoint(
            checkpoint_path,
            pytorch_dump_folder_path,
        )
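For reference, and hedged because the exact layout varies by transformers version: from_pretrained does not save the model next to your script; it stores it in a local cache, by default under ~/.cache/huggingface/ (older releases used a transformers/ subfolder, newer ones hub/). You can also pass cache_dir to choose the location yourself; the directory name below is just an example:

import os
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "microsoft/DialoGPT-small"

# cache_dir overrides the default ~/.cache/huggingface/ location.
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="./my_model_cache")
model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir="./my_model_cache")

# List everything that was downloaded:
for root, dirs, files in os.walk("./my_model_cache"):
    for f in files:
        print(os.path.join(root, f))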
-
What is the correct transformer for generating text out of image features?
I am working on an image captioning project. I want to build a full transformer model using Swin as the encoder; for the decoder part, I need another transformer to learn the captions in the COCO 2017 dataset and generate text from the features extracted from each image. The problem is that I do not know whether this is a good idea. I have found two papers with fully transformer architectures, but they used a single encoder and a custom decoder, not BERT or similar, so I cannot decide which language transformer is suitable for this purpose.
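As one possible starting point (a sketch under assumptions, not the method of the two papers mentioned): the transformers library ships a generic VisionEncoderDecoderModel that can pair a vision encoder such as Swin with an autoregressive text decoder such as GPT-2, adding the cross-attention layers automatically. The checkpoint names below are assumptions to swap for whatever you fine-tune on COCO 2017:

from transformers import (AutoFeatureExtractor, AutoTokenizer,
                          VisionEncoderDecoderModel)

# Pair a Swin encoder with a GPT-2 decoder; cross-attention is inserted into
# the decoder so it can attend to the image features.
model = VisionEncoderDecoderModel.from_encoder_decoder_pretrained(
    "microsoft/swin-base-patch4-window7-224-in22k",  # assumed encoder checkpoint
    "gpt2",                                          # assumed decoder checkpoint
)
tokenizer = AutoTokenizer.from_pretrained("gpt2")
feature_extractor = AutoFeatureExtractor.from_pretrained(
    "microsoft/swin-base-patch4-window7-224-in22k"
)

# GPT-2 has no pad token; configure the special ids used during generation.
tokenizer.pad_token = tokenizer.eos_token
model.config.decoder_start_token_id = tokenizer.bos_token_id
model.config.pad_token_id = tokenizer.pad_token_id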
-
Discord Bot // Voice Client Returns NoneType
Experimenting with Discord and Python, I followed a couple of guides and created a bot that could play music from streaming URLs.
However, now, for the same code, I get AttributeError: 'NoneType' object exceptions.
The actual method goes like this:
...

FFMPEG_OPTIONS = {
    "before_options": "-reconnect 1 -reconnect_streamed 1 -reconnect_delay_max 5",
    "options": "-vn -sn -dn"
}

@commands.command()
async def radio(self, ctx, url):
    await self.join(ctx)
    await self.playStreamUrlLogic(ctx, url)

async def playStreamUrlLogic(self, ctx, url):
    source = await discord.FFmpegOpusAudio.from_probe(url, **self.FFMPEG_OPTIONS)
    await ctx.voice_client.play(source)

...
Even though everything points to a non-instantiated class, when I debug I can see that there is a voice_client object with info (visible in the debugger's property viewer).
I already tried using FFmpegPCMAudio as well, but with no results. Any insights would be helpful.
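One hedged guess at the pattern involved: ctx.voice_client is None whenever the bot is not actually connected to a voice channel, and in discord.py VoiceClient.play() is a regular method rather than a coroutine. A defensive sketch of the method, assuming a discord.py commands.Cog like the original:

async def playStreamUrlLogic(self, ctx, url):
    # ctx.voice_client stays None until the bot has joined a voice channel.
    if ctx.voice_client is None:
        if ctx.author.voice is None:
            await ctx.send("Join a voice channel first.")
            return
        await ctx.author.voice.channel.connect()

    source = await discord.FFmpegOpusAudio.from_probe(url, **self.FFMPEG_OPTIONS)
    ctx.voice_client.play(source)  # play() is synchronous; awaiting its None return value raises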
-
AttributeError: 'NoneType' object has no attribute 'gene_doc'
I have an async method
async def test_doc(item_id, order_id, acc_number):
    test_doc = None
    try:
        test_doc = TestGene(item_id, order_id, acc_number)
        gene = test_doc.determine_gene()
        await gene.gene_doc()
    except Exception as e:
        await test_doc.close_sessions()
        logging.exception(e)
and the main method
def determine_gene(self):
    order_item_details = self.async_gene_client.get_order_items(self.item_id)
    shippable_order_items = order_item_details.get("shippable_order_items")
    for shippable_order_item in shippable_order_items:
        shippable_type = shippable_order_item.get('type')
        acc_number = order_item_details.get("name")
        print(f'got shippable type {shippable_type}')
        if shippable_type == 'shippable_gene_type':
            return NewDocGene(self.item_id, self.order_id, acc_number=acc_number,
                              order_item_details=order_item_details,
                              shippable_type=shippable_type)
When I run my code, I am getting the following error:
AttributeError: 'NoneType' object has no attribute 'gene_doc'
Can anyone help me with this?
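For what the traceback implies: determine_gene() falls through its for loop and implicitly returns None whenever no order item has type 'shippable_gene_type', so gene.gene_doc() becomes None.gene_doc(). A minimal guard, as a sketch:

async def test_doc(item_id, order_id, acc_number):
    test_doc = None
    try:
        test_doc = TestGene(item_id, order_id, acc_number)
        gene = test_doc.determine_gene()
        if gene is None:
            # determine_gene() returns None implicitly when no order item
            # matches 'shippable_gene_type'; bail out instead of crashing.
            logging.warning("no shippable gene type for item %s", item_id)
            return
        await gene.gene_doc()
    except Exception as e:
        if test_doc is not None:  # TestGene() itself may have raised
            await test_doc.close_sessions()
        logging.exception(e)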
-
Difference in None behaviour between Python versions
I've read a lot about the differences between Python versions, but I've never come across this one:
Python 2.7.18 (default, Mar 8 2021, 13:02:45)
[GCC 9.3.0] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> None.__eq__("abc")
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute '__eq__'
and
Python 3.8.10 (default, Mar 15 2022, 12:22:08)
[GCC 9.4.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> None.__eq__("abc")
NotImplemented
I found that it already behaves this way in Python 3.2:
Python 3.2.6 (default, Jan 18 2016, 19:21:14)
[GCC 4.9.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> None.__eq__("abc")
NotImplemented
What I want to know is whether this is documented somewhere. I didn't find anything here. Any sources?
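A short demonstration of what the NotImplemented return value means in practice: it is part of the rich-comparison protocol, where Python falls back to the reflected operation and finally to identity, so == itself still returns a plain False:

# Python 3: object defines __eq__, so None inherits it; for a type it cannot
# compare against, it returns NotImplemented instead of raising.
print(None.__eq__("abc"))  # NotImplemented
print(None == "abc")       # False: Python then tries "abc".__eq__(None) and
                           # finally falls back to an identity comparison
print(None.__eq__(None))   # True

class Weird:
    def __eq__(self, other):
        return NotImplemented

print(Weird() == Weird())  # False: both sides punt, identity decides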
-
Why are the key and query linear layers in Vision Transformers not combined into one matrix?
I am studying some vision transformer code (e.g. vit-pytorch) and found this in the attention module:
# x is input
key = nn.Linear(..., bias=False)(x)
query = nn.Linear(..., bias=False)(x)
similar_matrix = torch.matmul(query, key.transpose(...))
Because a Linear layer can be treated as a matrix, I get:
key = K^T @ x
query = Q^T @ x
similar_matrix = query^T @ key = x^T @ (Q @ K^T) @ x

(K and Q are learnable matrices, @ means matmul, ^T means transpose)
Here, I think Q @ K^T could be combined into a single matrix in order to reduce the number of parameters and the amount of computation.
Why is this not done? Is it because the model doesn't train as well?
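One way to see why the factored form is kept (a sketch of the parameter counting, not an authoritative answer): with embedding dimension d and per-head dimension d_k, the separate projections cost 2·d·d_k parameters per head, while a fused d×d matrix would cost d², which is larger whenever d_k < d/2. The product Q @ K^T has rank at most d_k, so the factorization is a deliberate low-rank bottleneck, and keeping query and key as separate tensors is also what lets multi-head attention reshape and split them per head:

# Parameter count per head: two thin projections vs. one fused square matrix.
d = 768            # embedding dimension (ViT-Base)
heads = 12
d_k = d // heads   # per-head dimension = 64

two_projections = 2 * d * d_k  # separate W_Q and W_K for one head
fused_matrix = d * d           # one dense W = W_Q @ W_K^T per head

print(two_projections)  # 98304
print(fused_matrix)     # 589824, about 6x more parameters

# A learned dense d x d matrix would also drop the rank-d_k constraint and
# would no longer yield the separate query/key tensors that the per-head
# reshaping in multi-head attention relies on.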
-
Bert tokenizer won't work with tensor format (TensorFlow)
This may be a silly question, but I'm new to TensorFlow. I have the following code, but the tokenizer won't use the strings inside the tensor.
import tensorflow as tf

docs = tf.data.Dataset.from_tensor_slices([['hagamos que esto funcione.'], ["por fin funciona!"]])

from transformers import AutoTokenizer, DataCollatorWithPadding
import numpy as np

checkpoint = "dccuchile/bert-base-spanish-wwm-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

def tokenize(review):
    return tokenizer(review)

tokens = docs.map(tokenize)
I get the following output:
ValueError: in user code:

    File "<ipython-input-54-3272cedfdcab>", line 13, in tokenize  *
        return tokenizer(review)
    File "/usr/local/lib/python3.7/dist-packages/transformers/tokenization_utils_base.py", line 2429, in __call__  *
        raise ValueError(

    ValueError: text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).
while my expected output is something like this:
tokenizer('esto al fin funciona!')
{'input_ids': [4, 1202, 1074, 1346, 4971, 1109, 5], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}
Any idea how to make it work?
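For context on the failure: inside Dataset.map the function receives symbolic tf.Tensor objects, not Python strings, and the HuggingFace tokenizer only accepts str, List[str] or List[List[str]]. One sketch of a workaround is to tokenize eagerly with plain strings first and build the dataset from the result:

import tensorflow as tf
from transformers import AutoTokenizer

checkpoint = "dccuchile/bert-base-spanish-wwm-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

texts = ["hagamos que esto funcione.", "por fin funciona!"]

# Tokenize with ordinary Python strings (eager, outside the tf graph) and ask
# for TF tensors; padding makes the examples stackable into one Dataset.
encodings = tokenizer(texts, padding=True, truncation=True, return_tensors="tf")
dataset = tf.data.Dataset.from_tensor_slices(dict(encodings))

for example in dataset.take(1):
    print(example)  # input_ids, token_type_ids and attention_mask tensors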
-
Why does tokeniser break down words that are present in vocab
In my understanding, what a tokenizer does is: given each word, the tokenizer will break the word down into sub-words only if the word is not present in tokenizer.get_vocab().
def checkModel(model):
    tokenizer = AutoTokenizer.from_pretrained(model)
    allList = []
    for word in tokenizer.get_vocab():
        word = word.lower()
        tokens = tokenizer.tokenize(word)
        try:
            if word[0] != '#' and word[0] != '[' and tokens[0] != word:
                allList.append((word, tokens))
                print(word, tokens)
        except:
            continue
    return allList

checkModel('bert-base-uncased')
# ideally should return an empty list
However, what I have observed is that some models on huggingface will break down words into smaller pieces even if the word is present in the vocab.
checkModel('emilyalsentzer/Bio_ClinicalBERT')

output:
welles ['well', '##es']
lexington ['le', '##xing', '##ton']
palestinian ['pale', '##st', '##inian']
...
elisabeth ['el', '##isa', '##beth']
alexander ['ale', '##xa', '##nder']
appalachian ['app', '##ala', '##chia', '##n']
mitchell ['mit', '##chel', '##l']
...
4630  # tokens in vocab got broken down, not supposed to happen
I have checked a few models for this behaviour and was wondering why it is happening.
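One hedged hypothesis worth ruling out: checkModel lowercases every vocab entry before tokenizing, so for vocab entries that are cased ('Welles', 'Lexington', ...) the lowercased string may genuinely be absent from the vocab, and WordPiece then splits it. A quick diagnostic sketch:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('emilyalsentzer/Bio_ClinicalBERT')
vocab = tokenizer.get_vocab()

word = 'welles'
print(word in vocab)               # is the lowercased form itself in the vocab?
print(word.capitalize() in vocab)  # or only the cased form 'Welles'?
print(tokenizer.tokenize(word))    # how the lowercased form gets split
# Whether the tokenizer lowercases input at all (attribute may not exist):
print(getattr(tokenizer, 'do_lower_case', None))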
-
Python: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same
I'm developing a Speech to Text model. However while training I'm getting the following error:
***** Running training *****
  Num examples = 531
  Num Epochs = 2
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 134
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-72-9f65de613925> in <module>()
     12
     13 # with torch.nn.DataParallel():
---> 14 trainer.train()

13 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight, bias)
    297                             _single(0), self.dilation, self.groups)
    298         return F.conv1d(input, weight, bias, self.stride,
--> 299                         self.padding, self.dilation, self.groups)
    300
    301     def forward(self, input: Tensor) -> Tensor:

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor
I understand it is complaining that input is on CPU while the model is on GPU. Can someone please help me fix this? I'm not sure how to put input values on CUDA.
############# Training ####################
import torch
torch.cuda.empty_cache()

from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union

@dataclass
class DataCollatorCTCWithPadding:
    processor: Wav2Vec2Processor
    padding: Union[bool, str] = True
    max_length: Optional[int] = None
    max_length_labels: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None
    pad_to_multiple_of_labels: Optional[int] = None

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
        # split inputs and labels since they have to be of different lengths and
        # need different padding methods
        input_features = [{"input_values": feature["input_values"]} for feature in features]
        label_features = [{"input_ids": feature["labels"]} for feature in features]

        batch = self.processor.pad(
            input_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )
        with self.processor.as_target_processor():
            labels_batch = self.processor.pad(
                label_features,
                padding=self.padding,
                max_length=self.max_length_labels,
                pad_to_multiple_of=self.pad_to_multiple_of_labels,
                return_tensors="pt",
            )

        # replace padding with -100 to ignore loss correctly
        labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100)
        batch["labels"] = labels
        return batch

data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True)

from transformers import Trainer

torch.cuda.empty_cache()

trainer = Trainer(
    model=model,
    data_collator=data_collator,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_data,
    eval_dataset=test_data,
    tokenizer=processor.feature_extractor,
)

# with torch.nn.DataParallel():
trainer.train()
Link to notebook: https://colab.research.google.com/drive/1b5UmblSssdSfusXh-3R5PJ6y8phV8U3x?usp=sharing
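Since the HuggingFace Trainer normally moves each batch to the model's device by itself, a hedged first check is whether the model actually ended up on the GPU before the Trainer was built; failing that, the generic fix this error message asks for is to put the inputs and the weights on the same device explicitly:

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)  # ensure the weights and the inputs share a device

def to_device(batch, device):
    # Move every tensor in a collated batch to the target device.
    return {k: v.to(device) if isinstance(v, torch.Tensor) else v
            for k, v in batch.items()}

# usage with one collated batch (assuming `features` is a list of examples):
# batch = to_device(data_collator(features), device)
# outputs = model(**batch)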
-
Use 1 tokenizer or 2 tokenizers for translation task?
I've seen several tutorials about seq2seq tasks like translation. They usually use two tokenizers trained on the corpus, one for the source language and the other for the target language. However, in HuggingFace's translation task example, they use just one tokenizer for the two languages. I wonder which is the better way: one tokenizer or two? If I use two tokenizers, the number of output classes would be smaller, and it might eliminate some tokens that the target language doesn't have, thus improving the result. Or is it fine to use one tokenizer, with the same performance? Please help me, thanks in advance!
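For concreteness, a sketch of the single-tokenizer pattern the HuggingFace translation example relies on (the checkpoint name is an assumption): seq2seq checkpoints like Marian ship one tokenizer whose vocabulary covers both languages, and the same object encodes targets when switched into target mode. Whether two separate tokenizers help depends mainly on whether a shared subword vocabulary over the language pair becomes too large; a target-only vocabulary does shrink the output softmax, but a shared one lets related languages share subwords.

from transformers import AutoTokenizer

# One tokenizer for both sides, as in the HF translation example
# (checkpoint name assumed for illustration):
tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de")

src = "Life is like a box of chocolates."
tgt = "Das Leben ist wie eine Schachtel Pralinen."

inputs = tokenizer(src, return_tensors="pt")
# Encode the target with the same tokenizer switched into target mode
# (API of the transformers versions these tutorials use):
with tokenizer.as_target_tokenizer():
    labels = tokenizer(tgt, return_tensors="pt")

print(inputs["input_ids"].shape, labels["input_ids"].shape)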
I’ve seen several tutorials about seq2seq tasks like translation. They usually use 2 tokenizers trained on corpus, one for source language and the other for target language. However, in huggingface’s translation task example, they just use one tokenizer for 2 languages. I wonder which is the better way, 1 tokenizer or 2 tokenizers? If i use 2 tokenizers then the output classes would be smaller and may be it can eliminate some tokens that target language doesn’t have, thus, improve the result or it is okay to use one tokenizer and the performance is still the same? Please, help me, thanks In advance!