Where does transformers save microsoft/DialoGPT-small?
when I run the code
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
SML=input("small,medium or large: ").lower()#I pick small
model_name = "microsoft/DialoGPT-"+SML
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
It downloads microsoft/DialoGPT-small from somewhere on the internet and downloads it to my computer but I don`t know where the saved file is. Anyone have any idea where this file is saved?
#EDIT#
I found the below code in \Lib\site-packages\transformers\models\dialogpt\convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
but have no idea what it means.
def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folder_path: str):
d = torch.load(checkpoint_path)
d[NEW_KEY] = d.pop(OLD_KEY)
os.makedirs(pytorch_dump_folder_path, exist_ok=True)
torch.save(d, os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME))
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--dialogpt_path", default=".", type=str)
args = parser.parse_args()
for MODEL in DIALOGPT_MODELS:
checkpoint_path = os.path.join(args.dialogpt_path, f"{MODEL}_ft.pkl")
pytorch_dump_folder_path = f"./DialoGPT-{MODEL}"
convert_dialogpt_checkpoint(
checkpoint_path,
pytorch_dump_folder_path,
)
do you know?
how many words do you know
See also questions close to this topic
-
Python File Tagging System does not retrieve nested dictionaries in dictionary
I am building a file tagging system using Python. The idea is simple. Given a directory of files (and files within subdirectories), I want to filter them out using a filter input and tag those files with a word or a phrase.
If I got the following contents in my current directory:
data/ budget.xls world_building_budget.txt a.txt b.exe hello_world.dat world_builder.spec
and I execute the following command in the shell:
py -3 tag_tool.py -filter=world -tag="World-Building Tool"
My output will be:
These files were tagged with "World-Building Tool": data/ world_building_budget.txt hello_world.dat world_builder.spec
My current output isn't exactly like this but basically, I am converting all files and files within subdirectories into a single dictionary like this:
def fs_tree_to_dict(path_): file_token = '' for root, dirs, files in os.walk(path_): tree = {d: fs_tree_to_dict(os.path.join(root, d)) for d in dirs} tree.update({f: file_token for f in files}) return tree
Right now, my dictionary looks like this:
key:''
.In the following function, I am turning the empty values
''
into empty lists (to hold my tags):def empty_str_to_list(d): for k,v in d.items(): if v == '': d[k] = [] elif isinstance(v, dict): empty_str_to_list(v)
When I run my entire code, this is my output:
hello_world.dat ['World-Building Tool'] world_builder.spec ['World-Building Tool']
But it does not see
data/world_building_budget.txt
. This is the full dictionary:{'data': {'world_building_budget.txt': []}, 'a.txt': [], 'hello_world.dat': [], 'b.exe': [], 'world_builder.spec': []}
This is my full code:
import os, argparse def fs_tree_to_dict(path_): file_token = '' for root, dirs, files in os.walk(path_): tree = {d: fs_tree_to_dict(os.path.join(root, d)) for d in dirs} tree.update({f: file_token for f in files}) return tree def empty_str_to_list(d): for k, v in d.items(): if v == '': d[k] = [] elif isinstance(v, dict): empty_str_to_list(v) parser = argparse.ArgumentParser(description="Just an example", formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("--filter", action="store", help="keyword to filter files") parser.add_argument("--tag", action="store", help="a tag phrase to attach to a file") parser.add_argument("--get_tagged", action="store", help="retrieve files matching an existing tag") args = parser.parse_args() filter = args.filter tag = args.tag get_tagged = args.get_tagged current_dir = os.getcwd() files_dict = fs_tree_to_dict(current_dir) empty_str_to_list(files_dict) for k, v in files_dict.items(): if filter in k: if v == []: v.append(tag) print(k, v) elif isinstance(v, dict): empty_str_to_list(v) if get_tagged in v: print(k, v)
-
Actaully i am working on a project and in it, it is showing no module name pip_internal plz help me for the same. I am using pycharm(conda interpreter
File "C:\Users\pjain\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main return _run_code(code, main_globals, None, File "C:\Users\pjain\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in _run_code exec(code, run_globals) File "C:\Users\pjain\AppData\Local\Programs\Python\Python310\Scripts\pip.exe\__main__.py", line 4, in <module> File "C:\Users\pjain\AppData\Local\Programs\Python\Python310\lib\site-packages\pip\_internal\__init__.py", line 4, in <module> from pip_internal.utils import _log
I am using pycharm with conda interpreter.
-
Looping the function if the input is not string
I'm new to python (first of all) I have a homework to do a function about checking if an item exists in a dictionary or not.
inventory = {"apple" : 50, "orange" : 50, "pineapple" : 70, "strawberry" : 30} def check_item(): x = input("Enter the fruit's name: ") if not x.isalpha(): print("Error! You need to type the name of the fruit") elif x in inventory: print("Fruit found:", x) print("Inventory available:", inventory[x],"KG") else: print("Fruit not found") check_item()
I want the function to loop again only if the input written is not string. I've tried to type return Under print("Error! You need to type the name of the fruit") but didn't work. Help
-
Putting the percentage (confidence score) of the predicted class in CNN using torch
I'm trying to show the percentage of the predicted class of the image. Example: 95% that the input image is Dog.
Can someone help me to figure out the process and explain it to me, because I do not completely understand how the code below works.
class ConvNet(nn.Module): def __init__(self, num_classes): super(ConvNet, self).__init__() self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1) self.bn1 = nn.BatchNorm2d(num_features=12) self.relu1 = nn.ReLU() self.pool1 = nn.MaxPool2d(kernel_size=2) self.conv2 = nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1) self.relu2 = nn.ReLU() self.conv3 = nn.Conv2d(in_channels=20, out_channels=32, kernel_size=3, stride=1, padding=1) self.bn3 = nn.BatchNorm2d(num_features=32) self.relu3 = nn.ReLU() self.conv4 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1) self.bn4 = nn.BatchNorm2d(num_features=64) self.relu4 = nn.ReLU() self.pool2 = nn.MaxPool2d(kernel_size=2) self.flat = nn.Flatten() self.drp = nn.Dropout(p=0.3) self.fc = nn.Linear(in_features=64 * 39 * 39, out_features=num_classes) # feed forward function def forward(self, input): output = self.conv1(input) output = self.bn1(output) output = self.relu1(output) output = self.pool1(output) output = self.conv2(output) output = self.relu2(output) output = self.conv3(output) output = self.bn3(output) output = self.relu3(output) # output = self.pool2(output) output = self.conv4(output) output = self.bn4(output) output = self.relu4(output) output = self.pool2(output) output = self.flat(output) output = output.view(-1, 64 * 39 * 39) output = self.drp(output) output = self.fc(output) return output
Prediction Function In this part i already able to get the prediction but i want to add the part where I can get the percentage (confidence score) of the prediction.
def prediction(img_path, transformer): image = Image.open(img_path) image_tensor = transformer(image).float() image_tensor = image_tensor.unsqueeze_(0) if torch.cuda.is_available(): image_tensor.cuda() input = Variable(image_tensor) output = model(input) probs = nnf.softmax(output, dim=1) conf, classess = torch.max(probs, 1) index = output.data.numpy().argmax() pred = classes[index] return pred, conf pred_dict, probability = prediction(img, transformer)
-
Cannot find CUDA device even if installed on the system when accessed from pytorch
NVCC version
C:\Users\acute>nvcc --version nvcc: NVIDIA (R) Cuda compiler driver Copyright (c) 2005-2021 NVIDIA Corporation Built on Sun_Mar_21_19:24:09_Pacific_Daylight_Time_2021 Cuda compilation tools, release 11.3, V11.3.58 Build cuda_11.3.r11.3/compiler.29745058_0NVIDIA SMI version
C:\Users\acute>nvidia-smi Mon May 02 00:12:20 2022 +-----------------------------------------------------------------------------+ | NVIDIA-SMI 431.02 Driver Version: 431.02 CUDA Version: 10.2 | |-------------------------------+----------------------+----------------------+ | GPU Name TCC/WDDM | Bus-Id Disp.A | Volatile Uncorr. ECC | | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | |===============================+======================+======================| | 0 GeForce GTX 1650 WDDM | 00000000:01:00.0 Off | N/A | | N/A 43C P8 5W / N/A | 134MiB / 4096MiB | 0% Default | +-------------------------------+----------------------+----------------------++-----------------------------------------------------------------------------+ | Processes: GPU Memory | | GPU PID Type Process name Usage | |=============================================================================| | No running processes found | +-----------------------------------------------------------------------------+ C:\Users\acute>pip freeze certifi==2021.10.8 charset-normalizer==2.0.12 idna==3.3 numpy==1.22.3 Pillow==9.1.0 requests==2.27.1 torch==1.11.0+cu113 torchaudio==0.11.0+cu113 torchvision==0.12.0+cu113 typing_extensions==4.2.0 urllib3==1.26.9
Accessing from torch
>>> torch.cuda.is_available() C:\Users\acute\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\cuda_init_.py:82: UserWarning: CUDA initialization: CUDA driver initialization failed, you might not have a CUDA gpu. (Triggered internally at C:\actions-runner_work\pytorch\pytorch\builder\windows\pytorch\c10\cuda\CUDAFunctions.cpp:112.) return torch._C._cuda_getDeviceCount() > 0 Falseenter code here
-
Where is Python Torch consuming all my hard drive space?
I'm running torch in Python to build an autoencoder. I've noticed each time I run torch code, any code, 5-10 GB are consumed on my hard drive, but there's no files/changes to any subdirectory I'm actually using in the code (and no change to the directory size before/after I run the code).
Where do I need to be mindful of that this might be occurring? I'd like to run a command if possible for torch to clear out any files it's made to date.
-
Pytorch model object has no attribute 'predict' BERT
I had train a BertClassifier model using pytorch. After creating my best.pt I would like to make in production my model and using it to predict and classifier starting from a sample, so I resume them from the checkpoint. Otherwise after put it in evaluation and freeze model, I use .predict to make in work on my sample but I'm encountering this Attribute Error. I had also inizialize it before calling the checkpoint. When I am wrong? Thank you for your help!
def save_ckp(state, is_best, checkpoint_path, best_model_path): """ function created to save checkpoint, the latest one and the best one. This creates flexibility: either you are interested in the state of the latest checkpoint or the best checkpoint. state: checkpoint we want to save is_best: is this the best checkpoint; min validation loss checkpoint_path: path to save checkpoint best_model_path: path to save best model """ f_path = checkpoint_path # save checkpoint data to the path given, checkpoint_path torch.save(state, f_path) # if it is a best model, min validation loss if is_best: best_fpath = best_model_path # copy that checkpoint file to best path given, best_model_path shutil.copyfile(f_path, best_fpath) def load_ckp(checkpoint_fpath, model, optimizer): """ checkpoint_path: path to save checkpoint model: model that we want to load checkpoint parameters into optimizer: optimizer we defined in previous training """ # load check point checkpoint = torch.load(checkpoint_fpath) # initialize state_dict from checkpoint to model model.load_state_dict(checkpoint['state_dict']) # initialize optimizer from checkpoint to optimizer optimizer.load_state_dict(checkpoint['optimizer']) # initialize valid_loss_min from checkpoint to valid_loss_min valid_loss_min = checkpoint['valid_loss_min'] # return model, optimizer, epoch value, min validation loss return model, optimizer, checkpoint['epoch'], valid_loss_min.item() #Create the BertClassfier class class BertClassifier(nn.Module): """Bert Model for Classification Tasks.""" def __init__(self, freeze_bert=True): """ @param bert: a BertModel object @param classifier: a torch.nn.Module classifier @param freeze_bert (bool): Set `False` to fine-tune the BERT model """ super(BertClassifier, self).__init__() ....... def forward(self, input_ids, attention_mask): ''' Feed input to BERT and the classifier to compute logits. @param input_ids (torch.Tensor): an input tensor with shape (batch_size, max_length) @param attention_mask (torch.Tensor): a tensor that hold attention mask information with shape (batch_size, max_length) @return logits (torch.Tensor): an output tensor with shape (batch_size, num_labels) ''' # Feed input to BERT outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask) # Extract the last hidden state of the token `[CLS]` for classification task last_hidden_state_cls = outputs[0][:, 0, :] # Feed input to classifier to compute logits logits = self.classifier(last_hidden_state_cls) return logits def initialize_model(epochs): """ Initialize the Bert Classifier, the optimizer and the learning rate scheduler.""" # Instantiate Bert Classifier bert_classifier = BertClassifier(freeze_bert=False) # Tell PyTorch to run the model on GPU bert_classifier = bert_classifier.to(device) # Create the optimizer optimizer = AdamW(bert_classifier.parameters(), lr=lr, # Default learning rate eps=1e-8 # Default epsilon value ) # Total number of training steps total_steps = len(train_dataloader) * epochs # Set up the learning rate scheduler scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, # Default value num_training_steps=total_steps) return bert_classifier, optimizer, scheduler def train(model, train_dataloader, val_dataloader, valid_loss_min_input, checkpoint_path, best_model_path, start_epochs, epochs, evaluation=True): """Train the BertClassifier model.""" # Start training loop logging.info("--Start training...\n") # Initialize tracker for minimum validation loss valid_loss_min = valid_loss_min_input for epoch_i in range(start_epochs, epochs): # ======================================= # Training # ======================================= # Print the header of the result table logging.info((f"{'Epoch':^7} | {'Batch':^7} | {'Train Loss':^12} | {'Val Loss':^10} | {'Val Acc':^9} | {'Elapsed':^9}")) # Measure the elapsed time of each epoch t0_epoch, t0_batch = time.time(), time.time() # Reset tracking variables at the beginning of each epoch total_loss, batch_loss, batch_counts = 0, 0, 0 # Put the model into the training mode model.train() # For each batch of training data... for step, batch in enumerate(train_dataloader): batch_counts +=1 # Load batch to GPU b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch) # Zero out any previously calculated gradients model.zero_grad() # Perform a forward pass. This will return logits. logits = model(b_input_ids, b_attn_mask) # Compute loss and accumulate the loss values loss = loss_fn(logits, b_labels) batch_loss += loss.item() total_loss += loss.item() # Perform a backward pass to calculate gradients loss.backward() # Clip the norm of the gradients to 1.0 to prevent "exploding gradients" torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) # Update parameters and the learning rate optimizer.step() scheduler.step() # Print the loss values and time elapsed for every 20 batches if (step % 500 == 0 and step != 0) or (step == len(train_dataloader) - 1): # Calculate time elapsed for 20 batches time_elapsed = time.time() - t0_batch # Print training results logging.info(f"{epoch_i + 1:^7} | {step:^7} | {batch_loss / batch_counts:^12.6f} | {'-':^10} | {'-':^9} | {time_elapsed:^9.2f}") # Reset batch tracking variables batch_loss, batch_counts = 0, 0 t0_batch = time.time() # Calculate the average loss over the entire training data avg_train_loss = total_loss / len(train_dataloader) logging.info("-"*70) # ======================================= # Evaluation # ======================================= if evaluation == True: # After the completion of each training epoch, measure the model's performance # on our validation set. val_loss, val_accuracy = evaluate(model, val_dataloader) # Print performance over the entire training data time_elapsed = time.time() - t0_epoch logging.info(f"{epoch_i + 1:^7} | {'-':^7} | {avg_train_loss:^12.6f} | {val_loss:^10.6f} | {val_accuracy:^10.6f} | {time_elapsed:^9.2f}") logging.info("-"*70) logging.info("\n") # create checkpoint variable and add important data checkpoint = { 'epoch': epoch_i + 1, 'valid_loss_min': val_loss, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), } # save checkpoint save_ckp(checkpoint, False, checkpoint_path, best_model_path) ## TODO: save the model if validation loss has decreased if val_loss <= valid_loss_min: print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(valid_loss_min,val_loss)) # save checkpoint as best model save_ckp(checkpoint, True, checkpoint_path, best_model_path) valid_loss_min = val_loss logging.info("-----------------Training complete--------------------------") def evaluate(model, val_dataloader): """After the completion of each training epoch, measure the model's performance on our validation set.""" # Put the model into the evaluation mode. The dropout layers are disabled during the test time. model.eval() # Tracking variables val_accuracy = [] val_loss = [] # For each batch in our validation set... for batch in val_dataloader: # Load batch to GPU b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch) # Compute logits with torch.no_grad(): logits = model(b_input_ids, b_attn_mask) # Compute loss loss = loss_fn(logits, b_labels) val_loss.append(loss.item()) # Get the predictions preds = torch.argmax(logits, dim=1).flatten() # Calculate the accuracy rate accuracy = (preds == b_labels).cpu().numpy().mean() * 100 val_accuracy.append(accuracy) # Compute the average accuracy and loss over the validation set. val_loss = np.mean(val_loss) val_accuracy = np.mean(val_accuracy) return val_loss, val_accuracy bert_classifier, optimizer, scheduler = initialize_model(epochs=n_epochs) train(model = bert_classifier ......) bert_classifier, optimizer, scheduler = initialize_model(epochs=n_epochs) model, optimizer, start_epoch, valid_loss_min = load_ckp(r"./best_model/best_model.pt", bert_classifier, optimizer) model.eval() model.freeze() sample = { "seq": "ABCDE",} predictions = model.predict(sample)
AttributeError: 'BertClassifier' object has no attribute 'predict'
-
what is the correct transformer for generating text out of image features
I am working on a project about image captioning. I wanna make a full transformer model using SWIN as the encoder and for the decoder part I need another transformer to learn the captions in the COCO 2017 dataset and make text using features extracted from each image. the problem is I do not know if this is a good idea although I have found two papers with fully transformer architecture. they used only one encoder and a custom decoder not BERT or .. so I do not know and I can not decide which language transformer is good to use for the purpose.