Python: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same
I'm developing a speech-to-text model. However, while training I get the following error:
***** Running training *****
Num examples = 531
Num Epochs = 2
Instantaneous batch size per device = 8
Total train batch size (w. parallel, distributed & accumulation) = 8
Gradient Accumulation steps = 1
Total optimization steps = 134
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-72-9f65de613925> in <module>()
12
13 # with torch.nn.DataParallel():
---> 14 trainer.train()
13 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight, bias)
297 _single(0), self.dilation, self.groups)
298 return F.conv1d(input, weight, bias, self.stride,
--> 299 self.padding, self.dilation, self.groups)
300
301 def forward(self, input: Tensor) -> Tensor:
RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor
I understand it is complaining that the input is on the CPU while the model's weights are on the GPU. Can someone please help me fix this? I'm not sure how to put the input values on CUDA.
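For reference, this is what I understand "putting values on CUDA" to mean in plain PyTorch (a sketch; `batch` here is an illustrative name for a dict of input tensors, not a variable from my code):

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)                             # move the model's weights to the GPU
batch = {k: v.to(device) for k, v in batch.items()}  # move every input tensor as well

I assumed the Trainer would handle this step for me, so I don't see where to apply it in my code below.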
############# Training ####################
import torch
torch.cuda.empty_cache()

from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union

from transformers import Wav2Vec2Processor

@dataclass
class DataCollatorCTCWithPadding:
    processor: Wav2Vec2Processor
    padding: Union[bool, str] = True
    max_length: Optional[int] = None
    max_length_labels: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None
    pad_to_multiple_of_labels: Optional[int] = None

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
        # Split inputs and labels since they have different lengths and need
        # different padding methods.
        input_features = [{"input_values": feature["input_values"]} for feature in features]
        label_features = [{"input_ids": feature["labels"]} for feature in features]

        batch = self.processor.pad(
            input_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )
        with self.processor.as_target_processor():
            labels_batch = self.processor.pad(
                label_features,
                padding=self.padding,
                max_length=self.max_length_labels,
                pad_to_multiple_of=self.pad_to_multiple_of_labels,
                return_tensors="pt",
            )

        # Replace label padding with -100 so those positions are ignored by the loss.
        labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100)
        batch["labels"] = labels
        return batch

data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True)
from transformers import Trainer
torch.cuda.empty_cache()

trainer = Trainer(
    model=model,
    data_collator=data_collator,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_data,
    eval_dataset=test_data,
    tokenizer=processor.feature_extractor,
)

# with torch.nn.DataParallel():
trainer.train()
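In case it helps with diagnosis, here is a minimal check I can run (a sketch; indexing train_data with integers and the two-example slice are assumptions about my dataset format):

# Where do the model's parameters live?
print(next(model.parameters()).device)  # expect cuda:0 on a GPU runtime

# What does the collator actually emit?
sample_batch = data_collator([train_data[i] for i in range(2)])
print({k: (v.dtype, v.device) for k, v in sample_batch.items()})
# The collated tensors start on the CPU; the Trainer is supposed to move them to the model's device.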
Link to notebook: https://colab.research.google.com/drive/1b5UmblSssdSfusXh-3R5PJ6y8phV8U3x?usp=sharing