Behavior
- After converting a model with the WECHSEL method, running the converted model raises `RuntimeError: expected scalar type Double but found Float`.
- The README's en -> Swahili example triggers the error, as does an en -> Korean conversion.
Replicating the error
""" Example Code on README.md from WECHSEL"""
import torch
from transformers import AutoModel, AutoTokenizer
from datasets import load_dataset
from wechsel import WECHSEL, load_embeddings
source_tokenizer = AutoTokenizer.from_pretrained("roberta-base")
model = AutoModel.from_pretrained("roberta-base")
# check that the original model accepts the input
source_input = source_tokenizer("Checking functionality of original model", return_tensors="pt")
model(**source_input)
BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[-0.0444, 0.0719, -0.0131, ..., -0.0682, -0.0579, 0.0079],
[ 0.0839, -0.0649, 0.0547, ..., 0.2830, 0.1913, 0.3524],
[ 0.1558, 0.1616, 0.1473, ..., -0.0187, 0.1893, 0.4051],
...
# convert the model with the WECHSEL class
target_tokenizer = source_tokenizer.train_new_from_iterator(
load_dataset("oscar", "unshuffled_deduplicated_sw", split="train")["text"],
vocab_size=len(source_tokenizer)
)
wechsel = WECHSEL(
load_embeddings("en"),
load_embeddings("sw"),
bilingual_dictionary="swahili"
)
target_embeddings, info = wechsel.apply(
source_tokenizer,
target_tokenizer,
model.get_input_embeddings().weight.detach().numpy(),
)
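# NOTE (added for clarity): `target_embeddings` comes back as a float64
# numpy array, so `torch.from_numpy` below yields a Double tensor while
# the rest of the model stays Float; this appears to be the source of
# the error reported above.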
model.get_input_embeddings().weight.data = torch.from_numpy(target_embeddings)
Warning : `load_model` does not return WordVectorModel or SupervisedModel any more, but a `FastText` object which is very similar.
Warning : `load_model` does not return WordVectorModel or SupervisedModel any more, but a `FastText` object which is very similar.
100%|██████████| 50/50 [00:34<00:00, 1.43it/s]
# use `model` and `target_tokenizer` to continue training in Swahili!
inputs = target_tokenizer("سَوَاحِلِىّ", return_tensors='pt')  # the word "Swahili" in Arabic script
print(inputs)
{'input_ids': tensor([[ 0, 25945, 144, 182, 9465, 144, 182, 5796, 201, 144,
184, 4191, 144, 184, 9708, 144, 185, 2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}
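The dtype mismatch is already visible before the forward pass. A quick diagnostic (added for clarity, assuming the conversion steps above):

print(model.get_input_embeddings().weight.dtype)  # torch.float64 (Double)
print(model.embeddings.LayerNorm.weight.dtype)    # torch.float32 (Float)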
# feed the Swahili inputs to the converted model
model(**inputs)
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
/var/folders/f8/9hn0rsx125vf87jp8_skr1l40000gn/T/ipykernel_11153/2767806960.py in <module>
3
4 # assign double for all inputs
----> 5 model(**inputs)
~/.pyenv/versions/3.8.3/envs/wechsel/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
~/.pyenv/versions/3.8.3/envs/wechsel/lib/python3.8/site-packages/transformers/models/roberta/modeling_roberta.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
842 head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)
843
--> 844 embedding_output = self.embeddings(
845 input_ids=input_ids,
846 position_ids=position_ids,
~/.pyenv/versions/3.8.3/envs/wechsel/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
~/.pyenv/versions/3.8.3/envs/wechsel/lib/python3.8/site-packages/transformers/models/roberta/modeling_roberta.py in forward(self, input_ids, token_type_ids, position_ids, inputs_embeds, past_key_values_length)
136 position_embeddings = self.position_embeddings(position_ids)
137 embeddings += position_embeddings
--> 138 embeddings = self.LayerNorm(embeddings)
139 embeddings = self.dropout(embeddings)
140 return embeddings
~/.pyenv/versions/3.8.3/envs/wechsel/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
~/.pyenv/versions/3.8.3/envs/wechsel/lib/python3.8/site-packages/torch/nn/modules/normalization.py in forward(self, input)
187
188 def forward(self, input: Tensor) -> Tensor:
--> 189 return F.layer_norm(
190 input, self.normalized_shape, self.weight, self.bias, self.eps)
191
~/.pyenv/versions/3.8.3/envs/wechsel/lib/python3.8/site-packages/torch/nn/functional.py in layer_norm(input, normalized_shape, weight, bias, eps)
2345 layer_norm, (input, weight, bias), input, normalized_shape, weight=weight, bias=bias, eps=eps
2346 )
-> 2347 return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
2348
2349
RuntimeError: expected scalar type Double but found Float
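Workaround

The traceback points to a dtype mismatch: `wechsel.apply` returns the new embeddings as a float64 numpy array, and `torch.from_numpy` preserves that dtype, so the embedding matrix becomes Double while every other parameter stays Float. A minimal sketch of a workaround, assuming the float64 embeddings are the only problem, is to cast them to float32 before assigning:

# cast the converted embeddings to float32 so they match the dtype of
# the remaining RoBERTa parameters
model.get_input_embeddings().weight.data = torch.from_numpy(
    target_embeddings
).float()

# the forward pass now runs without the dtype error
outputs = model(**inputs)
print(outputs.last_hidden_state.dtype)  # torch.float32

Casting the tensor (rather than calling `model.double()`) keeps the model in its original float32 precision, so memory use and checkpoint compatibility are unchanged.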