Hello ! I want to add adapter approach in my text-classification pre-trained bert, but I did not find a good explanation in the documentation on how to that.
My model class is the following:
class BertClassifier(nn.Module):
"""Bert Model for Classification Tasks."""
def __init__(self, freeze_bert=True):
@param bert: a BertModel object
@param classifier: a torch.nn.Module classifier
@param freeze_bert (bool): Set `False` to fine-tune the BERT model
super(BertClassifier, self).__init__()
# Instantiate BERT model
# Specify hidden size of BERT, hidden size of our classifier, and number of labels
self.bert = BertAdapterModel.from_pretrained(PREETRAINED_MODEL')
self.D_in = 1024
self.H = 512
self.D_out = 2
# Add a new adapter
# Instantiate the classifier head with some one-layer feed-forward classifier
self.classifier = nn.Sequential(
nn.Linear(self.D_in, 512),
nn.Linear(512, self.D_out),
# Freeze the BERT model
if freeze_bert:
for param in self.bert.parameters():
param.requires_grad = True
def forward(self, input_ids, attention_mask):
''' Feed input to BERT and the classifier to compute logits.
@param input_ids (torch.Tensor): an input tensor with shape (batch_size,
@param attention_mask (torch.Tensor): a tensor that hold attention mask
information with shape (batch_size, max_length)
@return logits (torch.Tensor): an output tensor with shape (batch_size,
num_labels) '''
# Feed input to BERT
outputs = self.bert(input_ids=input_ids,
# Extract the last hidden state of the token `[CLS]` for classification task
last_hidden_state_cls = outputs[0][:, 0, :]
# Feed input to classifier to compute logits
logits = self.classifier(last_hidden_state_cls)
return logits
The training loop is the following:
def initialize_model(epochs):
""" Initialize the Bert Classifier, the optimizer and the learning rate scheduler."""
# Instantiate Bert Classifier
bert_classifier = BertClassifier(freeze_bert=False) #false=freezed
# Tell PyTorch to run the model on GPU
bert_classifier =
# Create the optimizer
optimizer = AdamW(bert_classifier.parameters(),
lr=lr, # Default learning rate
eps=1e-8 # Default epsilon value
# Total number of training steps
total_steps = len(train_dataloader) * epochs
# Set up the learning rate scheduler
scheduler = get_linear_schedule_with_warmup(optimizer,
num_warmup_steps=0, # Default value
return bert_classifier, optimizer, scheduler
def train(model, train_dataloader, val_dataloader, valid_loss_min_input, checkpoint_path, best_model_path, start_epochs, epochs, evaluation=True):
"""Train the BertClassifier model."""
# Start training loop"--Start training...\n")
# Initialize tracker for minimum validation loss
valid_loss_min = valid_loss_min_input
for epoch_i in range(start_epochs, epochs):
if evaluation == True:
# After the completion of each training epoch, measure the model's performance
# on our validation set.
val_loss, val_accuracy = evaluate(model, val_dataloader)
# Print performance over the entire training data
time_elapsed = time.time() - t0_epoch"{epoch_i + 1:^7} | {'-':^7} | {avg_train_loss:^12.6f} | {val_loss:^10.6f} | {val_accuracy:^10.6f} | {time_elapsed:^9.2f}")"-"*70)"\n")
# create checkpoint variable and add important data
checkpoint = {
'epoch': epoch_i + 1,
'valid_loss_min': val_loss,
'state_dict': model.state_dict(),
'optimizer': optimizer.state_dict(),
# save checkpoint
save_ckp(checkpoint, False, checkpoint_path, best_model_path)
## TODO: save the model if validation loss has decreased
if val_loss <= valid_loss_min:
print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(valid_loss_min,val_loss))
# save checkpoint as best model
save_ckp(checkpoint, True, checkpoint_path, best_model_path)
valid_loss_min = val_loss
model.save_adapter("./final_adapter", "thermo_cl")"-----------------Training complete--------------------------")
bert_classifier, optimizer, scheduler = initialize_model(epochs=n_epochs)
train(model = bert_classifier....)
As you can see I have my own personalized classification head, so I don't want to use the .add_classification_head() method. Is it correct to train and activate the adapter in this way?
I would like to know if I'm using adapter properly and also how to save the checkpoint and my model weights because at the end of the training (where i suppose to save the adapter) I receive this error:
AttributeError: 'BertClassifier' object has no attribute 'save_adapter'
Thanks for the help!
question Stale