transformers training


import torch
from transformers import AdamW, BertForSequenceClassification, BertTokenizer

# Load a pretrained BERT with a sequence-classification head and switch to training mode
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True)
model.train()

# Simple optimizer: AdamW over all parameters
# (transformers' AdamW is deprecated in newer releases; torch.optim.AdamW is the maintained replacement)
optimizer = AdamW(model.parameters(), lr=1e-5)

# Alternative: exclude biases and LayerNorm weights from weight decay.
# This second optimizer replaces the simple one above.
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},
]
optimizer = AdamW(optimizer_grouped_parameters, lr=1e-5)

# Tokenize a small batch of texts into padded tensors
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
text_batch = ["I love Pixar.", "I don't care for Pixar."]
encoding = tokenizer(text_batch, return_tensors='pt', padding=True, truncation=True)
input_ids = encoding['input_ids']
attention_mask = encoding['attention_mask']

# One training step: passing labels makes the forward pass return the loss directly
labels = torch.tensor([1, 0])  # one label per example, shape (batch_size,)
outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
loss = outputs.loss
loss.backward()
optimizer.step()
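The linked docs page also pairs the optimizer with a linear warmup/decay learning-rate schedule. A minimal sketch; the step counts here are illustrative values, not from the original post:

from transformers import get_linear_schedule_with_warmup

# Assumed for illustration: warm up for 500 steps, then decay linearly to zero
num_training_steps = 1000
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=500, num_training_steps=num_training_steps)

# Call scheduler.step() after each optimizer.step() to advance the schedule
scheduler.step()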
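The snippet above runs a single gradient step. In practice you would wrap it in a loop over batches; here is a sketch, where train_loader is a hypothetical iterable assumed to yield (list_of_texts, label_tensor) pairs:

# train_loader is assumed, not defined in the original post
for epoch in range(3):
    for texts, batch_labels in train_loader:
        optimizer.zero_grad()  # clear gradients left over from the previous step
        batch = tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
        outputs = model(batch['input_ids'],
                        attention_mask=batch['attention_mask'],
                        labels=batch_labels)
        outputs.loss.backward()  # backpropagate the classification loss
        optimizer.step()         # update the weights
        scheduler.step()         # advance the learning-rate schedule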
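For comparison, the same docs page also shows the higher-level Trainer API, which manages the loop, scheduling, and batching itself. A sketch, assuming train_dataset is a dataset of already-tokenized examples:

from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir='./results',           # where checkpoints are written (assumed path)
    num_train_epochs=3,
    per_device_train_batch_size=16,
)
# train_dataset is assumed to exist; see the docs page for dataset preparation
trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset)
trainer.train()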

Reference: https://huggingface.co/transformers/training.html
