import torch
import torch.nn.functional as F
from torch.optim import AdamW
from torch.utils.data import DataLoader, TensorDataset
from transformers import BertTokenizer, BertForSequenceClassification

# Load Pretrained BERT Tokenizer & Model
pretrained_model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(pretrained_model_name)
model = BertForSequenceClassification.from_pretrained(pretrained_model_name, num_labels=2)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define a Small Training Dataset
train_texts = [
    "I love this product, it's amazing!",                 # Positive
    "Absolutely fantastic experience, will buy again!",   # Positive
    "Worst purchase ever. Completely useless.",           # Negative
    "I hate this item, it doesn't work!",                 # Negative
    "The quality is top-notch, highly recommend!",        # Positive
    "Terrible service, never coming back.",               # Negative
    "This is the best thing I've ever bought!",           # Positive
    "Very disappointing. Waste of money.",                # Negative
    "Superb! Exceeded all my expectations.",              # Positive
    "Not worth the price at all.",                        # Negative
]
train_labels = torch.tensor([1, 1, 0, 0, 1, 0, 1, 0, 1, 0]).to(device)  # 1 = Positive, 0 = Negative

# Tokenize Training Data
encoded_train = tokenizer(train_texts, padding=True, truncation=True, max_length=128, return_tensors='pt')
train_input_ids = encoded_train['input_ids'].to(device)
train_attention_masks = encoded_train['attention_mask'].to(device)

# Create PyTorch Dataset & DataLoader
train_dataset = TensorDataset(train_input_ids, train_attention_masks, train_labels)
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)  # Mini-batches of size 2

# Training Parameters
epochs = 5
optimizer = AdamW(model.parameters(), lr=2e-5)

# Training Loop with Mini-Batch Processing
model.train()
for epoch in range(epochs):
    total_loss = 0
    correct = 0
    total = 0

    for batch in train_loader:
        batch_input_ids, batch_attention_masks, batch_labels = batch

        optimizer.zero_grad()
        outputs = model(input_ids=batch_input_ids,
                        attention_mask=batch_attention_masks,
                        labels=batch_labels)
        loss = outputs.loss
        logits = outputs.logits

        total_loss += loss.item()
        loss.backward()
        optimizer.step()

        # Compute Training Accuracy (argmax over softmax probabilities)
        preds = torch.argmax(F.softmax(logits, dim=1), dim=1)
        correct += (preds == batch_labels).sum().item()
        total += batch_labels.size(0)

    avg_loss = total_loss / len(train_loader)
    accuracy = correct / total * 100
    print(f"Epoch {epoch+1} - Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")

# Save Fine-tuned Model
torch.save(model.state_dict(), "fine_tuned_bert.pth")

# Switch to Evaluation Mode
model.eval()

# Test Dataset
test_texts = [
    "This is a great product, I love it!",    # Positive
    "Horrible experience, I want a refund!",  # Negative
    "Highly recommended! Five stars.",        # Positive
    "Not worth it. I regret buying this.",    # Negative
]
test_labels = torch.tensor([1, 0, 1, 0]).to(device)

# Tokenize Test Data
encoded_test = tokenizer(test_texts, padding=True, truncation=True, max_length=128, return_tensors='pt')
test_input_ids = encoded_test['input_ids'].to(device)
test_attention_masks = encoded_test['attention_mask'].to(device)

# Run Model on Test Data
with torch.no_grad():
    outputs = model(input_ids=test_input_ids, attention_mask=test_attention_masks)
    predicted_labels = torch.argmax(outputs.logits, dim=1)

# Compute Test Accuracy
test_accuracy = (predicted_labels == test_labels).sum().item() / len(test_labels) * 100
print(f"\nTest Accuracy: {test_accuracy:.2f}%")

# Print Predictions
for text, label in zip(test_texts, predicted_labels):
    print(f"Text: {text}\nPredicted Label: {label.item()}\n")
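The script above saves only the model's state_dict, so reloading it later requires rebuilding the same architecture first. Below is a minimal sketch of how those weights could be restored for inference, assuming the same `bert-base-uncased` base model, `num_labels=2`, and the `fine_tuned_bert.pth` path used above.

# Sketch: reload the fine-tuned weights for later inference (assumes the same
# architecture and checkpoint path as in the training script).
reloaded_model = BertForSequenceClassification.from_pretrained(pretrained_model_name, num_labels=2)
reloaded_model.load_state_dict(torch.load("fine_tuned_bert.pth", map_location=device))
reloaded_model.to(device)
reloaded_model.eval()  # disable dropout before running predictions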