环境搭建
Conda(Anaconda公司)相比于虚拟环境+pip更适合深度学习:venv+pip只能管理Python包,无法处理深度学习框架依赖的非Python组件,如CUDA运行库,OpenCV的C++底层库,而Conda可以
Pytorch:深度学习框架
本地环境配置
第一步:安装Anaconda
第二步:创建虚拟环境
conda create -n py310 python=3.10 # 创建虚拟环境py310
conda activate py310 # 进入虚拟环境py310
第三步:搭建深度学习框架pytorch:查看是否有NVIDIA显卡,并到Pytorch官网下载对应的pytorch-cpu或pytorch-gpu版本(用nvidia-smi查看最高可支持的CUDA版本)官方文档
注意:过去在安装pytorch时,通常需要先手动安装CUDA工具包和cuDNN(CUDA即统一计算设备架构,简单说,CUDA 是打通 “CPU 指挥 GPU 干活” 的桥梁,也是让 NVIDIA 显卡从 “只用来玩游戏 / 渲染” 变成 “能做高性能计算” 的核心工具。cuDNN是CUDA的扩展库),但是从pytorch1.0开始,自带了CUDA运行库,不需要手动安装
测试
ipython
import torch
torch.cuda.is_available() # 输出True
第四步:pycharm中使用存在的conda环境:anaconda/Scripts/conda.exe加载环境,选择py310;vscode中下载python和jupyter插件,选择py310环境
云服务器环境配置
ssh到云服务器后配置tmux和vim
scp -P 32579 .bashrc .tmux.conf .vimrc featurize@workspace.featurize.cn:
AutoDL平台需要自己下载tmux,并且终端不能识别conda命令,可以在.bashrc中添加:source "/root/miniconda3/etc/profile.d/conda.sh"
创建虚拟环境
conda create -n py38 python=3.8 # 创建虚拟环境py38
conda activate py38 # 进入虚拟环境py38
安装想要的pytorch版本,官方文档
conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cuda=11.6 -c pytorch -c nvidia
使用本地IDE远程调试
conda activate py38
which python # 得到创建的虚拟环境的python解释器位置
把python解释器换成远程解释器 File-Settings-Build, Execution, Deployment-Deployment-Mappings可以修改同步文件夹
常用的conda命令
虚拟环境管理
# 查看所有虚拟环境
conda env list # 查看所有虚拟环境
conda create -n env_name python=3.10 # 创建虚拟环境env_name
conda activate env_name # 激活虚拟环境env_name
conda env remove -n env_name # 删除虚拟环境env_name
conda create -n new_env --clone old_env # 基于old_env创建new_env
包管理
conda list # 查看当前激活环境的包
conda install numpy=1.24 # 安装到当前环境
conda update numpy # 更新指定包
conda update --all # 更新当前环境所有包
conda update conda # 更新conda自身
conda remove numpy # 当前环境卸载
conda search tensorflow # 搜索可用的tensorflow版本
配置与其他常用命令
conda --version # 查看conda版本
conda env export > environment.yml # 导出当前环境到yaml文件(分享/备份)
conda env create -f environment.yml # 从yaml文件创建环境(一键复现)
torch包、torchvision包的使用
dir()和help()函数
dir(torch)能输出torch包下所有的包和文件,help()输出使用说明
jupyter
在虚拟环境中pip安装后
conda activate py310
jupyter notebook # 打开jupyter notebook
jupyter lab # 打开jupyter lab
读入图片
# Option 1: read the image as a PIL Image object
from PIL import Image
img_path1 = "图片路径1"
img_PIL = Image.open(img_path1)
# Option 2: read the image as a numpy.ndarray via OpenCV
import cv2
img_path2 = "图片路径2"
img_array = cv2.imread(img_path2)
无论是PIL Image类型还是numpy.ndarray类型,都可以通过ToTensor()转换为tensor类型,PIL Image图片可以通过img_PIL.show()展示图片,numpy.ndarray和tensor类型都能用tensorboard展示
Dataset父类
继承之后,需要重写__init__(),__len__(),__getitem__()函数
from torch.utils.data import Dataset
import os
from PIL import Image


class MyData(Dataset):
    """Folder-per-class image dataset: every file found under
    root_dir/label_name is one sample, labeled with the directory name."""

    def __init__(self, root_dir, label_name):
        super().__init__()
        self.root_dir = root_dir                                  # e.g. hymenoptera_data/train/
        self.label_name = label_name                              # e.g. ants
        self.path = os.path.join(self.root_dir, self.label_name)  # hymenoptera_data/train/ants/
        self.img_list = os.listdir(self.path)                     # 1.jpg, 2.jpg, ...

    def __len__(self):
        """Number of image files in the directory."""
        return len(self.img_list)

    def __getitem__(self, idx):
        """Return (PIL image, label string) for sample *idx*."""
        file_name = self.img_list[idx]
        full_path = os.path.join(self.path, file_name)
        return Image.open(full_path), self.label_name


ants_dataset = MyData("hymenoptera_data/train/", "ants")
bees_dataset = MyData("hymenoptera_data/train/", "bees")
# Dataset overloads "+": the sum is a ConcatDataset of both folders
train_dataset = ants_dataset + bees_dataset
tensorboard
可视化工具
from torch.utils.tensorboard import SummaryWriter
import numpy as np
from PIL import Image
from torchvision import transforms

# Log an image (twice, in two formats) and a scalar curve to ./logs.
writer = SummaryWriter("logs")
image_path = "hymenoptera_data/train/ants/0013035.jpg"
img_PIL = Image.open(image_path)
# cv2.imread(path) would also yield a numpy.ndarray directly
img_array = np.array(img_PIL)
to_tensor = transforms.ToTensor()
img_tensor = to_tensor(img_PIL)
# ndarray images are H x W x C, so dataformats='HWC' is required;
# ToTensor output is already C x H x W, so the tensor needs no hint
writer.add_image("img_array", img_array, 1, dataformats='HWC')
writer.add_image("img_tensor", img_tensor, 1)
for i in range(100):
    writer.add_scalar("y=2x", 2 * i, i)
writer.close()
打开终端,输入
tensorboard --logdir=代码中自定义的文件夹名 --port=自定义端口号 # tensorboard --logdir=logs 使用默认端口
torchvision.transforms
from torchvision import transforms
from PIL import Image
import cv2
img_path = "hymenoptera_data/train/ants/0013035.jpg"
img_PIL = Image.open(img_path)  # PIL Image
img_array = cv2.imread(img_path)  # numpy.ndarray (N-dimensional array)
# ToTensor(): converts a PIL Image or numpy.ndarray (H x W x C) into a tensor
trans_totensor = transforms.ToTensor()
img_PIL_to_tensor = trans_totensor(img_PIL)
img_array_to_tensor = trans_totensor(img_array)
# Normalize: output[c] = (input[c] - mean[c]) / std[c], per channel
print(img_PIL_to_tensor[0][0][0])
trans_norm = transforms.Normalize(mean=[0.1, 0.1, 0.1], std=[0.2, 0.2, 0.2])  # subtract mean, divide by std
img_norm = trans_norm(img_PIL_to_tensor)  # Normalize expects a tensor input
print(img_norm[0][0][0])
# Resize
print(img_PIL.size)
img_PIL.show()  # tensorboard's add_image is an alternative way to inspect it
trans_resize = transforms.Resize((512, 512))  # resize the PIL Image to exactly 512x512
img_resize = trans_resize(img_PIL)
print(img_resize.size)
img_resize.show()
# Compose chains transforms; a single int keeps the aspect ratio (shorter side -> 512)
trans_resize2 = transforms.Resize(512)
trans_compose = transforms.Compose([trans_resize2, trans_totensor])
img_compose = trans_compose(img_PIL)
torchvision.dataset
https://docs.pytorch.org/vision/stable/index.html
import torchvision.transforms
from torchvision import datasets
from torch.utils.tensorboard import SummaryWriter

# Download CIFAR10 and convert every PIL image to a tensor on access.
datasets_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])
train_dataset = datasets.CIFAR10(root="data", train=True, transform=datasets_transform, download=True)
test_dataset = datasets.CIFAR10(root="data", train=False, transform=datasets_transform, download=True)
# Each sample is a (tensor image, class index) pair; PIL Image -> tensor
print(train_dataset[0])
print(train_dataset.classes)  # the debugger reveals even more attributes

write = SummaryWriter("test_datasets")
for i in range(10):
    img_tensor, class_idx = train_dataset[i]
    write.add_image("tensor", img_tensor, i)
write.close()
DataLoader
有了数据集之后,通过DataLoader进行取数据,batch_size=4表示每次取4张图
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter

test_dataset = datasets.CIFAR10(root="data", train=False, transform=transforms.ToTensor(), download=True)
# batch_size=4 -> four images per batch, yielded in dataset order
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=0, drop_last=False)

# First image of the dataset itself
img, target = test_dataset[0]
print(img.shape, target)

# The DataLoader stacks 4 samples into one batch
write = SummaryWriter("test_DataLoader")
for imgs, targets in test_loader:
    print(imgs.shape, targets)
    write.add_images("test_dataset", imgs, 0)
    break
write.close()
nn.Module父类
所有神经网络模块的基类,继承之后,需要重写__init__(), forward()函数
from torch import nn
import torch


class MyNet(nn.Module):
    """Minimal nn.Module subclass: forward simply adds 1 to its input."""

    def __init__(self):
        super().__init__()

    def forward(self, input):
        # Calling myNet(input) dispatches here through nn.Module.__call__
        return input + 1


myNet = MyNet()
input = torch.tensor(1.0)
output = myNet(input)  # equivalent to myNet.forward(input)
print(output)
nn.Conv2d
卷积层
import torch
import torch.nn as nn


class TestConv2d(nn.Module):
    """Demo of nn.Conv2d on CIFAR10 batches.

    NOTE: this section previously contained a copy of the nn.Linear demo;
    replaced with an actual convolution example matching the heading.
    """

    def __init__(self):
        super().__init__()
        # in_channels=3 (RGB), out_channels=6, kernel_size=3, stride=1, padding=0
        self.conv = nn.Conv2d(3, 6, 3, stride=1, padding=0)

    def forward(self, x):
        return self.conv(x)


if __name__ == "__main__":
    # heavy imports / dataset download only when run as a script
    import torchvision
    from torch.utils.data import DataLoader

    dataset = torchvision.datasets.CIFAR10(root="data", train=False,
                                           transform=torchvision.transforms.ToTensor())
    dataloader = DataLoader(dataset, batch_size=64)
    test = TestConv2d()
    for imgs, targets in dataloader:
        # [64, 3, 32, 32] -> [64, 6, 30, 30]  (output side = 32 - 3 + 1 = 30)
        output = test(imgs)
        print(output.shape)
nn.MaxPool2d
池化层
import torch.nn as nn
import torch
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10(root="data", train=False,
                                       transform=torchvision.transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=64)


class TestMaxPool2d(nn.Module):
    """3x3 max pooling; ceil_mode=True keeps partial windows at the border."""

    def __init__(self):
        super().__init__()
        self.maxpool = nn.MaxPool2d(3, ceil_mode=True)

    def forward(self, x):
        return self.maxpool(x)


test = TestMaxPool2d()
writer = SummaryWriter('logs')
for step, (imgs, targets) in enumerate(dataloader):
    writer.add_images("input", imgs, step)
    # pooling preserves the channel count, so add_images can display the result
    output = test(imgs)
    writer.add_images("output", output, step)
writer.close()
nn.ReLU
非线性激活层
import torch
import torch.nn as nn

# ReLU clamps negatives to 0 and passes positives through unchanged.
input = torch.tensor([[1, -0.5], [-1, 3]])
input = input.reshape(-1, 1, 2, 2)
print(input)


class TestReLu(nn.Module):
    """Wraps nn.ReLU() as a tiny module."""

    def __init__(self):
        super().__init__()
        self.relu = nn.ReLU()

    def forward(self, input):
        return self.relu(input)


test = TestReLu()
output = test(input)
print(output)
nn.Linear
线性层
import torch
import torchvision
from torch.utils.data import DataLoader
import torch.nn as nn

dataset = torchvision.datasets.CIFAR10(root="data", train=False,
                                       transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=64)


class TestLinear(nn.Module):
    """Fully-connected layer mapping a flattened 64x3x32x32 batch
    (196608 values) down to 10 outputs."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(196608, 10)

    def forward(self, x):
        return self.linear(x)


test = TestLinear()
for imgs, targets in dataloader:
    # [64, 3, 32, 32] -> [1, 1, 1, 196608]; torch.flatten(imgs) works too
    input = torch.reshape(imgs, (1, 1, 1, -1))
    output = test(input)
    print(output.shape)
nn.Sequential
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter


class Test(nn.Module):
    """CIFAR10 classifier built with nn.Sequential: three conv+pool
    stages followed by two linear layers."""

    def __init__(self):
        super().__init__()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),   # 32x32 kept by padding=2
            nn.MaxPool2d(2),             # -> 16x16
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2),             # -> 8x8
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.MaxPool2d(2),             # -> 4x4
            nn.Flatten(),                # 64 * 4 * 4 = 1024
            nn.Linear(1024, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        return self.model1(x)


test = Test()
# Sanity-check the architecture with a dummy batch
input = torch.ones((64, 3, 32, 32))
output = test(input)
print(output.shape)

writer = SummaryWriter("logs")
writer.add_graph(test, input)
writer.close()
nn.MSELoss
损失
计算实际输出和目标之间的差距,为更新输出提供依据(反向传播,grad) L1Loss:平均绝对误差损失(MAE),MSELoss:均方误差损失,CrossEntropyLoss:交叉熵损失,用于分类问题
loss = loss_fn(output, target)
loss.backward() # 反向传播,grad从None变成具体的值
1、计算实际输出和目标之间的差距
import torch
import torch.nn as nn

# MSELoss demo: mean squared error over two length-3 vectors.
inputs = torch.tensor([1, 2, 3], dtype=torch.float32)
targets = torch.tensor([1, 2, 5], dtype=torch.float32)
# reshape into (N, C, H, W)-style 4-D tensors
inputs = inputs.reshape(1, 1, 1, 3)
targets = targets.reshape(1, 1, 1, 3)
loss = nn.MSELoss()
res = loss(inputs, targets)
# (0^2 + 0^2 + 2^2) / 3 = 1.333...
print(res)
2、反向传播
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10(root="data", train=False,
                                       transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, 64)


class Test(nn.Module):
    """CIFAR10 classifier: three conv+pool stages, then two linear layers."""

    def __init__(self):
        super().__init__()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        return self.model1(x)


loss = nn.CrossEntropyLoss()
test = Test()
for imgs, targets in dataloader:
    outputs = test(imgs)
    res_loss = loss(outputs, targets)
    # backward() fills each parameter's .grad (None before the first call)
    res_loss.backward()
    print(res_loss)
torch.optim
优化器
# Canonical optimization step (pseudo-code: dataset, model, loss_fn and
# optimizer are assumed to be defined elsewhere).
for input, target in dataset:
    output = model(input)
    loss = loss_fn(output, target)
    optimizer.zero_grad()  # clear old gradients (.grad -> 0)
    loss.backward()        # backprop: recompute .grad
    optimizer.step()       # update parameters using .grad
torchvision.models
下载模型(optional:预训练,未预训练),调整模型结构
import torchvision
import torch.nn as nn
import torch

# VGG16 is trained on ImageNet (1000 classes) while CIFAR10 has only 10,
# so the classifier head has to be adapted.
vgg16_false = torchvision.models.vgg16(weights=None)      # random init
vgg16_true = torchvision.models.vgg16(weights='DEFAULT')  # pretrained
train_dataset = torchvision.datasets.CIFAR10(root="data", train=True,
                                             transform=torchvision.transforms.ToTensor(),
                                             download=True)
# Option 1: append an extra 1000 -> 10 linear layer
print("vgg16_true", vgg16_true)
vgg16_true.classifier.add_module("add_linear", nn.Linear(1000, 10))
print("modify vgg16_true", vgg16_true)
# Option 2: replace the final linear layer in place
print("vgg16_false", vgg16_false)
vgg16_false.classifier[6] = nn.Linear(4096, 10)
print("modify vgg16_false", vgg16_false)
torch.save和torch.load
模型保存和加载
import torch
import torchvision

vgg16 = torchvision.models.vgg16(weights=None)
# Save option 1: whole model (structure + parameters)
torch.save(vgg16, "vgg16_method1.pth")
# Save option 2: parameters only, as a state dict (officially recommended)
torch.save(vgg16.state_dict(), "vgg16_method2.pth")
# Load option 1 (matches save option 1)
model1 = torch.load("vgg16_method1.pth")
print(model1)
# Load option 2: rebuild the architecture, then load the state dict into it
model2 = torchvision.models.vgg16(weights=None)
model2.load_state_dict(torch.load("vgg16_method2.pth"))
print(model2)
myNet.train()和myNet.eval()
- 调用 model.train():模型进入训练模式,Dropout/BatchNorm 按训练逻辑运行;
- 调用 model.eval():等价于 model.train(mode=False),模型进入评估模式,禁用 Dropout、固定 BatchNorm 统计量,避免评估时引入随机误差。
利用GPU训练
- 方法一:找到数据(input,target)、网络模型、损失函数,加上.cuda()
- 方法二:
device = torch.device("cuda:0") myNet = myNet.to(device)
标准流程
- 数据准备(Data Loading):Dataset和DataLoader
- 模型搭建(Model Building)
- 损失函数(Loss Function):分类问题用交叉熵损失(CrossEntropyLoss),回归问题用均方误差损失(MSELoss)
- 优化器(Optimizer):Adam优化器。optimizer.zero_grad(),loss.backward(),optimizer.step()
- 训练循环(Training Loop):写个for循环,把上面四个步骤串起来
在CIFAR10数据集上进行分类 train.py
import torchvision
from torch.utils.data import DataLoader
from torch import nn
import torch
from torch.utils.tensorboard import SummaryWriter

device = torch.device("cuda:0")
print(device)

# --- Data loading ---
train_dataset = torchvision.datasets.CIFAR10(root="data", train=True,
                                             transform=torchvision.transforms.ToTensor(),
                                             download=True)
test_dataset = torchvision.datasets.CIFAR10(root="data", train=False,
                                            transform=torchvision.transforms.ToTensor(),
                                            download=False)
train_dataloader = DataLoader(train_dataset, batch_size=64)
test_dataloader = DataLoader(test_dataset, batch_size=64)

# --- Model ---
from net import TestCIFAR10
myNet = TestCIFAR10()
myNet = myNet.cuda()
# myNet = myNet.to(device)  # equivalent alternative

# --- Loss function ---
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.cuda()

# --- Optimizer ---
learning_rate = 0.01
optimizer = torch.optim.SGD(myNet.parameters(), lr=learning_rate)

# --- Training loop ---
num_epochs = 10
total_train_step = 0
total_test_step = 0
writer = SummaryWriter("logs")
for epoch in range(1, num_epochs + 1):
    print("------------------第{}轮训练------------------".format(epoch))
    # 1. train
    myNet.train()
    epoch_loss = 0
    for imgs, targets in train_dataloader:
        imgs, targets = imgs.cuda(), targets.cuda()
        outputs = myNet(imgs)
        loss = loss_fn(outputs, targets)
        epoch_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_train_step += 1
        if total_train_step % 100 == 0:
            print("训练step:{}, loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)
    print("第{}轮训练整体训练集上的Loss:{}".format(epoch, epoch_loss))
    # 2. evaluate (no gradients needed)
    myNet.eval()
    test_loss = 0
    test_accuracy = 0
    with torch.no_grad():
        for imgs, targets in test_dataloader:
            imgs, targets = imgs.cuda(), targets.cuda()
            outputs = myNet(imgs)
            loss = loss_fn(outputs, targets)
            test_loss += loss.item()
            accuracy = (outputs.argmax(1) == targets).sum()
            test_accuracy += accuracy
    print("整体测试集上的Loss:{}".format(test_loss))
    print("整体测试集上的正确率Accuracy:{}".format(test_accuracy / len(test_dataset)))
    total_test_step += 1
    writer.add_scalar("test_accuracy", test_accuracy / len(test_dataset), total_test_step)
    # 3. (learning-rate scheduling would go here)
    # 4. save a checkpoint every epoch
    torch.save(myNet, "myNet{}.pth".format(epoch))
writer.close()
test
import torch
import torchvision
from PIL import Image

device = torch.device("cuda:0")
img_path = "test/dog.jpg"
img = Image.open(img_path)
# print(img)
# PNGs may carry an alpha channel; force 3-channel RGB
img = img.convert("RGB")
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((32, 32)),
    torchvision.transforms.ToTensor()
])
img = transform(img)
# print(img.shape)
# NOTE: loading a whole saved model requires its class to be importable here
myNet = torch.load("myNet10.pth")
myNet = myNet.to(device)
# add the batch dimension: (3, 32, 32) -> (1, 3, 32, 32)
img = torch.reshape(img, (1, 3, 32, 32))
img = img.to(device)
myNet.eval()
with torch.no_grad():
    output = myNet(img)
print(output)
print(output.argmax(1))  # predicted class index
论文复现
论文及代码查询
环境搭建
step1 克隆项目
git clone XXX
step2 创建和激活虚拟环境
如果有environment.yml跳过step2
conda create -n ENV_NAME
conda activate ENV_NAME
step3 下载项目需要的包
如果是requirements.txt
pip install -r requirements.txt
如果是environment.yml
conda env create -f environment.yml # 从yaml文件创建环境(一键复现)
step4 检查pytorch包
import torch
print(torch.cuda.is_available())
检查step3下载的pytorch包是否是gpu版本的,如果不是,到pytorch官网下载gpu版本的