pytorch2023

1_introduction


Traditional programming: input + rules -> results. Machine learning: input + expected results -> the rules are derived.

2_pytorch_fundamentals

Why use machine learning


Machine learning is not a silver bullet; don't apply it everywhere indiscriminately.

The Number 1 Rule of Machine Learning and What Is Deep Learning Good For


3. Machine Learning vs. Deep Learning

ML works well on structured data; DL works well on unstructured data.

4. Anatomy of Neural Networks



5. Different Types of Learning Paradigms

6. What Can Deep Learning Be Used For


7. What Is and Why PyTorch





8. What Are Tensors


9. What We Are Going To Cover With PyTorch




10. How To and How Not To Approach This Course


11. Important Resources For This Course

12. Getting Setup to Write PyTorch Code

Select a GPU runtime




13. Introduction to PyTorch Tensors

# -*- coding: utf-8 -*-
"""pytorch2023_1.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1Cwp9BwtatoEfJCFO6qKd-fXevd3ua52b
"""

## 00. pytorch fundamentals

print("hello i'm excited to learn pytorch!")

!nvidia-smi

import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# 2.0.0+cu118
print(torch.__version__)

# scalar
scalar = torch.tensor(7)
scalar # tensor(7)

# Returns the number of dimensions of the tensor; same as Tensor.dim() -> int
scalar.ndim # 0

# returns the value of a single-element tensor as a standard Python number
scalar.item() # 7

# vector
vector = torch.tensor([7,7])
vector # tensor([7, 7])

vector.ndim # 1 -> ndim equals the number of square-bracket pairs

vector.shape # torch.Size([2]) -> the number of elements inside the brackets

# matrix
MATRIX = torch.tensor([[7,8],[9,10]])
MATRIX # tensor([[ 7,  8],
# [ 9, 10]])

MATRIX.ndim # 2

MATRIX[0] # tensor([7, 8])

MATRIX.shape # torch.Size([2, 2])

# tensor
TENSOR = torch.tensor([[[1,2,3],[3,6,9],[2,4,5]]])
TENSOR

TENSOR.ndim # 3

TENSOR.shape # torch.Size([1, 3, 3])

14. Creating Random Tensors in PyTorch


## Random tensors

random_tensor = torch.rand(3,4)
random_tensor

random_tensor.ndim

# create a random tensor with similar shape to an image tensor
random_image_size_tensor = torch.rand(size=(3,224,224)) # 224 (height) x 224 (width) pixels, 3 RGB color channels
random_image_size_tensor.shape,random_image_size_tensor.ndim

15. Creating Tensors With Zeros and Ones in PyTorch

### zero and ones

# create a tensor of all zeros
zeros = torch.zeros(size=(3,4))
zeros

# create a tensor of all ones
ones = torch.ones(size=(3,4))
ones

ones.dtype # torch.float32 -> the datatype of the elements in the tensor

16. Creating a Tensor Range and Tensors Like Other Tensors

# creating a range of tensors and tensors-like
# torch.range() is deprecated and will be removed in a future release; use torch.arange() instead
torch.range(0,10)
# start, end, step -> starts at start, adds step each time, and stops before end
torch.arange(start=1,end=1000,step=77)
# creating tensors like (a tensor with the same shape as another tensor)
one_to_ten = torch.arange(1,11)
ten_zeros = torch.zeros_like(input=one_to_ten)
ten_zeros

17. Dealing With Tensor Data Types


# tensor datatypes
# float 32 tensor
float_32_tensor = torch.tensor([3.0,6.0,9.0],dtype=None)
float_16_tensor = torch.tensor([3.0,6.0,9.0],dtype=torch.float16)
print(float_32_tensor)
print(float_32_tensor.dtype)
print(float_16_tensor)
print(float_16_tensor.dtype)
test_tensor = torch.tensor([3.0,6.0,9.0],
               dtype=None,
               device=None,
               requires_grad=False)
# requires_grad: if any input requires gradients, the output also keeps gradient
# information, so gradients can flow back to that input during backpropagation.
# Conversely, if no input requires gradients, the output's requires_grad is
# automatically set to False and that subgraph is pruned from the backward pass.
test_tensor
float_16_tensor=float_32_tensor.type(torch.float16)
float_16_tensor
float_16_tensor * float_32_tensor
# int_32_tensor = torch.tensor([3,6,9],dtype=torch.int32)
int_32_tensor = torch.tensor([3,6,9],dtype=torch.long)
int_32_tensor
float_32_tensor * int_32_tensor
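
A quick sketch (my addition, not course code) of the requires_grad propagation described above:

# if any input requires gradients, the output keeps gradient information
a = torch.tensor([1.0, 2.0], requires_grad=True)
b = torch.tensor([3.0, 4.0]) # requires_grad=False by default
out = a * b
print(out.requires_grad) # True -> gradients can flow back to a
# if no input requires gradients, the output's requires_grad is False
# and that subgraph is pruned during backpropagation
print((b * 2).requires_grad) # False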

18. Getting Tensor Attributes

### getting information from tensors
# create a tensor
test_tensor = torch.rand(3,4)
print(test_tensor)
print(test_tensor.size())
print(test_tensor.dtype,test_tensor.size())
# find out details about tensor
print(test_tensor)
print(f'Datatype of tensor: {test_tensor.dtype}')
print(f'Shape of tensor: {test_tensor.shape}')
print(f'Device tensor is on: {test_tensor.device}')

19. Manipulating Tensors (Tensor Operations)

# manipulating tensors
tensor = torch.tensor([1,2,3])
tensor + 10
# multiply tensor by 10
tensor * 10
# subtract
tensor - 10
# try out pytorch built-in functions
torch.mul(tensor,10)
torch.add(tensor,10)

20-22. Matrix Multiplication




# Matrix multiplication
# element-wise multiplication
print(tensor,"*",tensor)
print(f"equals: {tensor*tensor}")
# matrix multiplication
# 14 = 1*1 + 2*2 + 3*3
torch.matmul(tensor,tensor)
%%time
value = 0
for i in range(len(tensor)):
  value += tensor[i] * tensor[i]
print(value)
%%time
torch.matmul(tensor,tensor)
%%time
tensor @ tensor
# one of the most common errors in deep learning: shape errors
# shapes for matrix multiplication
tensor_a = torch.tensor([[1,2],[3,4],[5,6]])
tensor_b = torch.tensor([[7,10],[8,11],[9,12]])
# torch.mm(tensor_a,tensor_b) # mm is an alias for matmul -> shape error: (3,2) @ (3,2)
print(tensor_b.T) # transpose: (3,2) -> (2,3)
torch.mm(tensor_a,tensor_b.T)
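
The rule of thumb (my summary): the inner dimensions must match, and the result takes the outer dimensions:

# (3,2) @ (3,2) -> error: inner dimensions (2 and 3) don't match
# (3,2) @ (2,3) -> OK: result has the outer dimensions (3,3)
print(torch.matmul(torch.rand(3,2), torch.rand(2,3)).shape) # torch.Size([3, 3])
print(torch.matmul(torch.rand(2,3), torch.rand(3,2)).shape) # torch.Size([2, 2])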

23. Finding the Min Max Mean and Sum of Tensors (Tensor Aggregation)

# finding the min,max,mean,sum,etc...(tensor aggregation)
# create a tensor
x = torch.arange(0,100,10)
x,x.dtype
# find the min
torch.min(x), x.min()
# find the max
torch.max(x), x.max()
# find the mean(need a tensor of float32 datatype to work)
torch.mean(x.type(torch.float32)), x.type(torch.float32).mean()
# find the sum
torch.sum(x), x.sum()

24. Finding The Positional Min and Max of Tensors

## Finding the positional min and max
# find the position in tensor that has the minimum value
x.argmin() # the index of the minimum value
x[x.argmin()]
# find the position in tensor that has the maximum value
x[x.argmax()]

25. Reshaping, Viewing and Stacking Tensors

## Reshaping, stacking, squeezing and unsqueezing tensors
x = torch.arange(1.,10.)
x, x.shape

# add an extra dimension
# x_reshaped = x.reshape(1,7)
# x_reshaped = x.reshape(2,10)
# x = torch.arange(1.,11.)
# x_reshaped = x.reshape(2,5) # 2*5=1*10
x_reshaped = x.reshape(1,9)
# x_reshaped = x.reshape(9,1)
x_reshaped,x_reshaped.shape

# change the view (create a view)
z = x.view(1,9)
z,z.shape

# changing z changes x (because a view of a tensor shares the same memory as the original input)
z[:,0] = 5
z,x

# stack tensors on top of each other
# x_stacked = torch.stack([x,x,x,x],dim=0) # stack as rows -> shape (4, 9)
x_stacked = torch.stack([x,x,x,x],dim=1) # stack as columns -> shape (9, 4)
x_stacked

26. Squeezing, Unsqueezing and Permuting Tensors

# torch.squeeze() - removes all dimensions of size 1 from a target tensor
print(f'Previous tensor: {x_reshaped}')
print(f'Previous shape: {x_reshaped.shape}')
# remove extra dimensions from x_reshaped
x_squeezed = x_reshaped.squeeze()
print(f'\nNew tensor: {x_squeezed}')
print(f'New shape: {x_squeezed.shape}')

# torch.unsqueeze() - adds a single dimension to a target tensor at a specific dim
print(f'Previous target: {x_squeezed}')
print(f'Previous shape: {x_squeezed.shape}')
# add an extra dimension with unsqueeze
# x_unsqueezed = x_squeezed.unsqueeze(dim=0)
x_unsqueezed = x_squeezed.unsqueeze(dim=1)
print(f'\nNew tensor: {x_unsqueezed}')
print(f'\nNew shape: {x_unsqueezed.shape}')

# torch.permute - rearranges the dimensions of a target tensor in a specified order
x_original = torch.rand(size=(224,224,3)) # [height,width,color_channels]
# permute the original tensor to rearrange the axis(or dim) order
# the n-th argument of permute says which source dim becomes dim n of the new tensor
# source dim 2 (the color channels) -> new dim 0
x_permuted = x_original.permute(2,0,1) # shifts axes 0->1, 1->2, 2->0
print(f'Previous shape: {x_original.shape}')
print(f'New shape: {x_permuted.shape}') # [color_channels,height,width]
# permute returns a view (it shares memory with the original tensor)

x_original[0,0,0] = 132734
x_original[0,0,0],x_permuted[0,0,0]

27. Selecting Data From Tensors (Indexing)

# indexing(selecting data from tensors)
## indexing with pytorch is similar to indexing with numpy

x = torch.arange(1,10).reshape(1,3,3)
x, x.shape

x[0]

x[0][0],x[0,0],x[0][0][0]

# : selects everything along that dimension
x[:,0]

x[:,:,1]

x[:,1,1]

x[0,0,:]

28. PyTorch Tensors and NumPy

## pytorch tensors & numpy

array = np.arange(1.0,8.0)
tensor = torch.from_numpy(array)
array,tensor

print(tensor.dtype)
tensor.type(torch.float32)
array.dtype, torch.arange(1.0,8.0).dtype

array = array+1
array, tensor # array+1 rebinds array to a new object, so the tensor does not change

# tensor to numpy
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor,numpy_tensor

tensor = tensor+1
tensor,numpy_tensor # likewise, the numpy array does not change
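
One caveat worth noting (my addition): torch.from_numpy shares memory with the source array, so in-place NumPy operations do change the tensor; only rebinding the name (array = array + 1) breaks the link:

array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)
array += 1 # in-place: modifies the shared buffer
print(tensor) # the tensor's values changed too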

29. PyTorch Reproducibility (Taking the Random Out of Random)


import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## reproducibility
# torch.rand(3,3)
random_tensor_a = torch.rand(3,4)
random_tensor_b = torch.rand(3,4)
print(random_tensor_a,random_tensor_b,random_tensor_a==random_tensor_b)

# reproducible randomness
RANDOM_SEED=42 # random seed
torch.manual_seed(RANDOM_SEED)
random_tensor_c = torch.rand(3,4)
torch.manual_seed(RANDOM_SEED)
random_tensor_d = torch.rand(3,4)
print(random_tensor_c,random_tensor_d,random_tensor_c==random_tensor_d)

30. Different Ways of Accessing a GPU in PyTorch



print(torch.cuda.is_available())
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
print(torch.cuda.device_count())
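
A small sketch (my addition) of the device-agnostic pattern used from here on: create tensors on the CPU, move them with .to(device), and move them back with .cpu() before converting to NumPy:

tensor = torch.tensor([1, 2, 3])
tensor_on_device = tensor.to(device) # moves to the GPU if one is available
print(tensor_on_device.device)
# NumPy only works with CPU memory, so move back first
print(tensor_on_device.cpu().numpy())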

32. PyTorch Fundamentals Exercises and Extra-Curriculum



3_PyTorch Workflow

1. Introduction and Where You Can Get Help


2. Getting Setup and What We Are Covering


import torch
from torch import nn # contains all of PyTorch's neural network building blocks
import matplotlib.pyplot as plt
print(torch.__version__) # 2.0.0+cpu (the video uses 1.10.0)

3. Creating a Simple Dataset Using the Linear Regression Formula


## linear regression: y = wx + b
weight = 0.7
bias = 0.3

start = 0
end = 1
step = 0.02
x = torch.arange(start, end,step).unsqueeze(dim=1)
y = weight * x + bias

print(x[:10],y[:10],len(x),len(y))

4. Splitting Our Data Into Training and Test Sets

# split the dataset (80% train / 20% test)
train_split = int(0.8*len(x))
# print(train_split)
x_train,y_train = x[:train_split],y[:train_split]
x_test,y_test = x[train_split:],y[train_split:]

5. Building a function to Visualize Our Data

# visualization
def plot_predictions(train_data=x_train,
                        train_labels=y_train,
                        test_data=x_test,
                        test_labels=y_test,
                        predictions=None):
    """
        plots training data,test data and compares predictions
    """
    plt.figure(figsize=(10,7))
    plt.scatter(train_data,train_labels,c='b',s=4,label='training data')
    plt.scatter(test_data,test_labels,c='g',s=4,label='testing data')
    if predictions is not None:
        # plot the predictions passed in by the caller
        plt.scatter(test_data,predictions,c='r',s=4,label='predictions')
    plt.legend(prop={"size":14})


plot_predictions()
plt.show()

6. Creating Our First PyTorch Model for Linear Regression

# build a model -> y = wx+b
# subclass nn.Module (which contains most of PyTorch's building blocks)
class LinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        # model parameter w (the weight)
        self.weights=nn.Parameter(torch.randn(1,
                                              requires_grad=True,
                                              dtype=torch.float))
        # bias b
        self.bias=nn.Parameter(torch.randn(1,
                                          requires_grad=True,
                                          dtype=torch.float ))
    # forward method to define the computation in the model
    def forward(self,x:torch.Tensor)->torch.Tensor:
        return self.weights * x + self.bias

7. Breaking Down What’s Happening in Our PyTorch Linear regression Model

Subclassing nn.Module requires overriding the forward method.
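
A minimal skeleton (my sketch) of what that looks like: forward is defined at class level, as a sibling of __init__:

class MinimalModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(1))
    # defined at class level -> overrides nn.Module.forward
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.weights * x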

8. Discussing Some of the Most Important PyTorch Model Building Classes

9. Checking Out the Internals of Our PyTorch Model


# build a model -> y = wx+b
# subclass nn.Module (which contains most of PyTorch's building blocks)
class LinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        # model parameter w (the weight)
        self.weights=nn.Parameter(torch.randn(1,
                                              requires_grad=True,
                                              dtype=torch.float))
        # bias b
        self.bias=nn.Parameter(torch.randn(1,
                                          requires_grad=True,
                                          dtype=torch.float))
    # forward method to define the computation in the model
    def forward(self,x:torch.Tensor)->torch.Tensor:
        return self.weights * x + self.bias

# random seed
# a manual seed makes the random numbers reproducible (the same results every run)
torch.manual_seed(42)

model_0 = LinearRegressionModel()

# [Parameter containing:
# tensor([0.3367], requires_grad=True), Parameter containing:
# tensor([0.1288], requires_grad=True)]
print(list(model_0.parameters()))

# OrderedDict([('weights', tensor([0.3367])), ('bias', tensor([0.1288]))])
print(model_0.state_dict())

10. Making Predictions With Our Random Model Using Inference Mode

If calling the model raises an error at this point, check that your indentation is correct: forward must be defined at class level, not nested inside __init__.

import torch
from torch import nn # contains all of PyTorch's neural network building blocks
import matplotlib.pyplot as plt
print(torch.__version__) # 2.0.0+cpu (the video uses 1.10.0)


## linear regression: y = wx + b
weight = 0.7
bias = 0.3


start = 0
end = 1
step = 0.02
x = torch.arange(start, end,step).unsqueeze(dim=1)
y = weight * x + bias


print(x[:10],y[:10],len(x),len(y))


# split the dataset (80% train / 20% test)
train_split = int(0.8*len(x))
# print(train_split)
x_train,y_train = x[:train_split],y[:train_split]
x_test,y_test = x[train_split:],y[train_split:]


# visualization
def plot_predictions(train_data=x_train,
                        train_labels=y_train,
                        test_data=x_test,
                        test_labels=y_test,
                        predictions=None):
    """
        plots training data,test data and compares predictions
    """
    plt.figure(figsize=(10,7))
    plt.scatter(train_data,train_labels,c='b',s=4,label='training data')
    plt.scatter(test_data,test_labels,c='g',s=4,label='testing data')
    if predictions is not None:
        # plot the predictions passed in by the caller
        plt.scatter(test_data,predictions,c='r',s=4,label='predictions')
    plt.legend(prop={"size":14})

# build a model -> y = wx+b
# subclass nn.Module (which contains most of PyTorch's building blocks)
class LinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        # model parameter w (the weight)
        self.weights=nn.Parameter(torch.randn(1,
                                              requires_grad=True,
                                              dtype=torch.float))
        # bias b
        self.bias=nn.Parameter(torch.randn(1,
                                          requires_grad=True,
                                          dtype=torch.float))
    # forward method to define the computation in the model
    def forward(self,x:torch.Tensor)->torch.Tensor:
        return self.weights * x + self.bias


# random seed
# a manual seed makes the random numbers reproducible (the same results every run)
torch.manual_seed(42)


model_0 = LinearRegressionModel()


# [Parameter containing:
# tensor([0.3367], requires_grad=True), Parameter containing:
# tensor([0.1288], requires_grad=True)]
print(list(model_0.parameters()))


# OrderedDict([('weights', tensor([0.3367])), ('bias', tensor([0.1288]))])
print(model_0.state_dict())


# try to predict
# inference_mode -> inference mode: no gradient tracking or autograd bookkeeping, just using the model to predict
with torch.inference_mode():
    y_preds = model_0(x_test)
# torch.no_grad() is similar to inference mode, but inference mode is recommended
# with torch.no_grad():
#     y_preds = model_0(x_test)


print(y_preds)
plot_predictions(predictions=y_preds)
plt.show()

11. Training a Model Intuition (The Things We Need)

12. Setting Up an Optimizer and a Loss Function


# loss function -> measures how wrong the model's predictions are
loss_fn = nn.L1Loss()
# optimizer -> updates the model's parameters to reduce the loss
# stochastic gradient descent
optimizer = torch.optim.SGD(params=model_0.parameters(),
                            lr=0.01)

13. PyTorch Training Loop Steps and Intuition

epochs = 1
for epoch in range(epochs):
    # set the model to training mode
    model_0.train()
    model_0.eval() # switch to evaluation mode (note: this does NOT turn off gradient tracking; use torch.inference_mode() for that)

14. Writing Code for a PyTorch Training Loop

epochs = 1
for epoch in range(epochs):
    # set the model to training mode
    model_0.train()
    # 1.forward pass
    y_pred=model_0(x_train)
    # 2.calculate loss
    loss = loss_fn(y_pred,y_train)
    # 3.optimizer zero grad
    optimizer.zero_grad()
    # 4.perform backpropagation on the loss with respect to the parameters of the model
    loss.backward()
    # 5.step the optimizer (perform gradient descent)
    optimizer.step() # update the parameters using the computed gradients
    # model_0.eval() # switch to evaluation mode before evaluating

15. Reviewing the Steps in a Training Loop Step by Step

# build a model -> y = wx+b
# subclass nn.Module (which contains most of PyTorch's building blocks)
class LinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        # model parameter w (the weight)
        self.weights=nn.Parameter(torch.randn(1,
                                              requires_grad=True,
                                              dtype=torch.float))
        # bias b
        self.bias=nn.Parameter(torch.randn(1,
                                          requires_grad=True,
                                          dtype=torch.float))
        # forward method to define the computation in the model
    def forward(self,x:torch.Tensor)->torch.Tensor:
        return self.weights * x + self.bias
# train model
# loss function -> measures how wrong the model's predictions are
loss_fn = nn.L1Loss()
# optimizer -> updates the model's parameters to reduce the loss
# stochastic gradient descent
optimizer = torch.optim.SGD(params=model_0.parameters(),
                            lr=0.01)
# training
epochs = 5
for epoch in range(epochs):
    # set the model to training mode
    model_0.train()
    # 1.forward pass
    y_pred=model_0(x_train)
    # 2.calculate loss
    loss = loss_fn(y_pred,y_train)
    print(f'Loss: {loss}')
    # 3.optimizer zero grad
    optimizer.zero_grad()
    # 4.perform backpropagation on the loss with respect to the parameters of the model
    loss.backward()
    # 5.step the optimizer (perform gradient descent)
    optimizer.step() # update the parameters using the computed gradients
    model_0.eval() # switch to evaluation mode (does not turn off gradient tracking)
    print(model_0.state_dict())

with torch.inference_mode():
    y_pred_new = model_0(x_test)

print(y_pred_new)
plot_predictions(predictions=y_preds)
plot_predictions(predictions=y_pred_new)
plt.show()


16. Running Our Training Loop Epoch by Epoch and Seeing What Happens

17. Writing Testing Loop Code and Discussing What’s Happening Step by Step

# training
epochs = 100
for epoch in range(epochs):
    # set the model to training mode
    model_0.train()
    # 1.forward pass
    y_pred=model_0(x_train)
    # 2.calculate loss
    loss = loss_fn(y_pred,y_train)
    # print(f'Loss: {loss}')
    # 3.optimizer zero grad
    optimizer.zero_grad()
    # 4.perform backpropagation on the loss with respect to the parameters of the model
    loss.backward()
    # 5.step the optimizer (perform gradient descent)
    optimizer.step() # update the parameters using the computed gradients
    # before evaluating, switch layers that behave differently in training/evaluation into eval mode
    model_0.eval()
    with torch.inference_mode():
        # older code uses with torch.no_grad(); inference mode is now preferred
        # 1. do the forward pass
        test_pred = model_0(x_test)
        # 2. calculate the loss
        test_loss = loss_fn(test_pred, y_test)
    if epoch % 10 == 0:
        print(f'epoch: {epoch} | loss: {loss} | test loss: {test_loss}')
        print(model_0.state_dict())

18. Reviewing What Happens in a Testing Loop Step by Step


# train model
# loss function -> measures how wrong the model's predictions are
loss_fn = nn.L1Loss()
# optimizer -> updates the model's parameters to reduce the loss
# stochastic gradient descent
optimizer = torch.optim.SGD(params=model_0.parameters(),
                            lr=0.01)
# training
epochs = 100
# track values across training for plotting later
epoch_count = []
loss_values = []
test_loss_values = []
for epoch in range(epochs):
    # set the model to training mode
    model_0.train()
    # 1.forward pass
    y_pred=model_0(x_train)
    # 2.calculate loss
    loss = loss_fn(y_pred,y_train)
    # print(f'Loss: {loss}')
    # 3.optimizer zero grad
    optimizer.zero_grad()
    # 4.perform backpropagation on the loss with respect to the parameters of the model
    loss.backward()
    # 5.step the optimizer (perform gradient descent)
    optimizer.step() # update the parameters using the computed gradients
    # before evaluating, switch layers that behave differently in training/evaluation into eval mode
    model_0.eval()
    with torch.inference_mode():
        # older code uses with torch.no_grad(); inference mode is now preferred
        # 1. do the forward pass
        test_pred = model_0(x_test)
        # 2. calculate the loss
        test_loss = loss_fn(test_pred, y_test)
    if epoch % 10 == 0:
        epoch_count.append(epoch)
        loss_values.append(loss)
        test_loss_values.append(test_loss)
        print(f'epoch: {epoch} | loss: {loss} | test loss: {test_loss}')
        print(model_0.state_dict())

with torch.inference_mode():
    y_pred_new = model_0(x_test)

print(y_pred_new)
# plot_predictions(predictions=y_preds)
# plot_predictions(predictions=y_pred_new)
plt.show()

print(f'epoch_count: {epoch_count}\nloss_values: {loss_values}\ntest_loss_values: {test_loss_values}')
plt.plot(epoch_count, torch.tensor(loss_values).cpu().numpy(), label="train loss")
plt.plot(epoch_count, test_loss_values, label="test loss")
plt.title("Training and test loss curves")
plt.ylabel("Loss")
plt.xlabel("Epochs")
plt.legend()
plt.show()

19. Writing Code to Save a PyTorch Model


Save the model (here: just its state_dict, which is the recommended approach)

# saving a model in pytorch
from pathlib import Path
# 1.create model directory
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True,exist_ok=True)
# 2.create model save path
MODEL_NAME = "01_pytorch_workflow_model_0.pth" # the file extension can be .pt or .pth
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME
print(MODEL_SAVE_PATH)
# 3.save the model state dict
print(f'saving model to: {MODEL_SAVE_PATH}')
torch.save(obj=model_0.state_dict(),f=MODEL_SAVE_PATH)
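
For reference (my addition): torch.save can also serialize the entire model object, though saving just the state_dict as above is the recommended approach, because a pickled model needs its class definition to be importable at load time. The filename below is hypothetical:

# alternative: save/load the whole model object (pickles the class as well)
torch.save(model_0, MODEL_PATH / "01_whole_model_0.pth")
loaded_whole_model = torch.load(MODEL_PATH / "01_whole_model_0.pth")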

20. Writing Code to Load a PyTorch Model

# saving a model in pytorch
# 1.create model directory
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True,exist_ok=True)
# 2.create model save path
MODEL_NAME = "01_pytorch_workflow_model_0.pth" # the file extension can be .pt or .pth
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME
print(MODEL_SAVE_PATH)
# 3.save the model state dict
print(f'saving model to: {MODEL_SAVE_PATH}')
torch.save(obj=model_0.state_dict(),f=MODEL_SAVE_PATH)
# loading a pytorch model
loaded_model_0 = LinearRegressionModel()
# load the saved state dict
loaded_model_0.load_state_dict(torch.load(f=MODEL_SAVE_PATH))
# make some prediction
loaded_model_0.eval()
with torch.inference_mode():
    loaded_model_preds = loaded_model_0(x_test)
print(loaded_model_preds)
print(y_preds==loaded_model_preds)
print(y_preds)
model_0.eval()
with torch.inference_mode():
    y_preds = model_0(x_test)
# the original model and the loaded model produce the same predictions
print(y_preds==loaded_model_preds)

21. Setting Up to Practice Everything We Have Done Using Device Agnostic code

Full process:

import torch
from torch import nn # contains all of PyTorch's neural network building blocks
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path

# setup device-agnostic code
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'using device: {device}')

22. Putting Everything Together (Part 1) Data

# data
weight = 0.7
bias = 0.3
# create range values
start = 0
end = 1
step = 0.02
# create x and y (features & labels)
X = torch.arange(start,end,step).unsqueeze(dim=1)
y = weight * X + bias
print(X[:10],y[:10])
# split data
train_split = int(0.8*len(X))
x_train,y_train=X[:train_split],y[:train_split]
x_test,y_test=X[train_split:],y[train_split:]

# visualization
def plot_predictions(train_data=x_train,
                        train_labels=y_train,
                        test_data=x_test,
                        test_labels=y_test,
                        predictions=None):
    """
        plots training data,test data and compares predictions
    """
    # create a custom figure; figsize is (width, height)
    plt.figure(figsize=(10,7))
    # scatter plot
    plt.scatter(train_data,train_labels,c='b',s=4,label='training data')
    plt.scatter(test_data,test_labels,c='g',s=4,label='testing data')
    if predictions is not None:
        # plot the predictions passed in by the caller
        plt.scatter(test_data,predictions,c='r',s=4,label='predictions')
    plt.legend(prop={"size":14})

plot_predictions(x_train,y_train,x_test,y_test)
plt.show()

23. Putting Everything Together (Part 2) Building a Model

# create a linear model by subclassing nn.Module
class LinearRegressionModelV2(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        # use nn.Linear() for creating the model parameters
        # also called: linear transform, probing layer, fully connected layer, dense layer
        # in_features/out_features -> the input and output shapes
        self.linear_layer = nn.Linear(in_features=1,
                                      out_features=1)
    def forward(self,x: torch.Tensor) -> torch.Tensor:
        return self.linear_layer(x)

# set the manual seed
torch.manual_seed(42)
model_1 = LinearRegressionModelV2()
print(model_1)
print(model_1.state_dict())

24. Putting Everything Together (Part 3) Training a Model

# put the data on the same device as the model
x_train = x_train.to(device)
y_train = y_train.to(device)
x_test = x_test.to(device)
y_test = y_test.to(device)

# 3
print(model_1.state_dict())
# loss function
loss_fn = nn.L1Loss() # same as MAE
# optimizer
optimizer = torch.optim.SGD(params=model_1.parameters(),lr=0.01)
# train loop
torch.manual_seed(42)
epochs = 200
for epoch in range(epochs):
    model_1.train()
    # 1,forward pass
    y_pred = model_1(x_train)
    # 2,calculate the loss
    loss = loss_fn(y_pred,y_train)
    # 3,optimizer zero grad
    optimizer.zero_grad()
    # 4,perform backpropagation
    loss.backward()
    # 5,optimizer step
    optimizer.step()
    ## testing
    model_1.eval()
    with torch.inference_mode():
        test_pred = model_1(x_test)
        test_loss = loss_fn(test_pred,y_test)
    # print what's happening
    if epoch % 10 == 0:
        print(f'epoch: {epoch} | loss: {loss} | test loss: {test_loss}')

print(model_1.state_dict())

25. Putting Everything Together (Part 4) Making Predictions With a Trained Model

# 4
# making and evaluating predictions
# turn model into evaluation mode
model_1.eval()
with torch.inference_mode():
    y_preds = model_1(x_test)
print(y_preds)
# check out our model predictions visually
# matplotlib works with numpy (CPU), so move the data back to the CPU if it was computed on the GPU
plot_predictions(predictions=y_preds.cpu())
plt.show()

26. Putting Everything Together (Part 5) Saving and Loading a Trained Model

# 5
# saving and loading a trained model
# 1,create models directory
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True,exist_ok=True)
# 2,create model save path
MODEL_NAME = "01_pytorch_workflow_model_1.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME
# 3,save the model state dict
print(f"saving model to: {MODEL_SAVE_PATH}")
torch.save(obj=model_1.state_dict(),f=MODEL_SAVE_PATH)
# load a pytorch model
# create a new instance of linear regression model v2
loaded_model_1 = LinearRegressionModelV2()
# load the saved model_1 state_dict
loaded_model_1.load_state_dict(torch.load(MODEL_SAVE_PATH))
# put the loaded model to device
loaded_model_1.to(device)
print(next(loaded_model_1.parameters()).device)
# evaluate loaded model
loaded_model_1.eval()
with torch.inference_mode():
    loaded_model_1_preds = loaded_model_1(x_test)
print(y_preds==loaded_model_1_preds)

4_PyTorch Neural Network Classification

1. Introduction to Machine Learning Classification With PyTorch


Binary classification (e.g. spam or not spam)
Multi-class classification (e.g. what kind of steak it is)
Multi-label classification (e.g. tagging an article) -> multi-label gives one item multiple labels; multi-class gives one item exactly one label
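
A sketch (my addition) of how the problem type typically changes the output layer and the loss function, assuming 5 input features:

from torch import nn
# binary classification: 1 output logit + BCEWithLogitsLoss
binary_head = nn.Linear(in_features=5, out_features=1)
binary_loss = nn.BCEWithLogitsLoss()
# multi-class classification: one logit per class + CrossEntropyLoss
num_classes = 3 # hypothetical number of classes
multiclass_head = nn.Linear(in_features=5, out_features=num_classes)
multiclass_loss = nn.CrossEntropyLoss()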

2. Classification Problem Example Input and Output Shapes

A batch size of no more than 32 usually works well.

3. Typical Architecture of a Classification Neural Network (Overview)


4. Making a Toy Classification Dataset

# 1 make classification data and get it ready
import sklearn
from sklearn.datasets import make_circles
import pandas as pd
import matplotlib.pyplot as plt
# make 1000 samples
n_samples = 1000
# create circles
# random_state is the random seed
x,y = make_circles(n_samples,noise=0.03,random_state=42)
print(f'first 5 samples of x:\n {x[:5]}')
print(f'first 5 samples of y:\n {y[:5]}')
# make dataframe of circle data
circles = pd.DataFrame({'x1':x[:,0],
                        'x2':x[:,1],
                        'label':y})
print(circles.head(10))
plt.scatter(x=x[:,0],y=x[:,1],c=y,cmap=plt.cm.RdYlBu)
plt.show()

5. Turning Our Data into Tensors and Making a Training and Test Split

## 1.1 check input and output shapes
print(x.shape,y.shape)
x_sample = x[0]
y_sample = y[0]
print(f'values for one sample of x: {x_sample} and the same for y: {y_sample}')
print(f'shapes for one sample of x: {x_sample.shape} and the same for y: {y_sample.shape}')
## 1.2 turn data into tensors and create train and test splits
import torch
from sklearn.model_selection import train_test_split
x = torch.from_numpy(x).type(torch.float)
y = torch.from_numpy(y).type(torch.float)
print(x[:5],y[:5])
# split data
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

6. Laying Out Steps for Modelling and Setting Up Device-Agnostic Code

import torch
from torch import nn
# 2 building a model
# make device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

7. Coding a Small Neural Network to Handle Our Classification Data

class CircleModelV0(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        # create 2 nn.Linear layers capable of handling the shapes of our data
        # 2 input features, upscaled to 5 features (5 is an arbitrary choice)
        self.layer_1 = nn.Linear(in_features=2,out_features=5)
        # the next layer's in_features must match the previous layer's out_features
        self.layer_2 = nn.Linear(in_features=5,out_features=1)
    def forward(self,x):
        return self.layer_2(self.layer_1(x)) # x -> layer1 -> layer2

model_0 = CircleModelV0().to(device)
print(model_0)

8. Making Our Neural Network Visual

9. Recreating and Exploring the Insides of Our Model Using nn.Sequential

# let's replicate the model above using nn.Sequential()
model_0 = nn.Sequential(
    nn.Linear(in_features=2,out_features=5),
    nn.Linear(in_features=5,out_features=1),
).to(device)
print(model_0)

# make predictions
with torch.inference_mode():
    untrained_preds = model_0(x_test.to(device))
print(f'length of predictions: {len(untrained_preds)}, shape: {untrained_preds.shape}')
print(f'length of test sample: {len(x_test)}, shape: {x_test.shape}')
print(f'\nfirst 10 predictions:\n{torch.round(untrained_preds[:10])}')
print(f'\nfirst 10 labels:\n{y_test[:10]}')

10. Loss Function Optimizer and Evaluation Function for Our Classification Network

# 2.1 setup loss function and optimizer
loss_fn = nn.BCEWithLogitsLoss() # has the sigmoid activation function built in (expects raw logits)
optimizer = torch.optim.SGD(params=model_0.parameters(),
                            lr=0.1)
# calculate accuracy - out of 100 examples, what percentage does our model get right?
def accuracy_fn(y_true,y_pred):
    correct = torch.eq(y_true,y_pred).sum().item()
    acc = (correct/len(y_pred)) * 100
    return acc
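
To make the comment above concrete (my sketch): nn.BCEWithLogitsLoss applies the sigmoid internally, so it behaves like sigmoid followed by nn.BCELoss, but is more numerically stable:

logits = torch.randn(4)
targets = torch.tensor([0., 1., 1., 0.])
with_logits = nn.BCEWithLogitsLoss()(logits, targets)
manual = nn.BCELoss()(torch.sigmoid(logits), targets)
print(torch.allclose(with_logits, manual)) # True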

11. Going from Model Logits to Prediction Probabilities to Prediction Labels

# 3. train model
model_0.eval()
with torch.inference_mode():
    y_logits = model_0(x_test.to(device))[:5]
print(y_logits)
print(model_0.state_dict())

y_pred_probs = torch.sigmoid(y_logits)
print(y_pred_probs)
print(torch.round(y_pred_probs)) # round to 0 or 1

# find the predicted labels
y_preds = torch.round(y_pred_probs)
# in full (logits -> pred probs -> pred labels)
y_pred_labels = torch.round(torch.sigmoid(model_0(x_test.to(device))[:5]))
# check for equality
print(torch.eq(y_preds.squeeze(),y_pred_labels.squeeze()))
# get rid of extra dimension
print(y_preds.squeeze())

12. Coding a Training and Testing Optimization Loop for Our Classification Model


# 3.2 building a training and testing loop
torch.manual_seed(42)
torch.cuda.manual_seed(42)
# set the number of epochs
epochs = 100
# put data to target device
x_train,y_train = x_train.to(device),y_train.to(device)
x_test,y_test = x_test.to(device),y_test.to(device)
# build training and evaluation loop
for epoch in range(epochs):
    ### training
    model_0.train()
    # 1. forward pass
    y_logits = model_0(x_train).squeeze()
    # turn logits -> pred probs -> pred labels
    y_pred = torch.round(torch.sigmoid(y_logits))
    # 2. calculate loss/accuracy
    loss = loss_fn(y_logits,
                   y_train)
    acc = accuracy_fn(y_true=y_train,
                      y_pred=y_pred)
    # 3. optimizer zero grad
    optimizer.zero_grad()
    # 4. loss backward (backpropagation)
    loss.backward()
    # 5. optimizer step (gradient descent)
    optimizer.step()
    ### testing
    model_0.eval()
    with torch.inference_mode():
        # 1. forward pass
        test_logits = model_0(x_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))
        # 2.calculate test loss/acc
        test_loss = loss_fn(test_logits,y_test)
        test_acc = accuracy_fn(y_true=y_test,y_pred=test_pred)
    # print out what's happenin'
    if epoch%10==0:
        print(f'epoch: {epoch} | loss: {loss:.5f}, acc: {acc:.2f}% | test loss: {test_loss:.5f}, test acc: {test_acc:.2f}%')

13. Writing Code to Download a Helper Function to Visualize Our Models Predictions

# download helper functions
from pathlib import Path
import requests
if Path("helper_functions.py").is_file():
    print("helper_functions.py already exists")
else:
    print("download helper_functions.py")
    req = requests.get("https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py")
    with open("helper_functions.py", "wb") as f:
        f.write(req.content)
from helper_functions import plot_decision_boundary
# plot decision boundary of the model
plt.figure(figsize=(12,6))
plt.subplot(1,2,1)
plt.title("Train")
plot_decision_boundary(model_0,x_train,y_train)
plt.subplot(1,2,2)
plt.title("Test")
plot_decision_boundary(model_0,x_test,y_test)
plt.show()

14. Discussing Options to Improve a Model



15. Creating a New Model with More Layers and Hidden Units

Parameters are variables the model adjusts internally by itself; hyperparameters are variables we specify.
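
In terms of this course's code (my illustration):

# parameters: learned by the model, live in state_dict() / parameters()
print(model_0.state_dict()) # weights and biases, updated by the optimizer
# hyperparameters: chosen by us, e.g.
lr = 0.001 # learning rate
epochs = 1000 # number of training passes
hidden_units = 10 # out_features of the hidden layers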

# improve model
class CircleModelV1(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer_1 = nn.Linear(in_features=2,out_features=10)
        self.layer_2 = nn.Linear(in_features=10,out_features=10)
        self.layer_3 = nn.Linear(in_features=10,out_features=1)
    def forward(self,x):
        z = self.layer_1(x)
        z = self.layer_2(z)
        z = self.layer_3(z)
        return z
model_1 = CircleModelV1().to(device)
print(model_1)

16. Writing Training and Testing Code to See if Our Upgraded Model Performs Better

# create a loss function
loss_fn = nn.BCEWithLogitsLoss()
# create an optimizer
optimizer = torch.optim.SGD(params=model_1.parameters() ,
                            lr=0.001)
# write a training and evaluation loop for model_1
torch.manual_seed(42)
# torch.cuda.manual_seed(42)
epochs= 1000
# put data on the target device
x_train,y_train = x_train.to(device),y_train.to(device)
x_test,y_test = x_test.to(device),y_test.to(device)

for epoch in range(epochs):
    ### Training
    model_1.train()
    # 1.forward pass
    y_logits = model_1(x_train).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits))
    # 2. calculate the loss/acc
    loss = loss_fn(y_logits,y_train)
    acc = accuracy_fn(y_true=y_train,
                      y_pred=y_pred)
    # 3. optimize zero grad
    optimizer.zero_grad()
    # 4. loss backward (backpropagation)
    loss.backward()
    # 5. optimizer step (gradient descent)
    optimizer.step()
    ### testing
    model_1.eval()
    with torch.inference_mode():
        # 1. forward pass
        test_logits = model_1(x_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))
        # 2. calculate loss
        test_loss = loss_fn(test_logits,
                            y_test)
        test_acc = accuracy_fn(y_true=y_test,
                               y_pred=test_pred)
    # print what's happening
    if epoch % 100 == 0:
        print(f'epoch: {epoch} | loss: {loss:.5f}, acc: {acc:.2f}% | test loss: {test_loss:.5f}, test acc: {test_acc:.2f}%')
# plot decision boundary of the model
plt.figure(figsize=(12,6))
plt.subplot(1,2,1)
plt.title("Train")
plot_decision_boundary(model_1,x_train,y_train)
plt.subplot(1,2,2)
plt.title("Test")
plot_decision_boundary(model_1,x_test,y_test)
plt.show()

17. Creating a Straight Line Dataset to See if Our Model is Learning Anything

# 5.1 preparing data to see if our model can fit a straight line
# create some data
weight = 0.7
bias = 0.3
start = 0
end = 1
step = 0.01

x_regression = torch.arange(start,end,step).unsqueeze(dim=1)
y_regression = weight * x_regression + bias
# create train and test splits
train_split = int(0.8*len(x_regression))
x_train_regression,y_train_regression = x_regression[:train_split],y_regression[:train_split]
x_test_regression,y_test_regression = x_regression[train_split:],y_regression[train_split:]

plot_predictions(train_data=x_train_regression,
                 train_labels=y_train_regression,
                 test_data=x_test_regression,
                 test_labels=y_test_regression)
plt.show()

18. Building and Training a Model to Fit on Straight Line Data

# 5.2 adjusting model_1 to fit a straight line
model_2 = nn.Sequential(
    nn.Linear(in_features=1,out_features=10),
    nn.Linear(in_features=10,out_features=10),
    nn.Linear(in_features=10,out_features=1)
).to(device)
# loss and optimizer
loss_fn = nn.L1Loss()
optimizer = torch.optim.SGD(params=model_2.parameters(),
                            lr=0.1)
# train
torch.manual_seed(42)
# torch.cuda.manual_seed(42)
# set the number of epochs
epochs = 1000
# put the data on the target device
x_train_regression,y_train_regression=x_train_regression.to(device),y_train_regression.to(device)
x_test_regression,y_test_regression=x_test_regression.to(device),y_test_regression.to(device)
# training
for epoch in range(epochs):
    y_pred = model_2(x_train_regression)
    loss = loss_fn(y_pred,y_train_regression)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # testing
    model_2.eval()
    with torch.inference_mode():
        test_pred = model_2(x_test_regression)
        test_loss = loss_fn(test_pred,y_test_regression)
    # print
    if epoch % 100 == 0:
        print(f'epoch: {epoch} | loss: {loss:.5f} | test loss: {test_loss:.5f}')

19. Evaluating Our Models Predictions on Straight Line Data

model_2.eval()
with torch.inference_mode():
    y_preds = model_2(x_test_regression)
plot_predictions(train_data=x_train_regression.cpu(),
                 train_labels=y_train_regression.cpu(),
                 test_data=x_test_regression.cpu(),
                 test_labels=y_test_regression.cpu(),
                 predictions=y_preds.cpu())
plt.show()

20. Introducing the Missing Piece for Our Classification Model Non-Linearity
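
The code for this part is not included in these notes; a minimal sketch of the idea (my reconstruction, following the CircleModelV1 pattern above) is to put a non-linear activation such as nn.ReLU between the linear layers:

class CircleModelV2(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer_1 = nn.Linear(in_features=2,out_features=10)
        self.layer_2 = nn.Linear(in_features=10,out_features=10)
        self.layer_3 = nn.Linear(in_features=10,out_features=1)
        self.relu = nn.ReLU() # non-linear activation
    def forward(self,x):
        # non-linearities between linear layers let the model fit curved decision boundaries
        return self.layer_3(self.relu(self.layer_2(self.relu(self.layer_1(x)))))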

