Deep Learning A-Z 2024 Neural Networks, AI & ChatGPT Prize

01 - Welcome to the course!

02 - ——————— Part 1 - Artificial Neural Networks ———————

03 - ANN Intuition

002 Plan of Attack

003 The Neuron




04 - Building an ANN

001 Business Problem Description

Dataset: various attributes of the customers of a fictional bank; the target is whether each customer left the bank.

003 Building an ANN

# %% part1
import numpy as np
import pandas as pd
import tensorflow as tf

print(tf.__version__)  # in the video: '2.2.0'

# dataset
dataset = pd.read_csv('Churn_Modelling.csv')
X = dataset.iloc[:, 3:-1].values  # all rows; all columns except the first 3 and the last one
y = dataset.iloc[:, -1].values  # all rows, last column

# encoding categorical data
# (encode gender from [female, male] to [0, 1])
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
X[:, 2] = le.fit_transform(X[:, 2])

print(X)
# (one-hot encode geography: [france, spain, ...] -> [[0, 0, 1, ...], [1, 0, 0, ...], ...])
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

ct = ColumnTransformer(
    # one-hot encode the column at index 1 of the input data
    transformers=[('encoder', OneHotEncoder(), [1])],
    remainder='passthrough'
)
X = np.array(ct.fit_transform(X))
print(X)
# split train & test dataset
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# feature scaling - bring features with different ranges onto a common scale (here standardization, not min-max normalization)
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
# fit_transform combines fit and transform: it learns the scaling parameters and applies them
X_train = sc.fit_transform(X_train)
# transform applies the parameters already learned by fit (whatever the tool is: PCA, StandardScaler, etc.)
X_test = sc.transform(X_test)

# %% part2 - build the ANN
ann = tf.keras.models.Sequential()
'''
tf.keras.layers.Dense(
    units,                                 # positive integer, dimensionality of the output space
    activation=None,                       # activation function; linear if not specified
    use_bias=True,                         # boolean, whether the layer uses a bias vector
    kernel_initializer='glorot_uniform',   # initializer for the kernel weights matrix
    bias_initializer='zeros',              # initializer for the bias vector
    kernel_regularizer=None,               # regularizer function applied to the kernel weights matrix
    bias_regularizer=None,                 # regularizer function applied to the bias vector
    activity_regularizer=None,             # regularizer function applied to the output of the layer (its "activation")
    kernel_constraint=None,                # constraint function applied to the kernel weights matrix
    bias_constraint=None, **kwargs         # constraint function applied to the bias vector
)
'''
# input layer and first hidden layer (fully connected)
ann.add(
    tf.keras.layers.Dense(units=6, activation='relu')
)
# second hidden layer
ann.add(
    tf.keras.layers.Dense(units=6, activation='relu')
)
# output layer
ann.add(
    tf.keras.layers.Dense(units=1, activation='sigmoid')
)

# %% part3 - train ANN
ann.compile(
    optimizer='adam',  # optimizer
    loss='binary_crossentropy',  # loss function for binary classification
    metrics=['accuracy']  # evaluation metric
)
ann.fit(X_train, y_train, batch_size=32, epochs=100)
# %% predicting
print(
    ann.predict(
        # geography (one-hot), credit score, gender, age, tenure, balance,
        # number of products, has a credit card?, is an active member?, estimated salary
        sc.transform([[1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]])  # a single observation still needs a 2-D array
    )
)

y_pred = ann.predict(X_test)
y_pred = (y_pred > 0.5)
print(
    np.concatenate(
        (y_pred.reshape(len(y_pred), 1),
         y_test.reshape(len(y_test), 1)),
        axis=1
    )
)
# %% confusion matrix - compares predicted classes with actual values and shows the classification accuracy in a single matrix
from sklearn.metrics import confusion_matrix, accuracy_score

cm = confusion_matrix(y_test, y_pred)
print(cm)
'''
sklearn's layout (rows = actual class, columns = predicted class):
[[true negatives, false positives]
 [false negatives, true positives]]
'''
print(accuracy_score(y_test, y_pred))

05 - ——————— Part 2 - Convolutional Neural Networks ———————

07 - Building a CNN

# %% import
import tensorflow as tf

import keras
from keras.preprocessing.image import ImageDataGenerator

# in the video: tf 2.2.0, keras 2.3.1
print(tf.__version__, keras.__version__)
# %% part1 data preprocessing
train_datagen = ImageDataGenerator(  # image augmentation generator
    rescale=1. / 255,  # rescale pixel values from 0~255 to 0~1
    # the arguments below perform the image augmentation
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)
train_set = train_datagen.flow_from_directory(  # load images from the directory, applying the augmentation
    'dataset/training_set',
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary'  # binary classification
)

# apply the same feature scaling to the test set (rescaling only, no augmentation)
test_datagen = ImageDataGenerator(
    rescale=1. / 255  # keep the scaling consistent with the training set
)
test_set = test_datagen.flow_from_directory(
    'dataset/test_set',
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary'  # binary classification
)

# %% part2 building the CNN
cnn = tf.keras.models.Sequential()
cnn.add(tf.keras.layers.Conv2D(
    filters=32,  # number of filters
    kernel_size=3,
    activation='relu',
    input_shape=[64, 64, 3]
))
# max pooling (outputs the maximum value within each window)
# pool size 2x2, stride 2
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))

cnn.add(tf.keras.layers.Conv2D(
    filters=32,  # number of filters
    kernel_size=3,
    activation='relu',
))
# max pooling (outputs the maximum value within each window)
# pool size 2x2, stride 2
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))

# flattening
cnn.add(tf.keras.layers.Flatten())

# fully connected layer
cnn.add(
    tf.keras.layers.Dense(units=128, activation='relu')
)

# output layer
cnn.add(
    tf.keras.layers.Dense(units=1, activation='sigmoid')
)
# %% part3 training the CNN
cnn.compile(
    optimizer='adam',  # optimizer
    loss='binary_crossentropy',  # loss function for binary classification
    metrics=['accuracy']  # evaluation metric
)
cnn.fit(x=train_set, validation_data=test_set, epochs=25)

# %% part4 making a single prediction
import numpy as np
from keras.preprocessing import image

test_image = image.load_img('dataset/single_prediction/cat_or_dog_1.jpg', target_size=(64, 64))
test_image = image.img_to_array(test_image)  # convert to a numpy array
# add an extra batch dimension
test_image = np.expand_dims(test_image, axis=0)
result = cnn.predict(test_image)
print(train_set.class_indices)  # mapping between indices and classes
if result[0][0] == 1:
    prediction = 'dog'
else:
    prediction = 'cat'

print(prediction)

08 - ——————— Part 3 - Recurrent Neural Networks ———————

09 - RNN Intuition

002 Plan of attack

003 The idea behind Recurrent Neural Networks


  • Weights are a neural network's long-term memory - temporal lobe, like an ANN
  • Image recognition - occipital lobe, like a CNN
  • Short-term memory - frontal lobe, like an RNN

The usual RNN diagram is the network viewed from below and squashed flat.
The short-term memory loop lets the network distill more information over time to infer the output.

004 The Vanishing Gradient Problem

Sepp Hochreiter studied this problem.
x_t, x_{t-1}, x_{t-2}, ... denote going back through time during the gradient update: recurrent weights smaller than 1 make the gradient smaller at every step, until it vanishes.
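A compact way to see this (a standard sketch, not from the course notes): backpropagating n steps through time multiplies by the recurrent weight n times,

\frac{\partial E_t}{\partial h_{t-n}} \propto (w_{rec})^n, \qquad |w_{rec}| < 1 \Rightarrow \text{vanishing gradient}, \qquad |w_{rec}| > 1 \Rightarrow \text{exploding gradient}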

005 LSTMs


In the LSTM diagram, C (the cell state running along the top) represents the memory and h_t is the output; the × operations from left to right are the forget gate, the input (memory) gate, and the output gate.
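For reference, the standard LSTM equations (\sigma is the sigmoid, \odot is element-wise multiplication):

f_t = \sigma(W_f [h_{t-1}, x_t] + b_f)             (forget gate)
i_t = \sigma(W_i [h_{t-1}, x_t] + b_i)             (input/memory gate)
\tilde{C}_t = \tanh(W_C [h_{t-1}, x_t] + b_C)      (candidate memory)
C_t = f_t \odot C_{t-1} + i_t \odot \tilde{C}_t    (cell state update)
o_t = \sigma(W_o [h_{t-1}, x_t] + b_o)             (output gate)
h_t = o_t \odot \tanh(C_t)                         (output)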

006 Practical intuition

The red portion of the plot shows the prediction.

007 EXTRA LSTM Variations - three major variants

Some work on improving the LSTM:

  • Peepholes: the gates are also allowed to look at the cell state while learning.
  • Coupled gates: the memory (input) gate and the forget gate are connected.
  • GRU (Gated Recurrent Unit): merges the cell state and the hidden state (a Keras sketch follows below).
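As a hedged illustration (mine, not from the course): in Keras a GRU layer is a drop-in replacement for the stacked LSTM layers built later in this section; the layer sizes and the (60, 1) input shape are borrowed from that model.

# A possible GRU version of the stock-price regressor (sketch only)
from keras.models import Sequential
from keras.layers import GRU, Dropout, Dense

regressor_gru = Sequential()
regressor_gru.add(GRU(units=50, return_sequences=True, input_shape=(60, 1)))
regressor_gru.add(Dropout(0.2))
regressor_gru.add(GRU(units=50))  # return_sequences defaults to False
regressor_gru.add(Dropout(0.2))
regressor_gru.add(Dense(units=1))  # output layer
regressor_gru.compile(optimizer='adam', loss='mean_squared_error')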

10 - Building a RNN

002 Building a RNN - Step 1


003 Building a RNN - Step 2

# Recurrent Neural Network

# %% Part 1 - Data Preprocessing

# import library
import numpy as np
import matplotlib
matplotlib.use('tkagg')  # select the backend before importing pyplot
import matplotlib.pyplot as plt
import pandas as pd

# import train set
dataset_train = pd.read_csv('dataset/Google_Stock_Price_Train.csv')
training_set = dataset_train.iloc[:, 1:2].values  # all rows, column 1 (the 'Open' price); 1:2 keeps a 2-D array

# %% Part 2-Building the RNN

# %% Part 3-Making the predictions and visualising the results

004 Building a RNN - Step 3

Feature scaling
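The MinMaxScaler used below implements min-max normalization; for feature_range=(0, 1) the formula is:

x' = \frac{x - x_{min}}{x_{max} - x_{min}}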

# Recurrent Neural Network

# %% Part 1 - Data Preprocessing

# import library 
...

# Feature Scaling
from sklearn.preprocessing import MinMaxScaler

# feature_range: the desired range of the transformed data
sc = MinMaxScaler(feature_range=(0, 1))

training_set_scaled = sc.fit_transform(training_set)

# %% Part 2-Building the RNN

# %% Part 3-Making the predictions and visualising the results

005 Building a RNN - Step 4

# Recurrent Neural Network

# %% Part 1 - Data Preprocessing

# import library
...
# import train set
...
# Feature Scaling
...

# Creating a data structure with 60 timesteps and 1 output: look back at the previous 60 days to learn/predict the current day
X_train = []
y_train = []
for i in range(60, 1258):  # training_set.shape is (1258, 1)
    X_train.append(training_set_scaled[i - 60:i, 0])  # the previous 60 values
    y_train.append(training_set_scaled[i, 0])  # the target at the next time step

X_train, y_train = np.array(X_train), np.array(y_train)


# %% Part 2-Building the RNN

# %% Part 3-Making the predictions and visualising the results

006 Building a RNN - Step 5

Reshape X_train into the shape Keras expects.

# Reshaping to (number of samples, number of timesteps, number of indicators)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

007 Building a RNN - Step 6

# %% Part 2-Building the RNN
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

# Initialising the rnn
regressor = Sequential()  # a regressor predicts a continuous value; a classifier predicts a class

008 Building a RNN - Step 7

# Adding the first LSTM layer and some Dropout regularisation
regressor.add(LSTM(
    # number of units (LSTM neurons)
    units=50,
    # whether to return the full sequence: True if more LSTM layers are stacked on top, False otherwise
    return_sequences=True,
    # input shape: (number of timesteps, input dimension per timestep)
    # the input size per timestep is the dimensionality of x; for text it would be the word-embedding size
    input_shape=(X_train.shape[1], 1)
))
regressor.add(Dropout(0.2))  # drop 20% of the neurons - regularization

009 Building a RNN - Step 8

# Adding a second LSTM layer and some Dropout regularisation
regressor.add(LSTM(units=50, return_sequences=True))  # the input shape is inferred from the LSTM layer above
regressor.add(Dropout(0.2))

# Adding a third LSTM layer and some Dropout regularisation
regressor.add(LSTM(units=50, return_sequences=True))
regressor.add(Dropout(0.2))

# Adding a fourth LSTM layer and some Dropout regularisation
# regressor.add(LSTM(units=50, return_sequences=False))  # return_sequences defaults to False
regressor.add(LSTM(units=50))
regressor.add(Dropout(0.2))

010 Building a RNN - Step 9

# Adding the output layer
regressor.add(Dense(units=1))

011 Building a RNN - Step 10

# Compiling the RNN - Adam optimizer, mean squared error loss
regressor.compile(optimizer='adam', loss='mean_squared_error')

012 Building a RNN - Step 11

# Fitting the RNN to the Training set
regressor.fit(X_train, y_train, epochs=100, batch_size=32)

013 Building a RNN - Step 12 …

# %% Part 3-Making the predictions and visualising the results

# Getting the real stock price of 2017
dataset_test = pd.read_csv('dataset/Google_Stock_Price_Test.csv')
real_stock_price = dataset_test.iloc[:, 1:2].values  # all rows, column 1 (the 'Open' price)

# Getting the predicted stock price of 2017
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis=0)  # concatenate the two datasets
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values  # the test period plus the 60 days before it
inputs = inputs.reshape(-1, 1)
inputs = sc.transform(inputs)  # apply the same scaling as for the training set
X_test = []
for i in range(60, 80):
    X_test.append(inputs[i - 60:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = regressor.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)  # undo the earlier scaling by sc

# visualising the results
plt.plot(real_stock_price, color='red', label='Real Google Stock Price')
plt.plot(predicted_stock_price, color='blue', label='Predicted Google Stock Price')
plt.title('Google Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Google Stock Price')
plt.legend()
plt.show()

11 - Evaluating and Improving the RNN

12 - ——————— Part 4 - Self Organizing Maps ———————

13 - SOMs Intuition

What is a self-organizing map?

A particularly interesting kind of unsupervised system is based on competitive learning, in which the output neurons compete to be activated, so that only one neuron is active at any given time.
The activated neuron is called the winner-takes-all neuron.
The competition can be implemented through lateral inhibitory connections (negative feedback paths) between the neurons.
As a result, the neurons are forced to organize themselves; such a network is called a Self-Organizing Map (SOM).
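A minimal numpy sketch of the winner-takes-all step (my own illustration, not course code): the winning neuron is the node whose weight vector is closest to the input in Euclidean distance.

import numpy as np

def find_winner(weights, x):
    """Return the grid coordinates of the best matching unit (BMU).

    weights: (rows, cols, n_features) array of SOM node weights
    x: one input sample, shape (n_features,)
    """
    distances = np.linalg.norm(weights - x, axis=2)  # distance from x to every node
    return np.unravel_index(np.argmin(distances), distances.shape)

weights = np.random.rand(10, 10, 15)  # a 10x10 grid with 15 features, as in the MiniSom example below
x = np.random.rand(15)
print(find_winner(weights, x))  # e.g. (3, 7)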

001 Plan of attack

002 How do Self-Organizing Maps Work

This begins the unsupervised-learning part of the course; SOMs are used for dimensionality reduction.
They are unsupervised and reduce dimensionality, yet they still learn from the data.

003 Why revisit K-Means

004 K-Means Clustering (Refresher)

Eliminating decision complexity.



005 How do Self-Organizing Maps Learn (Part 1)

An SOM's weights are not multiplied with the inputs (an SOM has no activation function); the weights themselves are the learnable characteristics of each node.
Iterate over the dataset and find, for each data point, the node whose coordinates are closest to it.
Self-organizing: the map adjusts its own weights according to the dataset and fits itself onto the data.
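For reference, the standard Kohonen update rule (my addition, not from the course notes): the winner u and its neighbours v are dragged toward the sample x,

W_v(t+1) = W_v(t) + \alpha(t) \, \theta(u, v, t) \, (x - W_v(t))

where \alpha(t) is a decaying learning rate and \theta is the neighbourhood function (a Gaussian of width sigma in MiniSom's default setting).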

006 How do Self-Organizing Maps Learn (Part 2)

007 Live SOM example

008 Reading an Advanced SOM

Example: data on US congressmen's votes. Because self-organizing maps are so simple, many implementations and variants exist.

009 EXTRA K-means Clustering (part 2)

The random initialization trap: a centroid can be initialized at a position far from most points and close to only a few, and then never change across iterations (a k-means++ sketch follows below).
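The usual fix is k-means++ seeding, which spreads the initial centroids out; scikit-learn's KMeans uses it by default. A minimal sketch with placeholder data (my addition):

import numpy as np
from sklearn.cluster import KMeans

X = np.random.rand(200, 2)  # placeholder data; any (n_samples, n_features) array works
kmeans = KMeans(n_clusters=5, init='k-means++', n_init=10, random_state=0)
labels = kmeans.fit_predict(X)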


010 EXTRA K-means Clustering (part 3)

How many clusters do we need?
The more clusters, the smaller the WCSS (within-cluster sum of squares), i.e. the tighter the fit (see the elbow-method sketch below).
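A minimal sketch of the elbow method (my own illustration): plot the WCSS, which scikit-learn exposes as inertia_, against the number of clusters and pick the bend in the curve.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

X = np.random.rand(200, 2)  # placeholder data
wcss = []
for k in range(1, 11):
    km = KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=0).fit(X)
    wcss.append(km.inertia_)  # inertia_ is the WCSS
plt.plot(range(1, 11), wcss)
plt.title('The Elbow Method')
plt.xlabel('number of clusters')
plt.ylabel('WCSS')
plt.show()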

14 - Building a SOM

001 How to get the dataset

002 Building a SOM - Step 1


Detecting outliers

# Self Organizing Map

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('dataset/Credit_Card_Applications.csv')

X = dataset.iloc[:, :-1].values  # all columns except the last
y = dataset.iloc[:, -1].values

# Feature Scaling
from sklearn.preprocessing import MinMaxScaler

sc = MinMaxScaler(feature_range=(0, 1))  # scale to the range 0~1
X = sc.fit_transform(X)

003 Building a SOM - Step 2

# Training the SOM
from minisom import MiniSom

#  (x, y): the dimensions of the SOM grid; sigma: the radius of the neighbourhood around the winning node
som = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
som.random_weights_init(X)  # initialize the weights
som.train_random(data=X, num_iteration=100)

004 Building a SOM - Step 3

# Visualizing the results
from pylab import bone, pcolor, colorbar, plot, show

bone()  # draw a blank white window
pcolor(som.distance_map().T)  # distance_map returns each neuron's mean inter-neuron distance (MID)
colorbar()  # legend mapping colors to values
markers = ['o', 's']
colors = ['r', 'g']
for i, x in enumerate(X):  # i is the row index, x is the current sample
    w = som.winner(x)
    plot(w[0] + 0.5, w[1] + 0.5,
         # different marker depending on y (circle 'o' or square 's')
         markers[y[i]],
         # color only the marker edge
         markeredgecolor=colors[y[i]],
         markerfacecolor='None', markersize=10, markeredgewidth=2)
show()
show()

005 Building a SOM - Step 4

# Finding the frauds
mappings = som.win_map(X)
frauds = np.concatenate((mappings[(8, 1)], mappings[(6, 8)]), axis=0)  # (8, 1) and (6, 8) are the outlier coordinates read off the map plotted above
# undo the earlier scaling
frauds = sc.inverse_transform(frauds)

15 - Mega Case Study

"""## Importing the libraries"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

"""## Importing the dataset"""

dataset = pd.read_csv('dataset/Credit_Card_Applications.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

"""## Feature Scaling"""

from sklearn.preprocessing import MinMaxScaler

sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(X)

"""##Training the SOM"""

from minisom import MiniSom

som = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
som.random_weights_init(X)
som.train_random(data=X, num_iteration=100)

"""##Visualizing the results"""

from pylab import bone, pcolor, colorbar, plot, show

bone()
pcolor(som.distance_map().T)
colorbar()
markers = ['o', 's']
colors = ['r', 'g']
for i, x in enumerate(X):
    w = som.winner(x)
    plot(w[0] + 0.5,
         w[1] + 0.5,
         markers[y[i]],
         markeredgecolor=colors[y[i]],
         markerfacecolor='None',
         markersize=10,
         markeredgewidth=2)
show()

"""## Finding the frauds"""

mappings = som.win_map(X)  # dict mapping each winning node's coordinates to the samples mapped to it
frauds = np.concatenate((mappings[(6, 8)], mappings[(5, 1)]), axis=0)
frauds = sc.inverse_transform(frauds)

"""##Printing the Fraunch Clients"""

print('Fraud Customer IDs')
for i in frauds[:, 0]:
    print(int(i))

"""#Part 2 - Going from Unsupervised to Supervised Deep Learning

##Create Matrix of Features
"""

customers = dataset.iloc[:, 1:].values  # all columns except the first (the customer ID)

"""## Create Dependent Variable"""

is_fraud = np.zeros(len(dataset))
for i in range(len(dataset)):
    if dataset.iloc[i, 0] in frauds:  # the customer ID appears in the fraud list
        is_fraud[i] = 1

"""#Part 3 - ANN

### Feature Scaling
"""

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
customers = sc.fit_transform(customers)

"""## Building the ANN

##Import Tensorflow
"""

import tensorflow as tf

print(tf.__version__)

"""## Initializing the ANN"""

ann = tf.keras.models.Sequential()

"""##Adding the input layer and the first hidden layer"""

ann.add(tf.keras.layers.Dense(units=2, activation='relu'))

"""## Adding the output layer"""

ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

"""## Training the ANN

## Compiling the ANN
"""

ann.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

"""## Training the ANN on the Training set"""

ann.fit(customers, is_fraud, batch_size=1, epochs=10)

"""## Predicting test set results"""

y_pred = ann.predict(customers)
y_pred = np.concatenate((dataset.iloc[:, 0:1].values, y_pred), axis=1)
y_pred = y_pred[y_pred[:, 1].argsort()]

print(y_pred)

16 - ——————— Part 5 - Boltzmann Machines ———————

17 - Boltzmann Machine Intuition

001 Plan of attack

002 Boltzmann Machine

A Boltzmann machine is an undirected graph with no output layer; it is a system in which every node generates something.
Example: we can measure the state of a nuclear power plant at various points. A Boltzmann machine has measurable visible nodes and unmeasurable hidden nodes;
the nodes form a system that needs no input: it generates its own states - a stochastic system. Our input data helps it adjust its own state,
until it settles into a steady state; we can then use it to learn what the stable states look like and to recognize abnormal states.

003 Energy-Based Models (EBM)

Boltzmann distribution (a sampling distribution) -> Boltzmann machine

p_i denotes the probability of state i, E is the energy, k is a constant, and T is the temperature.
If air is squeezed together it becomes disordered and the molecules collide, yet the system tends toward a lower-energy steady state; ink dropped into water diffuses. A Boltzmann machine learns its weights from the data and eventually reaches the lowest-energy state.
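For reference, the formula being described is the Boltzmann distribution:

p_i = \frac{e^{-E_i / kT}}{\sum_j e^{-E_j / kT}}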

005 Restricted Boltzmann Machine

Adding nodes to a Boltzmann machine can make the number of connections explode, so the full Boltzmann machine usually cannot be realized; this led to the restricted Boltzmann machine (RBM).

A Boltzmann machine is a generative model: it keeps generating states.

After some initial learning, the Boltzmann machine tries to reconstruct its input, filling in the missing parts, and keeps adjusting until it reaches a steady state.

006 Contrastive Divergence

Contrastive divergence is what allows a Boltzmann machine to learn.

The reconstruction process is Gibbs sampling: the hidden layer updates the visible layer and vice versa.
Hinton's shortcut: adjust the energy curve after only a few sampling steps instead of sampling to convergence.
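In the notation of the Step 10 training code later in this section (v_0 is the input batch, v_k the visible state after k Gibbs steps, ph_0 and ph_k the corresponding hidden probabilities), the CD-k updates are:

\Delta W = (v_0^\top \, ph_0 - v_k^\top \, ph_k)^\top, \qquad \Delta b = \sum (v_0 - v_k), \qquad \Delta a = \sum (ph_0 - ph_k)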

007 Deep Belief Networks

Two training methods: greedy layer-wise training and the wake-sleep algorithm (training top-down and bottom-up).

008 Deep Boltzmann Machines

A DBM has no direction between layers (all connections are undirected).

18 - Building a Boltzmann Machine

003 Building a Boltzmann Machine - Introduction

005 Building a Boltzmann Machine - Step 1

# Importing the libraries 

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

# importing the dataset
movies = pd.read_csv('ml-1m/movies.dat', sep='::', header=None, engine='python', encoding='latin-1')
users = pd.read_csv('ml-1m/users.dat', sep='::', header=None, engine='python', encoding='latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat', sep='::', header=None, engine='python', encoding='latin-1')
print(movies.shape, users.shape, ratings.shape)

006 Building a Boltzmann Machine - Step 2

# Preparing the training set and the test set
training_set = pd.read_csv('ml-100k/u1.base', delimiter='\t')
training_set = np.array(training_set, dtype='int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter='\t')
test_set = np.array(test_set, dtype='int')
print(training_set.shape, test_set.shape)  # (79999, 4) (19999, 4)

007 Building a Boltzmann Machine - Step 3

# Getting the number of users and movies
nb_users = int(max(max(training_set[:, 0]), max(test_set[:, 0])))  # total number of users
nb_movies = int(max(max(training_set[:, 1]), max(test_set[:, 1])))  # total number of movies
print(nb_users, nb_movies)  # 943 1682

008 Building a Boltzmann Machine - Step 4

# Converting the data into an array with users in lines and movies in columns
def convert(data):  # data rows: [user id, movie id, rating, timestamp]
    new_data = []
    for id_users in range(1, nb_users + 1):  # user/movie ids start at 1
        # the movies this user has rated, and the corresponding ratings
        id_movies = data[:, 1][data[:, 0] == id_users]
        id_ratings = data[:, 2][data[:, 0] == id_users]
        ratings = np.zeros(nb_movies)  # zero-initialized ratings vector, indexed by movie
        ratings[id_movies - 1] = id_ratings  # array indices start at 0; record the ratings
        new_data.append(list(ratings))

    # new_data: one line per user id, one column per movie id:
    # [[r1, r2, ..., rn],
    #  [r1, r2, ..., rn]]
    return new_data


training_set = convert(training_set)
test_set = convert(test_set)
print(len(training_set), len(test_set))  # 943 943

009 Building a Boltzmann Machine - Step 5

# Converting the data into Torch tensors
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

010 Building a Boltzmann Machine - Step 6

# Converting the ratings into binary ratings 1 (Liked) or 0 (Not Liked)
training_set[training_set == 0] = -1  # -1 marks movies the user has not rated
training_set[training_set == 1] = 0  # ratings of 1 and 2 mean "not liked"
training_set[training_set == 2] = 0
training_set[training_set >= 3] = 1  # ratings of 3 and above mean "liked"

test_set[test_set == 0] = -1  # -1 marks movies the user has not rated
test_set[test_set == 1] = 0  # ratings of 1 and 2 mean "not liked"
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1  # ratings of 3 and above mean "liked"

011 Building a Boltzmann Machine - Step 7

# Creating the architecture of the Neural Network
class RBM():
    def __init__(self, nv, nh):
        '''
        :param nv: number of visible nodes
        :param nh: number of hidden nodes
        '''
        self.W = torch.randn(nh, nv)  # weights initialized from a normal distribution
        self.a = torch.randn(1, nh)  # hidden bias; the extra size-1 dimension is a batch dimension, since PyTorch does not take 1-D input here
        self.b = torch.randn(1, nv)  # visible bias

012 Building a Boltzmann Machine - Step 8

    def sample_h(self, x):  # P(h = 1 | v): probability that each hidden neuron fires, given the visible layer
        wx = torch.mm(x, self.W.t())  # matrix multiplication
        activation = wx + self.a.expand_as(wx)
        # h corresponds to movie features, v corresponds to ratings
        p_h_given_v = torch.sigmoid(activation)
        return p_h_given_v, torch.bernoulli(p_h_given_v)  # Bernoulli sampling of the hidden neurons

013 Building a Boltzmann Machine - Step 9

    def sample_v(self, y):  # P(v = 1 | h): probability that each visible neuron fires, given the hidden layer
        wy = torch.mm(y, self.W)  # matrix multiplication
        activation = wy + self.b.expand_as(wy)
        # h corresponds to movie features, v corresponds to ratings
        p_v_given_h = torch.sigmoid(activation)
        return p_v_given_h, torch.bernoulli(p_v_given_h)  # Bernoulli sampling of the visible neurons

014 Building a Boltzmann Machine - Step 10

    def train(self, v0, vk, ph0, phk):
        self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
        self.b += torch.sum((v0 - vk), 0)  # sum over dim 0, the batch dimension
        self.a += torch.sum((ph0 - phk), 0)

015 Building a Boltzmann Machine - Step 11

nv = len(training_set[0])  # 1682
nh = 100
batch_size = 100
rbm = RBM(nv, nh)

016 Building a Boltzmann Machine - Step 12

# Training the RBM
nb_epoch = 10
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.
    for id_user in range(0, nb_users - batch_size, batch_size):
        vk = training_set[id_user:id_user + batch_size]
        v0 = training_set[id_user:id_user + batch_size]  # the ratings the users actually gave
        ph0, _ = rbm.sample_h(v0)  # P(hidden node = 1) given the real ratings
        for k in range(10):
            ...  # (the Gibbs sampling loop is completed in the next step)

017 Building a Boltzmann Machine - Step 13

# Training the RBM
nb_epoch = 10
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.
    for id_user in range(0, nb_users - batch_size, batch_size):
        vk = training_set[id_user:id_user + batch_size]
        v0 = training_set[id_user:id_user + batch_size]  # the ratings the users actually gave
        ph0, _ = rbm.sample_h(v0)  # P(hidden node = 1) given the real ratings
        for k in range(10):  # k steps of Gibbs sampling
            # sample the hidden state hk
            _, hk = rbm.sample_h(vk)
            # update the visible state vk
            _, vk = rbm.sample_v(hk)
            # freeze the -1 (unrated) entries so they are never updated
            vk[v0 < 0] = v0[v0 < 0]
        phk, _ = rbm.sample_h(vk)
        rbm.train(v0, vk, ph0, phk)
        train_loss += torch.mean(torch.abs(v0[v0 >= 0] - vk[v0 >= 0]))
        # update the batch counter
        s += 1.
    print('epoch: ' + str(epoch) + ' loss: ' + str(train_loss / s))

018 Building a Boltzmann Machine - Step 14

# Testing the RBM
test_loss = 0
s = 0.
for id_user in range(nb_users):
    v = training_set[id_user:id_user + 1]  # training-set input used to activate the RBM; it contains no test-set answers
    vt = test_set[id_user:id_user + 1]  # the ratings this user actually gave in the test set
    # one round of Gibbs sampling
    if len(vt[vt >= 0]) > 0:
        # sample the hidden state
        _, h = rbm.sample_h(v)
        # update the visible state
        _, v = rbm.sample_v(h)
        test_loss += torch.mean(torch.abs(vt[vt >= 0] - v[vt >= 0]))
        # update the counter
        s += 1.
print('test loss: ' + str(test_loss / s))

19 - ——————— Part 6 - AutoEncoders ———————

20 - AutoEncoders Intuition

001 Plan of attack

002 Auto Encoders


The final output uses softmax; the course describes it as setting the largest output to 1 and all others to 0 (an argmax over the outputs).
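For reference (my addition): softmax itself produces a probability distribution over the outputs; the 1-vs-0 behaviour described above is the argmax applied on top of it:

\text{softmax}(z)_i = \frac{e^{z_i}}{\sum_j e^{z_j}}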

003 A Note on Biases

