# 用Python实现机器学习算法——Logistic 回归算法

05-07   0  11

Python 被称为最接近 AI 的语言。最近，一位名叫 Anna-Lena Popkes 的小姐姐（德国波恩大学计算机科学专业的研究生，主要关注机器学习和神经网络）在 GitHub 上分享了自己如何使用 Python（3.6 及以上版本）实现 7 种机器学习算法的笔记，并附有完整代码。所有这些算法的实现都没有使用其他机器学习库。这份笔记可以帮助大家对算法及其底层结构有基本的了解，但其目的并不是提供最高效的实现。

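• 数据集 $\{(x^{(1)}, y^{(1)}), \ldots, (x^{(m)}, y^{(m)})\}$
• $x^{(i)}$ 是 d-维向量，$x^{(i)} = (x^{(i)}_1, \ldots, x^{(i)}_d)$
• $y^{(i)}$ 是一个二元的目标变量，$y^{(i)} \in \{0, 1\}$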

Logistic 回归模型可以理解为一个非常简单的神经网络：

• 它有一个实值权重向量 $w = (w^{(1)}, \ldots, w^{(d)})$
• 它有一个实值偏置量 b
• 它使用 sigmoid 函数作为其激活函数

In [24]:

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
np.random.seed(123)

% matplotlib inline


In [25]:

# Build a two-class toy dataset on which logistic regression will be trained
X, y_true = make_blobs(n_samples=1000, centers=2)

# Scatter the first feature against the second, coloured by class label
fig = plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c=y_true)
plt.xlabel("First feature")
plt.ylabel("Second feature")
plt.title("Dataset")
plt.show()


In [26]:

# Turn the 1-D label array into a column vector of shape (n_samples, 1)
y_true = y_true.reshape(-1, 1)

# Hold out part of the data for testing, using train_test_split's defaults
X_train, X_test, y_train, y_test = train_test_split(X, y_true)

# Report the shape of every split
print(f'Shape X_train: {X_train.shape}')
print(f'Shape y_train: {y_train.shape}')
print(f'Shape X_test: {X_test.shape}')
print(f'Shape y_test: {y_test.shape}')


Shape X_train: (750, 2)

Shape y_train: (750, 1)

Shape X_test: (250, 2)

Shape y_test: (250, 1)

Logistic回归分类

In [27]:

class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    After ``train`` has run, the learned parameters are available as
    ``self.weights`` (shape ``(n_features, 1)``) and ``self.bias``.
    """

    def __init__(self):
        pass

    def sigmoid(self, a):
        """Return the element-wise logistic function ``1 / (1 + exp(-a))``.

        The input is clipped to ``[-500, 500]`` before exponentiation so that
        ``np.exp`` cannot overflow; inside that range the result is the same
        as the unclipped formula.
        """
        a = np.clip(a, -500, 500)
        return 1 / (1 + np.exp(-a))

    def train(self, X, y_true, n_iters, learning_rate):
        """
        Trains the logistic regression model on given data X and targets y.

        Args:
            X: training inputs of shape (n_samples, n_features).
            y_true: binary targets, either shape (n_samples, 1) or (n_samples,).
            n_iters: number of gradient-descent iterations.
            learning_rate: step size of each parameter update.

        Returns:
            Tuple ``(weights, bias, costs)`` where ``costs`` holds the
            cross-entropy cost of every iteration.
        """
        X = np.asarray(X)
        # Force a column vector: a 1-D target would otherwise broadcast against
        # the (n_samples, 1) predictions into an (n_samples, n_samples) matrix.
        y_true = np.asarray(y_true).reshape(-1, 1)

        # Step 0: Initialize the parameters
        n_samples, n_features = X.shape
        self.weights = np.zeros((n_features, 1))
        self.bias = 0
        costs = []
        # Keeps the arguments of np.log strictly inside (0, 1)
        eps = 1e-15

        for i in range(n_iters):
            # Step 1 and 2: Compute a linear combination of the input features
            # and weights, apply the sigmoid activation function
            y_predict = self.sigmoid(np.dot(X, self.weights) + self.bias)

            # Step 3: Compute the cost over the whole training set.
            # Probabilities are clipped for the cost only, so a saturated
            # sigmoid cannot produce log(0) and turn the cost into NaN.
            y_safe = np.clip(y_predict, eps, 1 - eps)
            cost = (-1 / n_samples) * np.sum(
                y_true * np.log(y_safe) + (1 - y_true) * np.log(1 - y_safe)
            )

            # Step 4: Compute the gradients (from the unclipped predictions)
            error = y_predict - y_true
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)

            # Step 5: Update the parameters
            self.weights = self.weights - learning_rate * dw
            self.bias = self.bias - learning_rate * db

            costs.append(cost)
            if i % 100 == 0:
                print(f"Cost after iteration {i}: {cost}")

        return self.weights, self.bias, costs

    def predict(self, X):
        """
        Predicts binary labels for a set of examples X.

        Returns an integer array of shape (n_samples, 1) holding 1 where the
        predicted probability is greater than 0.5 and 0 otherwise.
        """
        y_predict = self.sigmoid(np.dot(X, self.weights) + self.bias)
        # Vectorised threshold instead of a Python-level loop over the rows
        return (y_predict > 0.5).astype(int)


In [29]:

# Train the model on the training split and keep the per-iteration cost history
regressor = LogisticRegression()
w_trained, b_trained, costs = regressor.train(X_train, y_train, n_iters=600, learning_rate=0.009)

# Plot the learning curve. The x-axis is derived from the recorded costs so it
# stays in sync with n_iters instead of repeating the literal 600.
fig = plt.figure(figsize=(8,6))
plt.plot(np.arange(len(costs)), costs)
plt.title("Development of cost over training")
plt.xlabel("Number of iterations")
plt.ylabel("Cost")
plt.show()


Cost after iteration 0: 0.6931471805599453

Cost after iteration 100: 0.046514002935609956

Cost after iteration 200: 0.02405337743999163

Cost after iteration 300: 0.016354408151412207

Cost after iteration 400: 0.012445770521974634

Cost after iteration 500: 0.010073981792906512

In [31]:

# Predict labels for both splits with the trained model
y_p_train = regressor.predict(X_train)
y_p_test = regressor.predict(X_test)

# Accuracy in percent = 100 - (fraction of wrong labels * 100).
# The test line previously omitted the "* 100", so it subtracted the raw error
# fraction from 100 and overstated the accuracy whenever there were errors.
print(f"train accuracy: {100 - np.mean(np.abs(y_p_train - y_train)) * 100}%")
print(f"test accuracy: {100 - np.mean(np.abs(y_p_test - y_test)) * 100}%")


train accuracy: 100.0%

test accuracy: 100.0%

{{ o.content }}

{{ i }}
1 ...
{{ i }}
{{ i }}