Logistic Regression
Libraries Required
import numpy as np
import matplotlib.pyplot as plt
import math
from matplotlib import cm
Some Plotting Fxn
def soup_bowl(x_train, y_train, w, b):
    wx, by = np.meshgrid(np.linspace(-6, 12, 50),
                         np.linspace(10, -20, 40))
    points = np.c_[wx.ravel(), by.ravel()]
    cost = np.zeros(points.shape[0])

    for i in range(points.shape[0]):
        w_i, b_i = points[i]
        cost[i] = cost_fxn(x_train, y_train, w_i, b_i)

    cost = cost.reshape(wx.shape)

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(1, 1, 1, projection='3d')
    ax.plot_surface(wx, by, cost, alpha=0.6, cmap=cm.jet)
    ax.set_xlabel('w', fontsize=20)
    ax.set_ylabel('b', fontsize=20)
    ax.set_zlabel("Cost", rotation=90, fontsize=20)
    ax.set_title('Logistic Cost vs (w, b)', fontsize=20)
    cscat = ax.scatter(w, b, s=100, color='red')

    plt.tight_layout()
    plt.show()
Dataset
x_train = np.array([0, 1, 2, 3, 4, 5]).reshape(-1, 1)
y_train = np.array([0, 0, 0, 1, 1, 1])

w = np.array([5])
b = 10

pos = y_train == 1
neg = y_train == 0

plt.figure(figsize=(6, 4))
plt.scatter(x_train[pos], y_train[pos], marker='x', s=80, c='red', label="y=1")
plt.scatter(x_train[neg], y_train[neg], marker='o',
            s=100, label="y=0", facecolors='blue', edgecolors='black', linewidth=1)
plt.ylim(-0.08, 1.1)
plt.ylabel('y', fontsize=12)
plt.xlabel('x', fontsize=12)
plt.legend()
plt.tight_layout()
plt.show()
Sigmoid Fxn
\[g(z) = \frac{1}{1+e^{-z}}\]
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

z = np.linspace(-8, 8, 100)
sigmoid_values = sigmoid(z)

plt.figure(figsize=(6, 4))
plt.plot(z, sigmoid_values, label='Sigmoid Function')
plt.xlabel('z')
plt.ylabel('sigmoid(z)')
plt.legend()
plt.grid(True)
plt.show()
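A few sample values make the saturation visible: large negative z maps close to 0, z = 0 maps to exactly 0.5, and large positive z maps close to 1 (a quick illustrative check):
# Quick numeric check of the saturation behaviour shown in the plot above.
print(sigmoid(np.array([-8, -2, 0, 2, 8])))   # roughly [0.0003, 0.12, 0.5, 0.88, 0.9997]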
Finding Function f_wb
\[ f_{\mathbf{w},b}(\mathbf{x}^{(i)}) = g(\mathbf{w} \cdot \mathbf{x}^{(i)} + b ) \]
def fxn(x, w, b):
    f_wb = sigmoid(np.dot(x, w) + b)
    return f_wb
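As a quick illustrative check with the initial parameters from above, fxn returns the model's estimated probability that y = 1 for each training example; with w = [5] and b = 10 every probability starts out near 1, which is a poor fit for y_train:
# All values are close to 1, since 5*x + 10 is large for every x >= 0.
print(fxn(x_train, w, b))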
Decision Boundary
\[\mathbf{w} \cdot \mathbf{x} + b = w_0 x_0 + w_1 x_1 + b = 0\]
Dataset
x_db = np.array([[0.5, 1.5], [1, 1], [1.5, 0.5], [3, 0.5], [2, 2], [1, 2.5]])
y_db = np.array([0, 0, 0, 1, 1, 1]).reshape(-1, 1)

x0 = np.arange(0, 6)
x1 = 3 - x0

fig, ax = plt.subplots(1, 1, figsize=(5, 4))
ax.plot(x0, x1, c="b")
ax.axis([0, 4, 0, 3.5])

ax.fill_between(x0, x1, alpha=0.2)

pos = y_db == 1
neg = y_db == 0
pos = pos.reshape(-1,)
neg = neg.reshape(-1,)

plt.scatter(x_db[neg, 0], x_db[neg, 1], marker='o', s=80,
            label="neg_label", facecolors='none', edgecolors="blue", lw=3)
plt.scatter(x_db[pos, 0], x_db[pos, 1], marker='x',
            s=80, c='red', label="pos_label")

plt.show()
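The plotted line x1 = 3 - x0 is the boundary w·x + b = 0 for one particular parameter choice; w = [1, 1], b = -3 is an assumed choice consistent with that line (the code above hard-codes only the line itself), and every point on the line gets probability exactly 0.5:
# Hypothetical parameters consistent with the line x1 = 3 - x0 (assumed, not fitted):
# w0*x0 + w1*x1 + b = x0 + x1 - 3 = 0 on the boundary.
w_db_demo = np.array([1.0, 1.0])
b_db_demo = -3.0
on_line = np.array([[0.5, 2.5], [1.5, 1.5], [3.0, 0.0]])   # each row satisfies x0 + x1 = 3
print(fxn(on_line, w_db_demo, b_db_demo))                  # [0.5 0.5 0.5]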
Loss Fxn
\[ loss(f_{\mathbf{w},b}(\mathbf{x}^{(i)}), y^{(i)}) = \begin{cases} - \log\left(f_{\mathbf{w},b}\left( \mathbf{x}^{(i)} \right) \right) & \text{if $y^{(i)}=1$}\\ - \log \left( 1 - f_{\mathbf{w},b}\left( \mathbf{x}^{(i)} \right) \right) & \text{if $y^{(i)}=0$} \end{cases} \]
\[= -y^{(i)} \log\left(f_{\mathbf{w},b}\left( \mathbf{x}^{(i)} \right) \right) - \left( 1 - y^{(i)}\right) \log \left( 1 - f_{\mathbf{w},b}\left( \mathbf{x}^{(i)} \right) \right)\]
def loss(x, y, w, b):
    a = np.squeeze(fxn(x, w, b))  # squeeze so a 1-element prediction becomes a scalar
    epsilon = 1e-15  # Small constant to avoid taking log(0)

    loss = -y * np.log(a + epsilon) - (1 - y) * np.log(1 - a + epsilon)
    return loss
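A small illustrative check: with the initial w = [5], b = 10 the model is almost certain that x = 3 is positive, so the loss is tiny when the label is 1 and large when it is 0.
print(loss(np.array([3]), 1, w, b))   # near 0: confident prediction, correct label
print(loss(np.array([3]), 0, w, b))   # about 25: confident prediction, wrong label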
Cost Fxn
\[ J(\mathbf{w},b) = \frac{1}{m} \sum_{i=0}^{m-1} \left[ loss(f_{\mathbf{w},b}(\mathbf{x}^{(i)}), y^{(i)}) \right]\]
def cost_fxn(X, y, w, b):
    m = X.shape[0]
    cost = 0
    for i in range(m):
        cost += loss(X[i], y[i], w, b)

    cost = cost / m
    return cost
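The loop mirrors the summation in the formula one example at a time; an equivalent vectorized sketch (an alternative, not a replacement for cost_fxn above) computes the same J(w,b) over the whole dataset at once:
def cost_fxn_vectorized(X, y, w, b):
    # Same cost as cost_fxn above, without the Python loop.
    eps = 1e-15                      # guard against log(0), as in loss()
    a = fxn(X, w, b)                 # shape (m,): probability for every example
    return np.mean(-y * np.log(a + eps) - (1 - y) * np.log(1 - a + eps))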
Some Plots ->
Final w, b
if len(w) == 1:
    fxn1 = fxn(x_train, w, b)

    plt.scatter(x_train, y_train, color="red")
    plt.plot(x_train, fxn1)
    plt.show()

soup_bowl(x_train, y_train, w, b)
Finding dJ/dw and dJ/db
\[\begin{align*} \frac{\partial J(\mathbf{w},b)}{\partial w_j} &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_{j}^{(i)} \\ \frac{\partial J(\mathbf{w},b)}{\partial b} &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}) \end{align*}\]
def compute_gradient(x, y, w, b):
    dj_dw = 0
    dj_db = 0
    m = x.shape[0]
    a = fxn(x, w, b) - y
    dj_dw = (np.dot(a, x)) / m
    dj_db = np.sum(a) / m
    return dj_dw, dj_db
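These derivatives can be sanity-checked against a central finite difference of the cost; the helper below is a hypothetical extra, and its one-dimensional perturbation works here because w has a single component:
def numeric_gradient(x, y, w, b, h=1e-5):
    # Finite-difference estimate of dJ/dw and dJ/db (hypothetical helper;
    # works here because w has a single component).
    dj_dw_num = (cost_fxn(x, y, w + h, b) - cost_fxn(x, y, w - h, b)) / (2 * h)
    dj_db_num = (cost_fxn(x, y, w, b + h) - cost_fxn(x, y, w, b - h)) / (2 * h)
    return dj_dw_num, dj_db_num

print(compute_gradient(x_train, y_train, w, b))
print(numeric_gradient(x_train, y_train, w, b))   # should agree to several decimals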
Gradient Descent
\[\begin{align*} &\text{repeat until convergence:} \; \lbrace \\ & \; \; \;w_j = w_j - \alpha \frac{\partial J(\mathbf{w},b)}{\partial w_j} \; & \text{for j := 0..n-1} \\ & \; \; \; \; \;b = b - \alpha \frac{\partial J(\mathbf{w},b)}{\partial b} \\ &\rbrace \end{align*}\]
def gradient_descent(x, y, w, b, alpha, num_iters):
    J_history = []
    p_history = []

    for i in range(num_iters+1):
        dj_dw, dj_db = compute_gradient(x, y, w, b)
        b = b - alpha * dj_db
        w = w - alpha * dj_dw

        J_history.append(cost_fxn(x, y, w, b))
        p_history.append([w, b])
        if i % math.ceil(num_iters/10) == 0:
            print(f"Iteration {i:4}: Cost {J_history[-1]:0.2e}, w: {w}, b:{b}")
    return w, b, J_history, p_history

iterations = 10000
tmp_alpha = 1.0e-1

w_final, b_final, J_hist, p_hist = gradient_descent(
    x_train, y_train, w, b, tmp_alpha, iterations)
print(f"(w,b) found by gradient descent: ({w_final},{b_final})")
f_wb = fxn(x_train, w_final, b_final)
print("Cost is", cost_fxn(x_train, y_train, w_final, b_final))
Iteration 0: Cost 7.45e+00, w: [4.95000001], b:9.950000761764102
Iteration 1000: Cost 1.32e-01, w: [1.94357846], b:-4.532187811486597
Iteration 2000: Cost 8.42e-02, w: [2.71585819], b:-6.535471646001672
Iteration 3000: Cost 6.46e-02, w: [3.22521954], b:-7.834466336781398
Iteration 4000: Cost 5.30e-02, w: [3.62055449], b:-8.83606124058392
Iteration 5000: Cost 4.51e-02, w: [3.94791911], b:-9.662597001748475
Iteration 6000: Cost 3.92e-02, w: [4.22882495], b:-10.37034842110977
Iteration 7000: Cost 3.48e-02, w: [4.47546538], b:-10.990900553853033
Iteration 8000: Cost 3.12e-02, w: [4.69557844], b:-11.544161480692926
Iteration 9000: Cost 2.83e-02, w: [4.89444965], b:-12.043663029887693
Iteration 10000: Cost 2.59e-02, w: [5.07588043], b:-12.499101969930441
(w,b) found by gradient descent: ([5.07588043],-12.499101969930441)
Cost is 0.025934093960807036
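With the learned parameters, the usual decision rule predicts y = 1 whenever the estimated probability is at least 0.5; a small illustrative check on the training set:
# Predicted probabilities and 0/1 labels for the training set (threshold 0.5).
probs = fxn(x_train, w_final, b_final)
preds = (probs >= 0.5).astype(int)
print(preds)   # expected [0 0 0 1 1 1] -- matches y_train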
Some Plots ->
Final w, b
if len(w) == 1:
    fxn2 = fxn(x_train, w_final, b_final)

    plt.scatter(x_train, y_train, color="red")
    plt.plot(x_train, fxn2)
    plt.show()

soup_bowl(x_train, y_train, w_final, b_final)
Regularized Linear Regression
Finding Cost Fxn
\[J(\mathbf{w},b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})^2 + \frac{\lambda}{2m} \sum_{j=0}^{n-1} w_j^2 \]
def cost_fxn_regularized(X, y, w, b, lambda_=1):
    m = X.shape[0]
    cost = cost_fxn(X, y, w, b)
    cost += (lambda_ / (2 * m)) * np.sum(w ** 2)   # regularization term from the formula above
    return cost
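A quick illustrative comparison on the data from above shows the regularization term adding lambda_/(2m) * sum(w^2) on top of the unregularized cost, so larger weights are penalized more:
print(cost_fxn(x_train, y_train, w_final, b_final))
print(cost_fxn_regularized(x_train, y_train, w_final, b_final, lambda_=1))
# The gap between the two grows with ||w||^2, which is what discourages large weights.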
Finding dJ/dw and dJ/db
\[\begin{align*} \frac{\partial J(\mathbf{w},b)}{\partial w_j} &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_{j}^{(i)} + \frac{\lambda}{m} w_j \\ \frac{\partial J(\mathbf{w},b)}{\partial b} &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}) \end{align*}\]
def compute_gradient_regularized(X, y, w, b, lambda_):
    m = X.shape[0]
    dj_dw, dj_db = compute_gradient(X, y, w, b)

    dj_dw += (lambda_ / m) * w   # only the weight gradient is regularized

    return dj_dw, dj_db
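Only the weight gradient picks up the extra (lambda_/m) * w term, while dJ/db is unchanged; a quick illustrative call:
print(compute_gradient(x_train, y_train, w_final, b_final))
print(compute_gradient_regularized(x_train, y_train, w_final, b_final, lambda_=1))
# dj_db matches the unregularized value; dj_dw is shifted by (lambda_/m) * w_final.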