Softmax Function
Basic Softmax Function along with an example.
Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
import math
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
Function
\[ \large a_j = \frac{e^{z_j}}{ \sum_{k=1}^{N}{e^{z_k} }} \]
def softmax(z):
    ez = np.exp(z)         # element-wise exponentials
    sm = ez / np.sum(ez)   # normalize so the entries sum to 1
    return sm
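Note that np.exp overflows for large inputs. A common remedy, shown here as a minimal sketch (softmax_stable is an illustrative name, not part of the original notebook), is to shift the logits by their maximum before exponentiating; the shift cancels in the ratio, so the result is unchanged.
def softmax_stable(z):
    ez = np.exp(z - np.max(z))  # shift by the max to avoid overflow
    return ez / np.sum(ez)      # identical result to softmax(z)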
Example
input_array = np.array([1, 2, 3, 4])
softmax_result = softmax(input_array)
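As a quick sanity check (not part of the original run), the outputs are all positive, preserve the ordering of the inputs, and sum to 1:
print(softmax_result)          # increasing, like the inputs
print(np.sum(softmax_result))  # 1.0, up to floating-point error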
custom_labels = ['z0', 'z1', 'z2', 'z3']

fig, axs = plt.subplots(1, 2, figsize=(12, 6))

axs[0].bar(np.arange(len(input_array)), input_array,
           tick_label=custom_labels, color='b')
axs[0].set_title('Input Array')
axs[0].set_xlabel('Index (zi)')
axs[0].set_ylabel('Value')
for i, v in enumerate(input_array):
    axs[0].text(i, v, str(v), ha='center', va='bottom', fontsize=12)

axs[1].bar(np.arange(len(softmax_result)), softmax_result,
           tick_label=custom_labels, color='g')
axs[1].set_title('Softmax Result')
axs[1].set_xlabel('Index (zi)')
axs[1].set_ylabel('Value')
for i, v in enumerate(softmax_result):
    axs[1].text(i, v, f'{v:.2f}', ha='center', va='bottom', fontsize=12)

plt.tight_layout()
plt.show()
Cost Function
\[ L(\mathbf{a},y)=\begin{cases} -\log(a_1), & \text{if } y=1\\ \quad\vdots\\ -\log(a_N), & \text{if } y=N \end{cases} \]
\[ J(\mathbf{w},b) = -\frac{1}{m} \left[ \sum_{i=1}^{m} \sum_{j=1}^{N} 1\left\{y^{(i)} = j\right\} \log \frac{e^{z^{(i)}_j}}{\sum_{k=1}^N e^{z^{(i)}_k} }\right] \]
def loss(x, y):
    a = softmax(x)
    epsilon = 1e-15  # small constant to avoid taking log(0)
    return -math.log(a[y] + epsilon)
def cost_fxn(X, y):
    m = X.shape[0]
    cost = 0
    for i in range(m):
        cost += loss(X[i], y[i])
    cost = cost / m  # average loss over all m examples
    return cost
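A small illustrative call (the values below are made up for demonstration): for a batch of two logit rows where the correct class holds the largest logit, the cost is the mean of the two per-example losses.
z_demo = np.array([[1.0, 2.0, 3.0, 4.0],
                   [4.0, 3.0, 2.0, 1.0]])
y_demo = np.array([3, 0])        # correct class has the largest logit in each row
print(cost_fxn(z_demo, y_demo))  # roughly 0.44, i.e. -log(0.64)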
Dataset
centers = [[-5, 2], [-2, -2], [1, 2], [5, -2]]
X_train, y_train = make_blobs(
    n_samples=2000, centers=centers, cluster_std=1.0, random_state=30)
plt.figure(figsize=(8, 6))
scatter = plt.scatter(X_train[:, 0], X_train[:, 1],
                      c=y_train, cmap='viridis', marker='o', s=25)

legend_labels = [f'Cluster {i}' for i in range(len(centers))]
plt.legend(handles=scatter.legend_elements()[0],
           labels=legend_labels, title="Clusters")
plt.title("Generated Dataset with Four Clusters")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")

plt.show()
Models
model1 applies the softmax activation in its output layer, so its predictions are already probabilities.
model1 = Sequential(
    [
        Dense(25, activation='relu'),
        Dense(15, activation='relu'),
        Dense(4, activation='softmax')  # <-- softmax activation here
    ]
)
model1.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(0.001),
)
model1.fit(
    X_train, y_train,
    epochs=10
)
Epoch 1/10
63/63 [==============================] - 1s 2ms/step - loss: 1.0357
Epoch 2/10
63/63 [==============================] - 0s 2ms/step - loss: 0.4241
Epoch 3/10
63/63 [==============================] - 0s 2ms/step - loss: 0.1823
Epoch 4/10
63/63 [==============================] - 0s 3ms/step - loss: 0.0980
Epoch 5/10
63/63 [==============================] - 0s 2ms/step - loss: 0.0683
Epoch 6/10
63/63 [==============================] - 0s 2ms/step - loss: 0.0532
Epoch 7/10
63/63 [==============================] - 0s 2ms/step - loss: 0.0426
Epoch 8/10
63/63 [==============================] - 0s 2ms/step - loss: 0.0369
Epoch 9/10
63/63 [==============================] - 0s 2ms/step - loss: 0.0329
Epoch 10/10
63/63 [==============================] - 0s 2ms/step - loss: 0.0300
<keras.src.callbacks.History at 0x1eeea31c040>
model2 instead ends with a linear layer and hands raw logits to the loss via from_logits=True; Keras then applies the softmax inside the loss, which is the numerically preferred setup.
model2 = Sequential(
    [
        Dense(25, activation='relu'),
        Dense(15, activation='relu'),
        Dense(4, activation='linear')  # <-- linear activation here
    ]
)
model2.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(0.001),
)
model2.fit(
    X_train, y_train,
    epochs=10
)
Epoch 1/10
63/63 [==============================] - 1s 2ms/step - loss: 1.0976
Epoch 2/10
63/63 [==============================] - 0s 2ms/step - loss: 0.4926
Epoch 3/10
63/63 [==============================] - 0s 2ms/step - loss: 0.2572
Epoch 4/10
63/63 [==============================] - 0s 2ms/step - loss: 0.1406
Epoch 5/10
63/63 [==============================] - 0s 2ms/step - loss: 0.0897
Epoch 6/10
63/63 [==============================] - 0s 2ms/step - loss: 0.0672
Epoch 7/10
63/63 [==============================] - 0s 2ms/step - loss: 0.0547
Epoch 8/10
63/63 [==============================] - 0s 3ms/step - loss: 0.0476
Epoch 9/10
63/63 [==============================] - 0s 2ms/step - loss: 0.0422
Epoch 10/10
63/63 [==============================] - 0s 2ms/step - loss: 0.0381
<keras.src.callbacks.History at 0x1eeeb1a0ee0>
Outputs
p_preferred = model1.predict(X_train)  # rows are probabilities (softmax output layer)
data = []

for i in range(5):
    row = {
        'Prediction': p_preferred[i],
        'Category': np.argmax(p_preferred[i]),
        'Max': np.max(p_preferred[i]),
        'Min': np.min(p_preferred[i])
    }
    data.append(row)

df = pd.DataFrame(data)
print(df)
print("")

y_pred = np.argmax(p_preferred, axis=1)
correct_predictions = np.sum(y_pred == y_train)
total_predictions = len(y_train)
error_percentage = (1 - (correct_predictions / total_predictions)) * 100
print("Error percentage =", error_percentage, "%")
63/63 [==============================] - 0s 2ms/step
Prediction Category Max \
0 [0.00076469, 0.0032128657, 0.981888, 0.01413442] 2 0.981888
1 [0.9984345, 0.0015346134, 2.1353391e-05, 9.585833e-06] 0 0.998434
2 [0.9823111, 0.017138032, 0.00038771704, 0.00016316074] 0 0.982311
3 [0.0013412955, 0.99340725, 0.004864373, 0.00038706747] 1 0.993407
4 [0.003443484, 0.00016099671, 0.9962613, 0.00013426131] 2 0.996261
Min
0 0.000765
1 0.000010
2 0.000163
3 0.000387
4 0.000134
Error percentage = 0.8499999999999952 %
p_preferred = model2.predict(X_train)  # rows are raw logits, not probabilities
data = []

for i in range(5):
    row = {
        'Prediction': p_preferred[i],
        'Category': np.argmax(p_preferred[i]),
        'Max': np.max(p_preferred[i]),
        'Min': np.min(p_preferred[i])
    }
    data.append(row)

df = pd.DataFrame(data)
print(df)
print("")

y_pred = np.argmax(p_preferred, axis=1)
correct_predictions = np.sum(y_pred == y_train)
total_predictions = len(y_train)
error_percentage = (1 - (correct_predictions / total_predictions)) * 100
print("Error percentage =", error_percentage, "%")
63/63 [==============================] - 0s 2ms/step
Prediction Category Max \
0 [-4.3254666, 0.14574976, 3.842623, -0.401972] 2 3.842623
1 [6.7959185, 1.1274883, -5.037459, -2.7135649] 0 6.795918
2 [4.8487134, 1.3494179, -3.651421, -2.2844274] 0 4.848713
3 [-2.8908267, 4.823671, -0.023022205, -1.1583089] 1 4.823671
4 [-1.3442385, 0.3332066, 4.854504, -3.0173476] 2 4.854504
Min
0 -4.325467
1 -5.037459
2 -3.651421
3 -2.890827
4 -3.017348
Error percentage = 1.0499999999999954 %
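Because model2 returns logits, argmax still picks the right class, but the rows above are not probabilities. To recover probabilities one can apply softmax after the fact; a minimal sketch using TensorFlow's tf.nn.softmax (p_proba is an illustrative name):
p_proba = tf.nn.softmax(p_preferred).numpy()  # convert each logit row to probabilities
print(p_proba[:5])                  # now comparable to model1's output
print(np.sum(p_proba[:5], axis=1))  # each row sums to ~1.0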