Deep Learning Project: Street View Housing Number Digit Recognition¶

Marks: 60¶


Context¶


One of the most interesting tasks in deep learning is recognizing objects in natural scenes. The ability to process visual information with machine learning algorithms has proven valuable across a wide range of real-world applications.

The SVHN dataset contains over 600,000 labeled digits cropped from street-level photos. It is one of the most popular image recognition datasets. It has been used in neural networks created by Google to improve the map quality by automatically transcribing the address numbers from a patch of pixels. The transcribed number with a known street address helps pinpoint the location of the building it represents.


Objective¶


Our objective is to predict the digit shown in each image using fully connected feed-forward neural networks (ANNs) and Convolutional Neural Networks (CNNs). We will build several variants of each and select the one that delivers the best performance.


Dataset¶


Here, we will use a subset of the original data to save some computation time. The dataset is provided as a .h5 file. The basic preprocessing steps have already been applied to the dataset.


Things I found out before writing any model¶

| Finding | What it changed |
| --- | --- |
| The H5 file has an explicit validation set (X_val, 60,000 images) | Used directly — no point carving one out of training data |
| Labels already in range 0–9 (confirmed at runtime) | No remapping needed |
| 69.4% of images have dark backgrounds | Background normalization inverts them for consistency |
| Each crop shows more than one digit | Only the center one is labeled — the rest is noise |
| CNN learns center-focus on its own | Data-driven spatial mask scored 87.11% vs raw 92.22% (−5.11%) |
| CNN Model 1 overfits severely | 99.08% train vs 87.30% test — fixed with Dropout(0.5) in Model 2 |
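The background-normalization finding above (most images have dark backgrounds, so they are inverted for consistency) can be sketched in a few lines. This is a minimal sketch only: the border heuristic, the threshold value, and the helper name `invert_dark_background` are all assumptions here, not the notebook's actual rule.

```python
import numpy as np

def invert_dark_background(img, threshold=127.5):
    """Invert an image if its border pixels are mostly dark.

    Hypothetical heuristic: the outer 2-pixel frame approximates the
    background, since the labeled digit sits in the center of the crop.
    """
    border = np.concatenate([
        img[:2].ravel(), img[-2:].ravel(),              # top and bottom rows
        img[2:-2, :2].ravel(), img[2:-2, -2:].ravel()   # left and right columns
    ])
    if border.mean() < threshold:      # dark background: flip to light
        return 255.0 - img
    return img

# A mostly-dark image gets inverted; a mostly-light one is left alone
dark  = np.full((32, 32), 20.0)
light = np.full((32, 32), 230.0)
print(invert_dark_background(dark).mean())    # 235.0
print(invert_dark_background(light).mean())   # 230.0
```

Inverting dark-background crops means the model sees one consistent polarity (dark digit on light background, or vice versa) instead of having to learn both.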

Mount the drive¶

Let's start by mounting Google Drive.

In [1]:
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive

Importing the necessary libraries¶

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import h5py
import random
import os

from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Input, Dense, Dropout, Flatten,
    Conv2D, MaxPooling2D,
    BatchNormalization, LeakyReLU
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import (
    EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
)

print('All libraries imported successfully.')
All libraries imported successfully.

Let's check the version of TensorFlow.

In [3]:
print('TensorFlow version:', tf.__version__)
TensorFlow version: 2.19.0

Global Definitions — Seeds, Model Builders, Utilities¶

All model builders, the seed utility, and plotting functions are defined here at the top of the notebook so they are available in every section — including the preprocessing experiment which runs after model training.

Why define models here rather than inline?¶

The preprocessing experiment (Section 5) needs to re-instantiate CNN Model 2 on different inputs. Defining builders once and calling them multiple times avoids duplication and ensures identical architectures across all experiments.

In [4]:
# ── Seed utility ─────────────────────────────────────────────────────────────
# Fix seeds for numpy, Python's random, and TensorFlow before each model build.
# This ensures reproducibility within the same Colab environment.
# Note: full determinism across different GPU hardware is not guaranteed —
# parallel floating-point operations may produce slightly different results.
SEED = 42

def reset():
    """Clear Keras session and re-fix all random seeds."""
    K.clear_session()
    np.random.seed(SEED)
    random.seed(SEED)
    tf.random.set_seed(SEED)

reset()


# ── Plotting utility ──────────────────────────────────────────────────────────
def plot_history(history, title):
    """Plot training and validation accuracy and loss curves."""
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
    fig.suptitle(title, fontsize=14, fontweight='bold')
    ax1.plot(history.history['accuracy'],     label='Train',      color='steelblue')
    ax1.plot(history.history['val_accuracy'], label='Validation', color='darkorange', linestyle='--')
    ax1.set_title('Accuracy over Epochs'); ax1.set_xlabel('Epoch')
    ax1.legend(); ax1.grid(True, alpha=0.3)
    ax2.plot(history.history['loss'],         label='Train',      color='steelblue')
    ax2.plot(history.history['val_loss'],     label='Validation', color='darkorange', linestyle='--')
    ax2.set_title('Loss over Epochs'); ax2.set_xlabel('Epoch')
    ax2.legend(); ax2.grid(True, alpha=0.3)
    plt.tight_layout(); plt.show()


# ── ANN Model 1 ───────────────────────────────────────────────────────────────
# Architecture: 64 → 32 → 10 (Softmax)
# ReLU on hidden layers: fast, no vanishing gradient for positive inputs.
# Softmax on output: converts scores to a probability distribution summing to 1.
def build_ann1():
    model = Sequential([
        Input(shape=(1024,)),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(10, activation='softmax')
    ], name='ANN_Model_1')
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate=0.001),
        metrics=['accuracy']
    )
    return model


# ── ANN Model 2 ───────────────────────────────────────────────────────────────
# Architecture: 256 → 128 → Dropout(0.2) → 64 → 64 → 32 → BatchNorm → 10
# Dropout(0.2): randomly zeroes 20% of activations — prevents co-adaptation.
# BatchNorm: normalizes layer inputs → stable, faster training.
# lr=0.0005: deeper network benefits from smaller, more careful update steps.
def build_ann2():
    model = Sequential([
        Input(shape=(1024,)),
        Dense(256, activation='relu'),
        Dense(128, activation='relu'),
        Dropout(0.2),
        Dense(64,  activation='relu'),
        Dense(64,  activation='relu'),
        Dense(32,  activation='relu'),
        BatchNormalization(),
        Dense(10,  activation='softmax')
    ], name='ANN_Model_2')
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate=0.0005),
        metrics=['accuracy']
    )
    return model


# ── CNN Model 1 ───────────────────────────────────────────────────────────────
# Architecture: Conv(16) → Conv(32) → MaxPool → Dense(32) → Output
# LeakyReLU(negative_slope=0.1): prevents dying ReLU — conv filters produce many
#   negative pre-activations early in training. Standard ReLU zeros these permanently,
#   killing the filter. LeakyReLU keeps a 10% gradient, preserving all filters.
# MaxPooling(2,2): halves spatial dims (32→16), reduces params, gives translational
#   invariance — a digit shifted by 1px still produces the same output.
# padding='same': output stays 32x32 after each conv — no spatial shrinkage.
# Input() layer: avoids Keras deprecation warning from input_shape in Dense/Conv.
def build_cnn1():
    model = Sequential(name='CNN_Model_1')
    model.add(Input(shape=(32, 32, 1)))
    model.add(Conv2D(16, (3,3), padding='same'))
    model.add(LeakyReLU(negative_slope=0.1))
    model.add(Conv2D(32, (3,3), padding='same'))
    model.add(LeakyReLU(negative_slope=0.1))
    model.add(MaxPooling2D(2,2))
    model.add(Flatten())
    model.add(Dense(32))
    model.add(LeakyReLU(negative_slope=0.1))
    model.add(Dense(10, activation='softmax'))
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate=0.001),
        metrics=['accuracy']
    )
    return model


# ── CNN Model 2 ───────────────────────────────────────────────────────────────
# Architecture: 2 conv blocks (16→32, 32→64) + BatchNorm + Dropout(0.5) head
# Filter progression 16→32→32→64: early filters detect edges and strokes;
#   deeper filters combine these into digit-part representations.
# BatchNorm after each MaxPool: re-normalizes activations before the next block,
#   preventing scale drift that destabilizes training in deeper networks.
# Dropout(0.5) in dense head ONLY: conv layers share weights spatially and are
#   inherently regularized. Dense layers are not — CNN Model 1 proved this:
#   99.08% train vs 87.30% test with no dropout. Dropout(0.5) closed that gap.
def build_cnn2():
    model = Sequential(name='CNN_Model_2')
    model.add(Input(shape=(32, 32, 1)))
    # Block 1: low-level features — edges, stroke orientations
    model.add(Conv2D(16, (3,3), padding='same'))
    model.add(LeakyReLU(negative_slope=0.1))
    model.add(Conv2D(32, (3,3), padding='same'))
    model.add(LeakyReLU(negative_slope=0.1))
    model.add(MaxPooling2D(2,2))          # 32x32 → 16x16
    model.add(BatchNormalization())
    # Block 2: high-level features — curves, digit parts
    model.add(Conv2D(32, (3,3), padding='same'))
    model.add(LeakyReLU(negative_slope=0.1))
    model.add(Conv2D(64, (3,3), padding='same'))
    model.add(LeakyReLU(negative_slope=0.1))
    model.add(MaxPooling2D(2,2))          # 16x16 → 8x8
    model.add(BatchNormalization())
    # Dense head
    model.add(Flatten())                  # 8x8x64 = 4096
    model.add(Dense(32))
    model.add(LeakyReLU(negative_slope=0.1))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate=0.001),
        metrics=['accuracy']
    )
    return model


print('All model builders and utilities defined.')
All model builders and utilities defined.

Load the dataset¶

  • Download the .h5 file from Google Drive.
  • Inspect the file structure before loading — never assume key names.
  • Load and split into train, validation, and test sets.
In [5]:
import gdown

FILE_ID   = '1YoTWTdShWdI55SmBp4Ie2IkaI_E3Gmp0'
DATA_PATH = 'svhn_data.h5'

# Remove any incomplete file before downloading
if os.path.exists(DATA_PATH):
    os.remove(DATA_PATH)

# gdown handles Google's virus-scan confirmation redirect for large files;
# without it you would get the HTML warning page instead of the file,
# producing a truncated H5 that throws an OSError on open.
# fuzzy=True additionally lets gdown extract the file id from a full share URL.
gdown.download(id=FILE_ID, output=DATA_PATH, quiet=False, fuzzy=True)

size_mb = os.path.getsize(DATA_PATH) / 1e6
print(f'Downloaded: {size_mb:.1f} MB')
assert size_mb > 460, f'File looks incomplete ({size_mb:.1f} MB) — re-run this cell'
Downloading...
From (original): https://drive.google.com/uc?id=1YoTWTdShWdI55SmBp4Ie2IkaI_E3Gmp0
From (redirected): https://drive.google.com/uc?id=1YoTWTdShWdI55SmBp4Ie2IkaI_E3Gmp0&confirm=t&uuid=839ad89e-cf32-426a-a8d6-9ea821388281
To: /content/svhn_data.h5
100%|██████████| 492M/492M [00:08<00:00, 58.7MB/s]
Downloaded: 491.6 MB

Inspect the H5 file structure¶

I never hard-code key names. The function below prints every dataset in the file with its shape and dtype — so you can see exactly what I'm loading.

In [6]:
def inspect_h5(path):
    """Recursively print all groups and datasets in an H5 file."""
    def _visitor(name, obj):
        indent = '  ' * name.count('/')
        if isinstance(obj, h5py.Dataset):
            print(f'{indent}[DATASET]  /{name}')
            print(f'{indent}           shape : {obj.shape}')
            print(f'{indent}           dtype : {obj.dtype}')
        elif isinstance(obj, h5py.Group):
            print(f'{indent}[GROUP]    /{name}')
    with h5py.File(path, 'r') as hf:
        print(f'Top-level keys: {list(hf.keys())}')
        print('-' * 50)
        hf.visititems(_visitor)

inspect_h5(DATA_PATH)
Top-level keys: ['X_test', 'X_train', 'X_val', 'y_test', 'y_train', 'y_val']
--------------------------------------------------
[DATASET]  /X_test
           shape : (18000, 32, 32)
           dtype : float32
[DATASET]  /X_train
           shape : (42000, 32, 32)
           dtype : float32
[DATASET]  /X_val
           shape : (60000, 32, 32)
           dtype : float32
[DATASET]  /y_test
           shape : (18000,)
           dtype : uint8
[DATASET]  /y_train
           shape : (42000,)
           dtype : uint8
[DATASET]  /y_val
           shape : (60000,)
           dtype : uint8

Finding: the file contains six datasets — X_train, y_train, X_val, y_val, X_test, y_test. An explicit validation set is provided. I use it directly with validation_data= instead of validation_split=0.2, which keeps the full 42,000 training images available for learning.

In [7]:
with h5py.File(DATA_PATH, 'r') as hf:
    all_keys = list(hf.keys())
    print('Keys found:', all_keys)
    data = {key: hf[key][:] for key in all_keys}

print('\nLoaded arrays:')
for k, v in data.items():
    print(f'  {k:12s}  shape={v.shape}  dtype={v.dtype}  '
          f'min={v.min():.1f}  max={v.max():.1f}')
Keys found: ['X_test', 'X_train', 'X_val', 'y_test', 'y_train', 'y_val']

Loaded arrays:
  X_test        shape=(18000, 32, 32)  dtype=float32  min=0.0  max=255.0
  X_train       shape=(42000, 32, 32)  dtype=float32  min=0.0  max=255.0
  X_val         shape=(60000, 32, 32)  dtype=float32  min=0.0  max=255.0
  y_test        shape=(18000,)  dtype=uint8  min=0.0  max=9.0
  y_train       shape=(42000,)  dtype=uint8  min=0.0  max=9.0
  y_val         shape=(60000,)  dtype=uint8  min=0.0  max=9.0
In [8]:
# Assign to standard variable names
# Edit right-hand strings if inspect_h5() showed different key names
X_train = data['X_train']
y_train = data['y_train']
X_val   = data['X_val']
y_val   = data['y_val']
X_test  = data['X_test']
y_test  = data['y_test']
In [9]:
# Verify every image has a corresponding label — never assume the dataset is clean
assert len(X_train) == len(y_train), f'Mismatch: {len(X_train)} images but {len(y_train)} labels'
assert len(X_val)   == len(y_val),   f'Mismatch: {len(X_val)} images but {len(y_val)} labels'
assert len(X_test)  == len(y_test),  f'Mismatch: {len(X_test)} images but {len(y_test)} labels'
print('All splits matched — every image has a label.')
All splits matched — every image has a label.

Check the number of images in the training, validation and test datasets.

In [10]:
print(f'Training images   : {X_train.shape[0]:,}')
print(f'Validation images : {X_val.shape[0]:,}')
print(f'Test images       : {X_test.shape[0]:,}')
print(f'Image dimensions  : {X_train.shape[1:]}')
Training images   : 42,000
Validation images : 60,000
Test images       : 18,000
Image dimensions  : (32, 32)

Observation:

  • Training: 42,000 images — used to fit model weights.
  • Validation: 60,000 images — monitored during training to detect overfitting and trigger EarlyStopping / ReduceLROnPlateau callbacks.
  • Test: 18,000 images — evaluated once at the end. Never seen during training.
  • Each image is 32x32 greyscale — 1,024 pixels when flattened.
  • Labels confirmed in range 0–9 across all three splits.
  • Each crop is centered on the labeled digit; neighboring digits bleed in from the sides. This is structural to SVHN, not a labeling error.

Visualizing images¶

  • Use X_train to visualize the first 10 images.
  • Use y_train to print the first 10 labels.
In [11]:
fig, axes = plt.subplots(2, 5, figsize=(14, 6))
fig.suptitle('First 10 Training Images', fontsize=16, fontweight='bold')
for i, ax in enumerate(axes.flat):
    ax.imshow(X_train[i], cmap='gray')
    ax.set_title(f'Label: {y_train[i]}', fontsize=12)
    ax.axis('off')
plt.tight_layout()
plt.show()
print('First 10 labels:', y_train[:10])
[Figure: grid of the first 10 training images with their labels]
First 10 labels: [2 6 7 4 4 0 3 0 7 3]

Exploratory Data Analysis (EDA)¶

Before any preprocessing or modeling I examine the data to answer three questions:

  1. Are the classes balanced — does each digit appear roughly equally?
  2. What is the visual diversity within each class?
  3. Where do digits consistently appear spatially within the crops?

These findings directly inform our normalization strategy, architecture choices, and the preprocessing experiment in Section 5.

1. Class Distribution¶

Check whether the 10 digit classes are evenly represented. An imbalanced dataset would bias the model toward majority classes and make accuracy a misleading metric.

In [12]:
# Labels confirmed 0-9 — verify before assuming
print('Unique label values:', np.unique(y_train))
y_display = y_train   # no remapping needed for this dataset version

unique, counts = np.unique(y_display, return_counts=True)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
fig.suptitle('Class Distribution — Training Set', fontsize=14, fontweight='bold')

bars = ax1.bar(unique, counts, color='steelblue', edgecolor='white')
ax1.set_xlabel('Digit class'); ax1.set_ylabel('Number of images')
ax1.set_title('Count per class'); ax1.set_xticks(unique)
ax1.grid(axis='y', alpha=0.3)
for bar, count in zip(bars, counts):
    ax1.text(bar.get_x()+bar.get_width()/2, bar.get_height()+50,
             str(count), ha='center', va='bottom', fontsize=9)

ax2.pie(counts, labels=unique, autopct='%1.1f%%',
        colors=plt.cm.tab10.colors, startangle=90)
ax2.set_title('Proportion per class')
plt.tight_layout(); plt.show()

print(f'Imbalance ratio (max/min): {counts.max()/counts.min():.2f}')
Unique label values: [0 1 2 3 4 5 6 7 8 9]
[Figure: class distribution — bar chart of counts per class and pie chart of proportions]
Imbalance ratio (max/min): 1.03

Observation:

  • All 10 classes appear at roughly equal frequency (~10% each).
  • The imbalance ratio is close to 1 — no class dominates.
  • Accuracy is a valid primary metric for this project. A balanced dataset prevents a model from gaming accuracy by always predicting the majority class — it must genuinely learn all 10 digits.

2. Sample Images per Class¶

Display 5 random examples of each digit class to assess visual diversity.

In [13]:
fig, axes = plt.subplots(10, 5, figsize=(10, 20))
fig.suptitle('5 Random Samples per Digit Class', fontsize=14, fontweight='bold')
for digit in range(10):
    idx = np.where(y_display == digit)[0]
    sample_idx = np.random.choice(idx, size=5, replace=False)
    for col, si in enumerate(sample_idx):
        axes[digit, col].imshow(X_train[si], cmap='gray')
        axes[digit, col].axis('off')
        if col == 0:
            # axis('off') also hides axis labels, so draw the row label as text
            axes[digit, col].text(-0.35, 0.5, f'Digit {digit}', fontsize=11,
                                  transform=axes[digit, col].transAxes, va='center')
plt.tight_layout(); plt.show()
[Figure: 5 random samples per digit class]

Observation:

  • There's a lot of variation within each class — same digit, completely different fonts, sizes, angles, and lighting. The model needs enough capacity to handle that.

  • Something that surprised me at first: each crop contains more than one digit. SVHN was built by centering a 32x32 window on the target digit in the original street photo — so the neighboring digits on either side naturally fall into frame. The label only refers to the center digit. An image labeled 2 might visually show 128 or 25 or 72.

  • This raised a question I came back to later: can I help the model focus on the center and ignore the neighbors? I tested this in Section 5.

  • The pairs I'd expect to cause the most confusion: 1/7, 3/8, 5/6. I'll check if that holds in the confusion matrix.

3. Mean Image per Class¶

Average all images belonging to each digit class. The result reveals the underlying spatial structure — where strokes tend to appear for each digit, and critically, how the signal is spatially distributed across the 32x32 crop.

In [14]:
fig, axes = plt.subplots(2, 5, figsize=(14, 6))
fig.suptitle('Mean Image per Digit Class', fontsize=14, fontweight='bold')
for digit, ax in enumerate(axes.flat):
    idx        = np.where(y_display == digit)[0]
    mean_image = X_train[idx].mean(axis=0)
    ax.imshow(mean_image, cmap='gray')
    ax.set_title(f'Digit {digit}  (n={len(idx):,})', fontsize=10)
    ax.axis('off')
plt.tight_layout(); plt.show()
[Figure: mean image per digit class]

Observation:

  • For most classes you can actually recognise the digit in the mean image — which tells me there's consistent spatial signal for the model to pick up on.
  • The center of every mean image is noticeably brighter. That's the labeled digit consistently appearing there, which makes sense given how the dataset was built.
  • Even at the edges you can see faint vertical structures — that's the residual signal from thousands of different neighboring digits averaging out.
  • This gave me the idea for the preprocessing experiment in Section 5: if the mean image already shows me where the digit signal lives, I can use it as a spatial weight map to focus the model's attention.
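The weight-map idea can be sketched in a few lines. This is a minimal illustration of the mechanics only: the random stand-in images and the min-max normalization are assumptions here, and the actual Section 5 masking may differ in detail.

```python
import numpy as np

# Stand-in for the training images (the real notebook uses X_train)
rng = np.random.default_rng(42)
X = rng.uniform(0, 1, size=(100, 32, 32)).astype('float32')

# Average over all images, then rescale to [0, 1] so the mean image
# acts as a per-pixel attention weight: bright center -> weight near 1,
# dim edges -> weight near 0.
mean_image = X.mean(axis=0)
weight_map = (mean_image - mean_image.min()) / (mean_image.max() - mean_image.min())

# Broadcasting multiplies every image by the same (32, 32) map
X_masked = X * weight_map
print(X_masked.shape)                                     # (100, 32, 32)
print(float(weight_map.min()), float(weight_map.max()))   # 0.0 1.0
```

Per the findings table at the top, this hand-crafted mask actually hurt accuracy (87.11% vs 92.22% raw), suggesting the CNN learns center-focus on its own and the mask only destroys information.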

EDA Summary¶

| Finding | Implication |
| --- | --- |
| Classes balanced (~10% each) | Accuracy is a valid metric. No resampling needed. |
| High intra-class visual variation | Model needs sufficient capacity — shallow architectures will plateau early. |
| Each crop contains neighboring digits | The center digit is the label — neighbors are noise the model must learn to discount. |
| Mean images show center-dominant signal | Spatial structure is consistent and learnable — CNNs are well-suited to exploit this. |

Data preparation¶

  • Print the shape and pixel array of the first training image.
  • Normalize the train, validation, and test datasets by dividing by 255.
  • Print the new shapes.
  • One-hot encode the target variable.
In [15]:
print('Shape of first training image:', X_train[0].shape)
print('Pixel values of first training image:')
print(X_train[0])
Shape of first training image: (32, 32)
Pixel values of first training image:
[[ 33.0704  30.2601  26.852  ...  71.4471  58.2204  42.9939]
 [ 25.2283  25.5533  29.9765 ... 113.0209 103.3639  84.2949]
 [ 26.2775  22.6137  40.4763 ... 113.3028 121.775  115.4228]
 ...
 [ 28.5502  36.212   45.0801 ...  24.1359  25.0927  26.0603]
 [ 38.4352  26.4733  23.2717 ...  28.1094  29.4683  30.0661]
 [ 50.2984  26.0773  24.0389 ...  49.6682  50.853   53.0377]]

Normalize the train and the test data¶

In [16]:
# Flatten (N, 32, 32) → (N, 1024) for ANN, normalize to [0, 1]
# Dividing by 255 maps pixel values to [0,1] — the range where
# gradient-based optimizers (Adam) work most efficiently.
# We also normalize the validation set using the same operation
# (no fitting on val/test — just the same linear transform).
X_train_ann = X_train.reshape(X_train.shape[0], -1).astype('float32') / 255.0
X_val_ann   = X_val.reshape(X_val.shape[0],     -1).astype('float32') / 255.0
X_test_ann  = X_test.reshape(X_test.shape[0],   -1).astype('float32') / 255.0

print(f'Pixel range: [{X_train_ann.min():.2f}, {X_train_ann.max():.2f}]')
Pixel range: [0.00, 1.00]

Print the shapes of Training, Validation and Test data

In [17]:
print('X_train_ann shape:', X_train_ann.shape)   # (42000, 1024)
print('X_val_ann   shape:', X_val_ann.shape)     # (60000, 1024)
print('X_test_ann  shape:', X_test_ann.shape)    # (18000, 1024)
X_train_ann shape: (42000, 1024)
X_val_ann   shape: (60000, 1024)
X_test_ann  shape: (18000, 1024)

One-hot encode output¶

In [18]:
# Check actual label range before any remapping — never assume
print('Unique y_train:', np.unique(y_train))
print('Unique y_val  :', np.unique(y_val))
print('Unique y_test :', np.unique(y_test))

# Standard SVHN uses label 10 for digit zero. This dataset version already
# uses 0-9. The function below handles both cases safely.
def remap_labels(y):
    y_out = y.copy()
    if 10 in y_out:
        print('  Label 10 found — remapping 10 → 0')
        y_out[y_out == 10] = 0
    else:
        print('  Labels already 0-9 — no remapping needed')
    return y_out

y_train_c = remap_labels(y_train)
y_val_c   = remap_labels(y_val)
y_test_c  = remap_labels(y_test)

y_train_ohe = to_categorical(y_train_c, num_classes=10)
y_val_ohe   = to_categorical(y_val_c,   num_classes=10)
y_test_ohe  = to_categorical(y_test_c,  num_classes=10)

print('\ny_train_ohe:', y_train_ohe.shape)
print('y_val_ohe  :', y_val_ohe.shape)
print('y_test_ohe :', y_test_ohe.shape)
Unique y_train: [0 1 2 3 4 5 6 7 8 9]
Unique y_val  : [0 1 2 3 4 5 6 7 8 9]
Unique y_test : [0 1 2 3 4 5 6 7 8 9]
  Labels already 0-9 — no remapping needed
  Labels already 0-9 — no remapping needed
  Labels already 0-9 — no remapping needed

y_train_ohe: (42000, 10)
y_val_ohe  : (60000, 10)
y_test_ohe : (18000, 10)

Observation:

  • Labels confirmed in range 0-9 — no remapping needed for this dataset version.
  • Each label is now a vector of length 10 with a 1 at the digit position. Example: digit 3 → [0, 0, 0, 1, 0, 0, 0, 0, 0, 0].
  • remap_labels() is safe for both SVHN versions and makes the notebook correct regardless of which version is used.

Model Building — ANN¶

Each model is instantiated by calling reset() before build_*(). reset() clears the Keras session and re-fixes all random seeds, ensuring each model starts from a clean, reproducible state.

I use the provided validation set (X_val_ann, y_val_ohe) directly via validation_data= rather than validation_split=0.2. This keeps all 42,000 training images available for learning.

Model Architecture — ANN Model 1¶

  • First hidden layer: 64 nodes, ReLU, input shape = (1024,)
  • Second hidden layer: 32 nodes, ReLU
  • Output layer: 10 nodes, Softmax
  • Optimizer: Adam(lr=0.001) | Loss: categorical_crossentropy

Why these activations:

  • ReLU: standard for hidden dense layers — computationally cheap, no vanishing gradient for positive inputs, introduces required non-linearity.
  • Softmax: converts raw scores into a probability distribution summing to 1 across all 10 classes. Required for categorical crossentropy. Using sigmoid would give independent probabilities that don't sum to 1.
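The softmax-vs-sigmoid point can be checked numerically. A small sketch (the logit values are arbitrary examples):

```python
import numpy as np

logits = np.array([2.0, 1.0, 0.1])   # raw scores from the output layer

# Softmax: exponentiate and normalize, so outputs form a probability
# distribution over the classes
softmax = np.exp(logits) / np.exp(logits).sum()

# Sigmoid: each score squashed independently, so outputs need not sum to 1
sigmoid = 1.0 / (1.0 + np.exp(-logits))

print(softmax.round(3))   # [0.659 0.242 0.099]  (sums to 1)
print(sigmoid.round(3))   # [0.881 0.731 0.525]  (sums to ~2.14)
```

Only the softmax outputs can be read as "the probability this image is digit k", which is what categorical crossentropy assumes.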

Build and train an ANN model as per the above mentioned architecture.¶

In [19]:
reset()
ann1 = build_ann1()
ann1.summary()

history_ann1 = ann1.fit(
    X_train_ann, y_train_ohe,
    validation_data=(X_val_ann, y_val_ohe),
    batch_size=128,
    epochs=20,
    verbose=1
)
Model: "ANN_Model_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ dense (Dense)                   │ (None, 64)             │        65,600 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense)                 │ (None, 32)             │         2,080 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_2 (Dense)                 │ (None, 10)             │           330 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 68,010 (265.66 KB)
 Trainable params: 68,010 (265.66 KB)
 Non-trainable params: 0 (0.00 B)
Epoch 1/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 9s 14ms/step - accuracy: 0.1416 - loss: 2.2706 - val_accuracy: 0.1881 - val_loss: 2.1862
Epoch 2/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.3016 - loss: 1.9930 - val_accuracy: 0.3623 - val_loss: 1.8384
Epoch 3/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.4135 - loss: 1.7342 - val_accuracy: 0.4519 - val_loss: 1.6469
Epoch 4/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.4759 - loss: 1.5901 - val_accuracy: 0.5028 - val_loss: 1.5227
Epoch 5/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.5190 - loss: 1.4735 - val_accuracy: 0.5426 - val_loss: 1.4053
Epoch 6/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.5504 - loss: 1.3855 - val_accuracy: 0.5648 - val_loss: 1.3445
Epoch 7/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.5664 - loss: 1.3407 - val_accuracy: 0.5795 - val_loss: 1.3051
Epoch 8/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.5779 - loss: 1.3124 - val_accuracy: 0.5862 - val_loss: 1.2871
Epoch 9/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.5863 - loss: 1.2916 - val_accuracy: 0.5932 - val_loss: 1.2683
Epoch 10/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.5947 - loss: 1.2722 - val_accuracy: 0.6003 - val_loss: 1.2505
Epoch 11/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.6009 - loss: 1.2549 - val_accuracy: 0.6091 - val_loss: 1.2336
Epoch 12/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - accuracy: 0.6078 - loss: 1.2401 - val_accuracy: 0.6147 - val_loss: 1.2201
Epoch 13/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - accuracy: 0.6153 - loss: 1.2261 - val_accuracy: 0.6200 - val_loss: 1.2075
Epoch 14/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.6198 - loss: 1.2128 - val_accuracy: 0.6227 - val_loss: 1.2006
Epoch 15/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.6227 - loss: 1.2015 - val_accuracy: 0.6269 - val_loss: 1.1901
Epoch 16/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.6258 - loss: 1.1902 - val_accuracy: 0.6305 - val_loss: 1.1826
Epoch 17/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.6299 - loss: 1.1813 - val_accuracy: 0.6306 - val_loss: 1.1812
Epoch 18/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.6332 - loss: 1.1724 - val_accuracy: 0.6334 - val_loss: 1.1734
Epoch 19/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - accuracy: 0.6368 - loss: 1.1650 - val_accuracy: 0.6352 - val_loss: 1.1688
Epoch 20/20
329/329 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - accuracy: 0.6402 - loss: 1.1575 - val_accuracy: 0.6386 - val_loss: 1.1605

Plot the Training and Validation Accuracies and write down your Observations.¶

In [20]:
plot_history(history_ann1, 'ANN Model 1 — Training vs Validation')
[Figure: ANN Model 1 training vs validation accuracy and loss curves]

Observations:

  • Reaches 63.57% test accuracy after 20 epochs — train (64.02%) and val (63.86%) are close together, so no real overfitting, just a low ceiling.
  • The accuracy curves are flattening out; additional epochs would bring only marginal gains.
  • The shallow architecture (2 hidden layers, 64→32 nodes) simply doesn't have enough capacity for this task.
  • The deeper problem: the ANN flattens the 32x32 image into a 1024-number list. All spatial relationships between pixels are gone — the model has no concept of which pixels are next to each other. For digit recognition that's a serious handicap.
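The loss of spatial structure is easy to make concrete. In the 32x32 grid, pixel (r, c) has immediate neighbors at (r, c+1) and (r+1, c); after flattening, the vertical neighbor lands 32 positions away, and a Dense layer has no built-in notion that index 330 and index 362 are related. A small sketch (`flat_index` is a hypothetical helper, just the row-major formula):

```python
def flat_index(r, c, width=32):
    """Row-major index of pixel (r, c) in the flattened 1024-vector."""
    return r * width + c

r, c = 10, 10
print(flat_index(r, c + 1) - flat_index(r, c))  # 1   (horizontal neighbor stays adjacent)
print(flat_index(r + 1, c) - flat_index(r, c))  # 32  (vertical neighbor is 32 indices away)
```

A Dense layer would behave identically if the same fixed pixel permutation were applied to every image, since it treats inputs as an unordered list of features; a Conv2D kernel, by contrast, gets 2D adjacency for free.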

Let's build one more model with higher complexity and see if we can improve the performance. I need to clear the previous model's history from the Keras backend and fix the seed again before building Model 2.

In [21]:
reset()
print('Backend cleared. Seeds re-fixed.')
Backend cleared. Seeds re-fixed.

Second Model Architecture — ANN Model 2¶

  • Layer 1: 256 nodes, ReLU | Layer 2: 128 nodes, ReLU
  • Dropout(0.2)
  • Layer 3: 64 nodes, ReLU | Layer 4: 64 nodes, ReLU | Layer 5: 32 nodes, ReLU
  • BatchNormalization
  • Output: 10 nodes, Softmax
  • Optimizer: Adam(lr=0.0005)

Regularization choices:

  • Dropout(0.2): randomly zeroes 20% of activations per step, preventing neurons from co-adapting — each must learn independently useful features.
  • BatchNormalization: normalizes layer inputs to zero mean and unit variance, stabilizing training and reducing sensitivity to weight initialization.
  • lr=0.0005: the deeper network has more parameters and a more complex loss landscape — smaller update steps reduce the risk of overshooting.
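Dropout's "randomly zeroes 20% of activations" can be sketched with numpy. This shows inverted dropout, the variant Keras applies at training time: survivors are scaled by 1/(1−p) so the expected activation is unchanged, and no scaling is needed at inference. The array sizes and seed are arbitrary choices for the demo.

```python
import numpy as np

rng = np.random.default_rng(0)
x = np.ones(100_000)   # activations, all 1.0 for clarity
p = 0.2                # drop rate, as in Dropout(0.2)

# Zero out a fraction p of activations, scale the survivors by 1/(1-p)
mask = rng.random(x.shape) >= p
dropped = x * mask / (1.0 - p)

print(round(1 - mask.mean(), 3))   # fraction dropped, approximately 0.2
print(round(dropped.mean(), 3))    # expected activation preserved, approximately 1.0
```

Because the expectation is preserved, the layer can simply be skipped at inference time without rescaling the weights.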

Build and train the new ANN model as per the above mentioned architecture.¶

In [22]:
reset()
ann2 = build_ann2()
ann2.summary()

history_ann2 = ann2.fit(
    X_train_ann, y_train_ohe,
    validation_data=(X_val_ann, y_val_ohe),
    batch_size=128,
    epochs=30,
    verbose=1
)
Model: "ANN_Model_2"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ dense (Dense)                   │ (None, 256)            │       262,400 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense)                 │ (None, 128)            │        32,896 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout)               │ (None, 128)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_2 (Dense)                 │ (None, 64)             │         8,256 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_3 (Dense)                 │ (None, 64)             │         4,160 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_4 (Dense)                 │ (None, 32)             │         2,080 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization             │ (None, 32)             │           128 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_5 (Dense)                 │ (None, 10)             │           330 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 310,250 (1.18 MB)
 Trainable params: 310,186 (1.18 MB)
 Non-trainable params: 64 (256.00 B)
Epoch 1/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 9s 16ms/step - accuracy: 0.1055 - loss: 2.3338 - val_accuracy: 0.1160 - val_loss: 2.2995
Epoch 2/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.2693 - loss: 1.9971 - val_accuracy: 0.3837 - val_loss: 1.7706
Epoch 3/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.4759 - loss: 1.5409 - val_accuracy: 0.5458 - val_loss: 1.3633
Epoch 4/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - accuracy: 0.5736 - loss: 1.3061 - val_accuracy: 0.5630 - val_loss: 1.3233
Epoch 5/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.6172 - loss: 1.1872 - val_accuracy: 0.6488 - val_loss: 1.1001
Epoch 6/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.6487 - loss: 1.1003 - val_accuracy: 0.6748 - val_loss: 1.0243
Epoch 7/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.6669 - loss: 1.0468 - val_accuracy: 0.6803 - val_loss: 1.0004
Epoch 8/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - accuracy: 0.6831 - loss: 1.0012 - val_accuracy: 0.7033 - val_loss: 0.9296
Epoch 9/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - accuracy: 0.6971 - loss: 0.9602 - val_accuracy: 0.6925 - val_loss: 0.9683
Epoch 10/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.6999 - loss: 0.9497 - val_accuracy: 0.7141 - val_loss: 0.9061
Epoch 11/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.7102 - loss: 0.9209 - val_accuracy: 0.7301 - val_loss: 0.8472
Epoch 12/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.7181 - loss: 0.8937 - val_accuracy: 0.7225 - val_loss: 0.8807
Epoch 13/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.7199 - loss: 0.8818 - val_accuracy: 0.7373 - val_loss: 0.8289
Epoch 14/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - accuracy: 0.7265 - loss: 0.8630 - val_accuracy: 0.7420 - val_loss: 0.8160
Epoch 15/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - accuracy: 0.7330 - loss: 0.8418 - val_accuracy: 0.7404 - val_loss: 0.8146
Epoch 16/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.7346 - loss: 0.8355 - val_accuracy: 0.7463 - val_loss: 0.8146
Epoch 17/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.7401 - loss: 0.8191 - val_accuracy: 0.7541 - val_loss: 0.7776
Epoch 18/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.7431 - loss: 0.8078 - val_accuracy: 0.7568 - val_loss: 0.7615
Epoch 19/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.7447 - loss: 0.7979 - val_accuracy: 0.7468 - val_loss: 0.8050
Epoch 20/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.7507 - loss: 0.7873 - val_accuracy: 0.7722 - val_loss: 0.7233
Epoch 21/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - accuracy: 0.7559 - loss: 0.7702 - val_accuracy: 0.7677 - val_loss: 0.7381
Epoch 22/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - accuracy: 0.7558 - loss: 0.7679 - val_accuracy: 0.7635 - val_loss: 0.7587
Epoch 23/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.7589 - loss: 0.7607 - val_accuracy: 0.7789 - val_loss: 0.7115
Epoch 24/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.7622 - loss: 0.7475 - val_accuracy: 0.7732 - val_loss: 0.7181
Epoch 25/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.7636 - loss: 0.7462 - val_accuracy: 0.7888 - val_loss: 0.6739
Epoch 26/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - accuracy: 0.7675 - loss: 0.7334 - val_accuracy: 0.7815 - val_loss: 0.6959
Epoch 27/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.7669 - loss: 0.7346 - val_accuracy: 0.7812 - val_loss: 0.6985
Epoch 28/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - accuracy: 0.7685 - loss: 0.7303 - val_accuracy: 0.7973 - val_loss: 0.6511
Epoch 29/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.7731 - loss: 0.7166 - val_accuracy: 0.7846 - val_loss: 0.6871
Epoch 30/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.7759 - loss: 0.7080 - val_accuracy: 0.7911 - val_loss: 0.6617

Plot the Training and Validation Accuracies and write down your Observations.¶

In [23]:
plot_history(history_ann2, 'ANN Model 2 — Training vs Validation')
[Plot: ANN Model 2 — Training vs Validation accuracy curves]

Observations:

  • Jumps to 77.33% test accuracy — train 77.59%, val 79.73%. Interestingly val is slightly higher than train, which is unusual but happens because Dropout is active during training (adding noise) but turned off at validation time.
  • +13.76 points over ANN Model 1 — depth and regularization made a real difference.
  • Dropout and BatchNorm are keeping the gap between train and val small.
  • That said, 77% is probably near the ceiling for any ANN on this task. No matter how many dense layers I add, flattening still destroys the spatial structure that makes digit recognition work.
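The train-accuracy-below-val effect can be reproduced in a few lines of numpy — a sketch of inverted dropout (the scheme Keras uses), which injects noise at train time and is a no-op at inference:

```python
import numpy as np

rng = np.random.default_rng(0)
activations = np.ones(1000)  # pretend every unit outputs 1.0

# Training step: zero a random 20% of units, scale survivors by 1/0.8
# so the expected activation is unchanged (inverted dropout)
mask = rng.random(1000) >= 0.2
train_out = activations * mask / 0.8

# Inference: dropout is disabled — the full, noise-free signal passes through
eval_out = activations

print(train_out.mean())  # close to 1.0, but noisy from step to step
print(eval_out.mean())   # exactly 1.0
```

Training metrics are computed on the noisy pass, validation metrics on the clean one — hence val can sit slightly above train without anything being wrong.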

Predictions on the test data — ANN¶

  • Make predictions on the test set using the second model.
  • Print the classification report and confusion matrix.
  • Final observations on the results.
In [24]:
y_pred_ann2 = np.argmax(ann2.predict(X_test_ann, verbose=0), axis=1)
print('Predictions complete. Sample:', y_pred_ann2[:10])
Predictions complete. Sample: [0 2 2 9 0 9 1 5 1 4]

Note: Each entry of y_test_ohe is a one-hot encoded vector. To print the classification report and confusion matrix we convert back to integer labels.
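A tiny self-contained example of that round trip (illustrative vectors, not the actual test labels):

```python
import numpy as np

# Two one-hot rows encoding the digits 2 and 7
one_hot = np.eye(10)[[2, 7]]

# argmax along axis 1 recovers the integer labels
labels = np.argmax(one_hot, axis=1)
print(labels)  # [2 7]
```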

In [25]:
y_test_labels = np.argmax(y_test_ohe, axis=1)
print('True labels (first 10):', y_test_labels[:10])
True labels (first 10): [1 7 2 9 0 9 1 8 4 4]

Print the classification report and confusion matrix. Write your observations.¶

In [26]:
print('=' * 60)
print('   Classification Report — ANN Model 2')
print('=' * 60)
print(classification_report(y_test_labels, y_pred_ann2,
                             target_names=[str(i) for i in range(10)]))

cm_ann2 = confusion_matrix(y_test_labels, y_pred_ann2)
plt.figure(figsize=(10, 8))
sns.heatmap(cm_ann2, annot=True, fmt='d', cmap='Blues',
            xticklabels=range(10), yticklabels=range(10))
plt.title('Confusion Matrix — ANN Model 2', fontsize=14, fontweight='bold')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.tight_layout()
plt.show()
============================================================
   Classification Report — ANN Model 2
============================================================
              precision    recall  f1-score   support

           0       0.78      0.81      0.80      1814
           1       0.74      0.83      0.79      1828
           2       0.76      0.82      0.79      1803
           3       0.72      0.73      0.72      1719
           4       0.73      0.87      0.79      1812
           5       0.76      0.73      0.75      1768
           6       0.79      0.75      0.77      1832
           7       0.86      0.79      0.82      1808
           8       0.78      0.67      0.72      1812
           9       0.84      0.71      0.77      1804

    accuracy                           0.77     18000
   macro avg       0.78      0.77      0.77     18000
weighted avg       0.78      0.77      0.77     18000

[Plot: Confusion Matrix — ANN Model 2]

Final Observations (ANN):

  • Model 2 (77.33%) clearly beats Model 1 (63.57%).
  • Looking at the classification report, digit 3 has the lowest precision (0.72) and digit 8 the lowest recall (0.67) — a third of real 8s are misread as other digits. Digit 9 shows a related pattern (precision 0.84, recall 0.71): the model is conservative about predicting 9 and misses a lot of the real ones.
  • These errors make sense visually — 3 and 8 share similar curved structure in street photography.
  • The fundamental problem hasn't gone away. Flattening kills spatial structure. CNNs keep it. I expect a big jump when I switch.

Using Convolutional Neural Networks¶

Load the dataset again and split into train, validation and test.¶

In [27]:
# The data is already in memory from the ANN section.
# We reassign to make the CNN section self-contained and clear.
with h5py.File(DATA_PATH, 'r') as hf:
    X_train = hf['X_train'][:]
    y_train = hf['y_train'][:]
    X_val   = hf['X_val'][:]
    y_val   = hf['y_val'][:]
    X_test  = hf['X_test'][:]
    y_test  = hf['y_test'][:]
print('Dataset reloaded.')
Dataset reloaded.

Check the number of images in the training and the testing dataset.

In [28]:
print(f'Training   : {X_train.shape[0]:,}  {X_train.shape[1:]}')
print(f'Validation : {X_val.shape[0]:,}  {X_val.shape[1:]}')
print(f'Test       : {X_test.shape[0]:,}  {X_test.shape[1:]}')
Training   : 42,000  (32, 32)
Validation : 60,000  (32, 32)
Test       : 18,000  (32, 32)

Observation:

Same split confirmed — 42,000 / 60,000 / 18,000 images. Each image is still 32x32 greyscale, as expected.

Data preparation for CNN¶

  • Print the shape and pixel array of the first training image.
  • Reshape to 4D — CNNs require (N, H, W, C) input.
  • Normalize to [0, 1].
  • Print new shapes.
  • One-hot encode labels.
In [29]:
print('Shape of first training image (raw):', X_train[0].shape)
print('Pixel array:')
print(X_train[0])
Shape of first training image (raw): (32, 32)
Pixel array:
[[ 33.0704  30.2601  26.852  ...  71.4471  58.2204  42.9939]
 [ 25.2283  25.5533  29.9765 ... 113.0209 103.3639  84.2949]
 [ 26.2775  22.6137  40.4763 ... 113.3028 121.775  115.4228]
 ...
 [ 28.5502  36.212   45.0801 ...  24.1359  25.0927  26.0603]
 [ 38.4352  26.4733  23.2717 ...  28.1094  29.4683  30.0661]
 [ 50.2984  26.0773  24.0389 ...  49.6682  50.853   53.0377]]

Reshape the dataset to pass to the CNN. Keras Conv2D layers expect a 4D input: (samples, height, width, channels). For greyscale images, channels = 1.

In [30]:
# Reshape (N, 32, 32) → (N, 32, 32, 1) — add channel dimension
X_train_cnn = X_train.reshape(X_train.shape[0], 32, 32, 1)
X_val_cnn   = X_val.reshape(X_val.shape[0],     32, 32, 1)
X_test_cnn  = X_test.reshape(X_test.shape[0],   32, 32, 1)
print('After reshape — X_train_cnn:', X_train_cnn.shape)
After reshape — X_train_cnn: (42000, 32, 32, 1)

Normalize inputs from 0-255 to 0-1

In [31]:
X_train_cnn = X_train_cnn.astype('float32') / 255.0
X_val_cnn   = X_val_cnn.astype('float32')   / 255.0
X_test_cnn  = X_test_cnn.astype('float32')  / 255.0
print(f'Pixel range: [{X_train_cnn.min():.2f}, {X_train_cnn.max():.2f}]')
Pixel range: [0.00, 1.00]

Print new shapes of Training, Validation and Test

In [32]:
print('X_train_cnn shape:', X_train_cnn.shape)   # (42000, 32, 32, 1)
print('X_val_cnn   shape:', X_val_cnn.shape)     # (60000, 32, 32, 1)
print('X_test_cnn  shape:', X_test_cnn.shape)    # (18000, 32, 32, 1)
X_train_cnn shape: (42000, 32, 32, 1)
X_val_cnn   shape: (60000, 32, 32, 1)
X_test_cnn  shape: (18000, 32, 32, 1)

One-hot encode the labels in y_train, y_val and y_test.¶

In [33]:
y_train_c = remap_labels(y_train)
y_val_c   = remap_labels(y_val)
y_test_c  = remap_labels(y_test)

y_train_ohe = to_categorical(y_train_c, num_classes=10)
y_val_ohe   = to_categorical(y_val_c,   num_classes=10)
y_test_ohe  = to_categorical(y_test_c,  num_classes=10)

print('y_train_ohe:', y_train_ohe.shape)
print('y_val_ohe  :', y_val_ohe.shape)
print('y_test_ohe :', y_test_ohe.shape)
  Labels already 0-9 — no remapping needed
  Labels already 0-9 — no remapping needed
  Labels already 0-9 — no remapping needed
y_train_ohe: (42000, 10)
y_val_ohe  : (60000, 10)
y_test_ohe : (18000, 10)

Observation:

  • Images are now 4D as required by Conv2D layers. Channel = 1 for greyscale. RGB images would use channel = 3.
  • Labels remain the same one-hot vectors — identical encoding to the ANN section.

Model Building — CNN¶

Fix the seed for random number generators

In [34]:
reset()
print('Backend cleared. Seeds fixed.')
Backend cleared. Seeds fixed.

Model Architecture — CNN Model 1¶

  • Conv2D(16 filters, 3x3, same) → LeakyReLU(0.1)
  • Conv2D(32 filters, 3x3, same) → LeakyReLU(0.1)
  • MaxPooling2D(2x2)
  • Flatten → Dense(32) → LeakyReLU(0.1)
  • Output: Dense(10, Softmax)
  • Optimizer: Adam(lr=0.001)

Activation choices:

  • LeakyReLU(negative_slope=0.1) after conv layers: conv filters produce many negative pre-activations early in training. Standard ReLU zeros these permanently — the dying ReLU problem — leaving dead filters that never recover. LeakyReLU keeps a 10% gradient for negative inputs, keeping all filters active.
  • MaxPooling2D(2,2): takes the maximum in each 2x2 block, halving spatial dimensions (32x32 → 16x16). Provides a degree of translational invariance — a digit shifted by a pixel or two usually yields a near-identical pooled output. Max (not average) preserves the strongest activation rather than diluting it.
  • padding='same': preserves the 32x32 spatial size after each convolution.
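What the pooling step does can be sketched in numpy — the same per-channel operation MaxPooling2D applies, shown on a 4x4 toy feature map:

```python
import numpy as np

x = np.array([[1, 3, 2, 0],
              [4, 2, 1, 5],
              [0, 1, 3, 2],
              [2, 8, 1, 1]], dtype=float)

# 2x2 max pooling, stride 2: split into non-overlapping 2x2 blocks
# and keep the maximum of each — spatial size halves (4x4 → 2x2)
pooled = x.reshape(2, 2, 2, 2).max(axis=(1, 3))
print(pooled)  # [[4. 5.]
               #  [8. 3.]]
```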

Build and train a CNN model as per the above mentioned architecture.¶

In [35]:
reset()
cnn1 = build_cnn1()
cnn1.summary()

# EarlyStopping: stops training when val_accuracy stops improving for 5 epochs,
#   then restores the best weights. Prevents the severe overfitting we observed
#   without it (97% train vs 85% val).
# ReduceLROnPlateau: halves the learning rate if val_loss plateaus for 3 epochs.
#   Helps the model fine-tune once fast initial learning saturates.
callbacks_cnn1 = [
    EarlyStopping(monitor='val_accuracy', patience=5,
                  restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                      patience=3, min_lr=1e-6, verbose=1),
]

history_cnn1 = cnn1.fit(
    X_train_cnn, y_train_ohe,
    validation_data=(X_val_cnn, y_val_ohe),
    batch_size=32,
    epochs=20,
    callbacks=callbacks_cnn1,
    verbose=1
)
Model: "CNN_Model_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ conv2d (Conv2D)                 │ (None, 32, 32, 16)     │           160 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ leaky_re_lu (LeakyReLU)         │ (None, 32, 32, 16)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_1 (Conv2D)               │ (None, 32, 32, 32)     │         4,640 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ leaky_re_lu_1 (LeakyReLU)       │ (None, 32, 32, 32)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d (MaxPooling2D)    │ (None, 16, 16, 32)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ flatten (Flatten)               │ (None, 8192)           │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ (None, 32)             │       262,176 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ leaky_re_lu_2 (LeakyReLU)       │ (None, 32)             │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense)                 │ (None, 10)             │           330 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 267,306 (1.02 MB)
 Trainable params: 267,306 (1.02 MB)
 Non-trainable params: 0 (0.00 B)
Epoch 1/20
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 14s 8ms/step - accuracy: 0.6705 - loss: 1.0337 - val_accuracy: 0.8300 - val_loss: 0.5924 - learning_rate: 0.0010
Epoch 2/20
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 9s 7ms/step - accuracy: 0.8474 - loss: 0.5322 - val_accuracy: 0.8684 - val_loss: 0.4649 - learning_rate: 0.0010
Epoch 3/20
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 9s 6ms/step - accuracy: 0.8710 - loss: 0.4436 - val_accuracy: 0.8826 - val_loss: 0.4086 - learning_rate: 0.0010
Epoch 4/20
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 8s 6ms/step - accuracy: 0.8877 - loss: 0.3851 - val_accuracy: 0.8928 - val_loss: 0.3700 - learning_rate: 0.0010
Epoch 5/20
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 9s 7ms/step - accuracy: 0.9013 - loss: 0.3395 - val_accuracy: 0.8995 - val_loss: 0.3461 - learning_rate: 0.0010
Epoch 6/20
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 8s 6ms/step - accuracy: 0.9133 - loss: 0.3009 - val_accuracy: 0.9055 - val_loss: 0.3296 - learning_rate: 0.0010
Epoch 7/20
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 9s 7ms/step - accuracy: 0.9220 - loss: 0.2675 - val_accuracy: 0.9092 - val_loss: 0.3195 - learning_rate: 0.0010
Epoch 8/20
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 9s 7ms/step - accuracy: 0.9312 - loss: 0.2372 - val_accuracy: 0.9126 - val_loss: 0.3162 - learning_rate: 0.0010
Epoch 9/20
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 8s 6ms/step - accuracy: 0.9370 - loss: 0.2147 - val_accuracy: 0.9175 - val_loss: 0.3024 - learning_rate: 0.0010
Epoch 10/20
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 9s 7ms/step - accuracy: 0.9437 - loss: 0.1902 - val_accuracy: 0.9130 - val_loss: 0.3232 - learning_rate: 0.0010
Epoch 11/20
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 8s 6ms/step - accuracy: 0.9490 - loss: 0.1725 - val_accuracy: 0.9192 - val_loss: 0.3149 - learning_rate: 0.0010
Epoch 12/20
1305/1313 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - accuracy: 0.9530 - loss: 0.1583
Epoch 12: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 9s 7ms/step - accuracy: 0.9520 - loss: 0.1562 - val_accuracy: 0.9238 - val_loss: 0.3122 - learning_rate: 0.0010
Epoch 13/20
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 9s 7ms/step - accuracy: 0.9603 - loss: 0.1294 - val_accuracy: 0.9383 - val_loss: 0.2706 - learning_rate: 5.0000e-04
Epoch 14/20
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 8s 6ms/step - accuracy: 0.9712 - loss: 0.1033 - val_accuracy: 0.9396 - val_loss: 0.2760 - learning_rate: 5.0000e-04
Epoch 15/20
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 10s 8ms/step - accuracy: 0.9771 - loss: 0.0882 - val_accuracy: 0.9411 - val_loss: 0.2829 - learning_rate: 5.0000e-04
Epoch 16/20
1301/1313 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9798 - loss: 0.0855
Epoch 16: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 9s 7ms/step - accuracy: 0.9814 - loss: 0.0772 - val_accuracy: 0.9393 - val_loss: 0.2977 - learning_rate: 5.0000e-04
Epoch 17/20
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 8s 6ms/step - accuracy: 0.9821 - loss: 0.0680 - val_accuracy: 0.9419 - val_loss: 0.2942 - learning_rate: 2.5000e-04
Epoch 18/20
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 9s 7ms/step - accuracy: 0.9868 - loss: 0.0565 - val_accuracy: 0.9448 - val_loss: 0.2907 - learning_rate: 2.5000e-04
Epoch 19/20
1308/1313 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9879 - loss: 0.0578
Epoch 19: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 9s 7ms/step - accuracy: 0.9896 - loss: 0.0490 - val_accuracy: 0.9441 - val_loss: 0.2997 - learning_rate: 2.5000e-04
Epoch 20/20
1313/1313 ━━━━━━━━━━━━━━━━━━━━ 8s 6ms/step - accuracy: 0.9908 - loss: 0.0437 - val_accuracy: 0.9517 - val_loss: 0.2792 - learning_rate: 1.2500e-04
Restoring model weights from the end of the best epoch: 20.

Plot the Training and Validation Accuracies and write your observations.¶

In [36]:
plot_history(history_cnn1, 'CNN Model 1 — Training vs Validation')
[Plot: CNN Model 1 — Training vs Validation accuracy curves]

Observations:

  • Reaches 87.30% test accuracy — that's +9.97 points over ANN Model 2. Spatial feature extraction via convolution clearly matters a lot here.
  • But there's a serious overfitting problem: train accuracy hit 99.08% while val sat at 95.17% and test at only 87.30%. The model memorised the training set almost perfectly but couldn't generalise well to unseen data.
  • The gap between val (95%) and test (87%) is also telling — the 60,000 validation images appear to sample a slightly different (easier) slice of the data than the 18,000 test images.
  • The problem is the dense head has no dropout. CNN Model 2 fixes this.

Let's build another model to get better generalised performance. Clear the previous model's history from the Keras backend and fix the seed again.

In [37]:
reset()
print('Backend cleared. Seeds re-fixed.')
Backend cleared. Seeds re-fixed.

Second Model Architecture — CNN Model 2¶

  • Conv2D(16 filters, 3x3, same) → LeakyReLU(0.1)
  • Conv2D(32 filters, 3x3, same) → LeakyReLU(0.1) → MaxPool(2x2) → BatchNorm
  • Conv2D(32 filters, 3x3, same) → LeakyReLU(0.1)
  • Conv2D(64 filters, 3x3, same) → LeakyReLU(0.1) → MaxPool(2x2) → BatchNorm
  • Flatten → Dense(32) → LeakyReLU(0.1) → Dropout(0.5)
  • Output: Dense(10, Softmax)
  • Optimizer: Adam(lr=0.001)

Design choices:

  • Filter progression 16→32→32→64: early filters detect simple features (edges, stroke orientations); deeper filters combine these into higher-level representations (curves, digit parts). Increasing filters with depth is standard CNN practice.
  • BatchNormalization after each MaxPool: re-normalizes activations before the next conv block, preventing scale drift that would destabilize training.
  • Dropout(0.5) in the dense head only: convolutional layers share weights spatially — inherently regularized. Dense layers do not share weights and are prone to overfitting. CNN Model 1 proved this. Dropout(0.5) targets exactly where the problem is.
  • ModelCheckpoint: saves the best epoch weights to disk. If the session crashes, we can reload without retraining.
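As with the ANN, the `build_cnn2()` helper is defined earlier in the notebook; a sketch of an implementation matching the block structure above (layer order and hyperparameters taken from the bullets) could be:

```python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Input, Conv2D, LeakyReLU, MaxPooling2D,
                                     BatchNormalization, Flatten, Dense, Dropout)
from tensorflow.keras.optimizers import Adam

def build_cnn2():
    # Two conv blocks (16→32 then 32→64 filters), each closed by
    # MaxPool + BatchNorm; Dropout(0.5) only in the dense head
    model = Sequential([
        Input(shape=(32, 32, 1)),
        Conv2D(16, (3, 3), padding='same'), LeakyReLU(0.1),
        Conv2D(32, (3, 3), padding='same'), LeakyReLU(0.1),
        MaxPooling2D((2, 2)), BatchNormalization(),
        Conv2D(32, (3, 3), padding='same'), LeakyReLU(0.1),
        Conv2D(64, (3, 3), padding='same'), LeakyReLU(0.1),
        MaxPooling2D((2, 2)), BatchNormalization(),
        Flatten(), Dense(32), LeakyReLU(0.1), Dropout(0.5),
        Dense(10, activation='softmax'),
    ], name='CNN_Model_2')
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
```

Built this way, the total is 164,362 parameters — matching the summary below, and notably smaller than CNN Model 1 because the second MaxPool shrinks the flattened vector from 8,192 to 4,096.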

Build and train the second CNN model as per the above mentioned architecture.¶

In [38]:
reset()
cnn2 = build_cnn2()
cnn2.summary()

callbacks_cnn2 = [
    EarlyStopping(monitor='val_accuracy', patience=5,
                  restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                      patience=3, min_lr=1e-6, verbose=1),
    ModelCheckpoint('best_cnn2.keras', monitor='val_accuracy',
                    save_best_only=True, verbose=1),
]

history_cnn2 = cnn2.fit(
    X_train_cnn, y_train_ohe,
    validation_data=(X_val_cnn, y_val_ohe),
    batch_size=128,
    epochs=30,
    callbacks=callbacks_cnn2,
    verbose=1
)
Model: "CNN_Model_2"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ conv2d (Conv2D)                 │ (None, 32, 32, 16)     │           160 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ leaky_re_lu (LeakyReLU)         │ (None, 32, 32, 16)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_1 (Conv2D)               │ (None, 32, 32, 32)     │         4,640 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ leaky_re_lu_1 (LeakyReLU)       │ (None, 32, 32, 32)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d (MaxPooling2D)    │ (None, 16, 16, 32)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization             │ (None, 16, 16, 32)     │           128 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_2 (Conv2D)               │ (None, 16, 16, 32)     │         9,248 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ leaky_re_lu_2 (LeakyReLU)       │ (None, 16, 16, 32)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_3 (Conv2D)               │ (None, 16, 16, 64)     │        18,496 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ leaky_re_lu_3 (LeakyReLU)       │ (None, 16, 16, 64)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_1 (MaxPooling2D)  │ (None, 8, 8, 64)       │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_1           │ (None, 8, 8, 64)       │           256 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ flatten (Flatten)               │ (None, 4096)           │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ (None, 32)             │       131,104 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ leaky_re_lu_4 (LeakyReLU)       │ (None, 32)             │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout)               │ (None, 32)             │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense)                 │ (None, 10)             │           330 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 164,362 (642.04 KB)
 Trainable params: 164,170 (641.29 KB)
 Non-trainable params: 192 (768.00 B)
Epoch 1/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step - accuracy: 0.3969 - loss: 1.7584
Epoch 1: val_accuracy improved from None to 0.45952, saving model to best_cnn2.keras

Epoch 1: finished saving model to best_cnn2.keras
329/329 ━━━━━━━━━━━━━━━━━━━━ 15s 25ms/step - accuracy: 0.5905 - loss: 1.2244 - val_accuracy: 0.4595 - val_loss: 1.6454 - learning_rate: 0.0010
Epoch 2/30
324/329 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.7886 - loss: 0.6922
Epoch 2: val_accuracy improved from 0.45952 to 0.86602, saving model to best_cnn2.keras

Epoch 2: finished saving model to best_cnn2.keras
329/329 ━━━━━━━━━━━━━━━━━━━━ 5s 16ms/step - accuracy: 0.8044 - loss: 0.6474 - val_accuracy: 0.8660 - val_loss: 0.4674 - learning_rate: 0.0010
Epoch 3/30
327/329 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8335 - loss: 0.5591
Epoch 3: val_accuracy improved from 0.86602 to 0.88428, saving model to best_cnn2.keras

Epoch 3: finished saving model to best_cnn2.keras
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - accuracy: 0.8372 - loss: 0.5373 - val_accuracy: 0.8843 - val_loss: 0.3901 - learning_rate: 0.0010
Epoch 4/30
328/329 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8498 - loss: 0.5056
Epoch 4: val_accuracy improved from 0.88428 to 0.89590, saving model to best_cnn2.keras

Epoch 4: finished saving model to best_cnn2.keras
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.8551 - loss: 0.4820 - val_accuracy: 0.8959 - val_loss: 0.3575 - learning_rate: 0.0010
Epoch 5/30
327/329 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.8649 - loss: 0.4573
Epoch 5: val_accuracy improved from 0.89590 to 0.91243, saving model to best_cnn2.keras

Epoch 5: finished saving model to best_cnn2.keras
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step - accuracy: 0.8698 - loss: 0.4345 - val_accuracy: 0.9124 - val_loss: 0.2991 - learning_rate: 0.0010
Epoch 6/30
328/329 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.8758 - loss: 0.4171
Epoch 6: val_accuracy did not improve from 0.91243
329/329 ━━━━━━━━━━━━━━━━━━━━ 5s 11ms/step - accuracy: 0.8792 - loss: 0.4003 - val_accuracy: 0.9018 - val_loss: 0.3297 - learning_rate: 0.0010
Epoch 7/30
326/329 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8827 - loss: 0.3890
Epoch 7: val_accuracy improved from 0.91243 to 0.91973, saving model to best_cnn2.keras

Epoch 7: finished saving model to best_cnn2.keras
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.8856 - loss: 0.3765 - val_accuracy: 0.9197 - val_loss: 0.2760 - learning_rate: 0.0010
Epoch 8/30
327/329 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8930 - loss: 0.3580
Epoch 8: val_accuracy did not improve from 0.91973
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - accuracy: 0.8942 - loss: 0.3519 - val_accuracy: 0.9193 - val_loss: 0.2768 - learning_rate: 0.0010
Epoch 9/30
326/329 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.8959 - loss: 0.3467
Epoch 9: val_accuracy improved from 0.91973 to 0.92707, saving model to best_cnn2.keras

Epoch 9: finished saving model to best_cnn2.keras
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - accuracy: 0.8970 - loss: 0.3349 - val_accuracy: 0.9271 - val_loss: 0.2548 - learning_rate: 0.0010
Epoch 10/30
328/329 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9021 - loss: 0.3206
Epoch 10: val_accuracy did not improve from 0.92707
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.9014 - loss: 0.3187 - val_accuracy: 0.9250 - val_loss: 0.2594 - learning_rate: 0.0010
Epoch 11/30
324/329 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9045 - loss: 0.3110
Epoch 11: val_accuracy did not improve from 0.92707
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.9057 - loss: 0.3023 - val_accuracy: 0.9252 - val_loss: 0.2595 - learning_rate: 0.0010
Epoch 12/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9124 - loss: 0.2893
Epoch 12: val_accuracy improved from 0.92707 to 0.94037, saving model to best_cnn2.keras

Epoch 12: finished saving model to best_cnn2.keras
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step - accuracy: 0.9113 - loss: 0.2863 - val_accuracy: 0.9404 - val_loss: 0.2132 - learning_rate: 0.0010
Epoch 13/30
322/329 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9136 - loss: 0.2817
Epoch 13: val_accuracy did not improve from 0.94037
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.9147 - loss: 0.2772 - val_accuracy: 0.9399 - val_loss: 0.2199 - learning_rate: 0.0010
Epoch 14/30
326/329 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9139 - loss: 0.2745
Epoch 14: val_accuracy did not improve from 0.94037
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.9163 - loss: 0.2631 - val_accuracy: 0.9380 - val_loss: 0.2188 - learning_rate: 0.0010
Epoch 15/30
324/329 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9181 - loss: 0.2582
Epoch 15: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 15: val_accuracy did not improve from 0.94037
329/329 ━━━━━━━━━━━━━━━━━━━━ 5s 15ms/step - accuracy: 0.9199 - loss: 0.2512 - val_accuracy: 0.9251 - val_loss: 0.2642 - learning_rate: 0.0010
Epoch 16/30
326/329 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9278 - loss: 0.2287
Epoch 16: val_accuracy improved from 0.94037 to 0.94823, saving model to best_cnn2.keras

Epoch 16: finished saving model to best_cnn2.keras
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - accuracy: 0.9317 - loss: 0.2127 - val_accuracy: 0.9482 - val_loss: 0.1881 - learning_rate: 5.0000e-04
Epoch 17/30
324/329 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9354 - loss: 0.2055
Epoch 17: val_accuracy improved from 0.94823 to 0.95043, saving model to best_cnn2.keras

Epoch 17: finished saving model to best_cnn2.keras
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.9387 - loss: 0.1931 - val_accuracy: 0.9504 - val_loss: 0.1828 - learning_rate: 5.0000e-04
Epoch 18/30
324/329 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9415 - loss: 0.1878
Epoch 18: val_accuracy did not improve from 0.95043
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - accuracy: 0.9416 - loss: 0.1813 - val_accuracy: 0.9497 - val_loss: 0.1853 - learning_rate: 5.0000e-04
Epoch 19/30
324/329 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9435 - loss: 0.1754
Epoch 19: val_accuracy improved from 0.95043 to 0.95415, saving model to best_cnn2.keras

Epoch 19: finished saving model to best_cnn2.keras
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - accuracy: 0.9432 - loss: 0.1752 - val_accuracy: 0.9542 - val_loss: 0.1778 - learning_rate: 5.0000e-04
Epoch 20/30
323/329 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9424 - loss: 0.1747
Epoch 20: val_accuracy did not improve from 0.95415
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - accuracy: 0.9442 - loss: 0.1691 - val_accuracy: 0.9540 - val_loss: 0.1806 - learning_rate: 5.0000e-04
Epoch 21/30
323/329 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9455 - loss: 0.1671
Epoch 21: val_accuracy did not improve from 0.95415
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.9470 - loss: 0.1616 - val_accuracy: 0.9526 - val_loss: 0.1841 - learning_rate: 5.0000e-04
Epoch 22/30
326/329 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9480 - loss: 0.1636
Epoch 22: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.

Epoch 22: val_accuracy did not improve from 0.95415
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step - accuracy: 0.9488 - loss: 0.1586 - val_accuracy: 0.9510 - val_loss: 0.1941 - learning_rate: 5.0000e-04
Epoch 23/30
325/329 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9488 - loss: 0.1530
Epoch 23: val_accuracy improved from 0.95415 to 0.96233, saving model to best_cnn2.keras

Epoch 23: finished saving model to best_cnn2.keras
329/329 ━━━━━━━━━━━━━━━━━━━━ 5s 12ms/step - accuracy: 0.9529 - loss: 0.1388 - val_accuracy: 0.9623 - val_loss: 0.1538 - learning_rate: 2.5000e-04
Epoch 24/30
322/329 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9549 - loss: 0.1339
Epoch 24: val_accuracy improved from 0.96233 to 0.96533, saving model to best_cnn2.keras

Epoch 24: finished saving model to best_cnn2.keras
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.9561 - loss: 0.1295 - val_accuracy: 0.9653 - val_loss: 0.1524 - learning_rate: 2.5000e-04
Epoch 25/30
327/329 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9571 - loss: 0.1243
Epoch 25: val_accuracy improved from 0.96533 to 0.96602, saving model to best_cnn2.keras

Epoch 25: finished saving model to best_cnn2.keras
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step - accuracy: 0.9595 - loss: 0.1204 - val_accuracy: 0.9660 - val_loss: 0.1514 - learning_rate: 2.5000e-04
Epoch 26/30
326/329 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9591 - loss: 0.1213
Epoch 26: val_accuracy improved from 0.96602 to 0.96623, saving model to best_cnn2.keras

Epoch 26: finished saving model to best_cnn2.keras
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - accuracy: 0.9591 - loss: 0.1188 - val_accuracy: 0.9662 - val_loss: 0.1506 - learning_rate: 2.5000e-04
Epoch 27/30
323/329 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9605 - loss: 0.1203
Epoch 27: val_accuracy improved from 0.96623 to 0.96737, saving model to best_cnn2.keras

Epoch 27: finished saving model to best_cnn2.keras
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.9608 - loss: 0.1167 - val_accuracy: 0.9674 - val_loss: 0.1471 - learning_rate: 2.5000e-04
Epoch 28/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9607 - loss: 0.1147
Epoch 28: val_accuracy did not improve from 0.96737
329/329 ━━━━━━━━━━━━━━━━━━━━ 6s 13ms/step - accuracy: 0.9616 - loss: 0.1107 - val_accuracy: 0.9662 - val_loss: 0.1572 - learning_rate: 2.5000e-04
Epoch 29/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9621 - loss: 0.1113
Epoch 29: val_accuracy improved from 0.96737 to 0.96882, saving model to best_cnn2.keras

Epoch 29: finished saving model to best_cnn2.keras
329/329 ━━━━━━━━━━━━━━━━━━━━ 5s 12ms/step - accuracy: 0.9620 - loss: 0.1107 - val_accuracy: 0.9688 - val_loss: 0.1449 - learning_rate: 2.5000e-04
Epoch 30/30
322/329 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9622 - loss: 0.1099
Epoch 30: val_accuracy did not improve from 0.96882
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.9626 - loss: 0.1066 - val_accuracy: 0.9684 - val_loss: 0.1529 - learning_rate: 2.5000e-04
Restoring model weights from the end of the best epoch: 29.

Plot the Training and Validation accuracies and write your observations.¶

In [39]:
plot_history(history_cnn2, 'CNN Model 2 — Training vs Validation')
[Figure: CNN Model 2 — training vs validation accuracy/loss curves]

Observations:

  • 92.22% test accuracy — best result across all four models.
  • Interestingly, val accuracy (96.88%) is higher than train (96.26%). This is because Dropout(0.5) is active during training, randomly disabling half the dense neurons each step — so train accuracy is measured under harder conditions than validation.
  • The two BatchNorm layers keep training smooth — no spikes or instability across 30 epochs.
  • All 10 digit classes achieve F1 ≥ 0.91 — the model has genuinely learned to distinguish every digit class reliably.
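The Dropout point above can be sketched numerically. This is a toy numpy simulation, not the notebook's model: with inverted dropout, zeroing half the units and scaling the survivors by 2 preserves the expected output but adds per-example noise, which is why metrics measured in training mode look worse than eval-mode metrics.

```python
import numpy as np

rng = np.random.default_rng(0)

# Hypothetical dense-layer activations and readout weights (toy values)
activations = rng.uniform(0.5, 1.0, size=(1000, 64))
weights     = rng.normal(0.0, 0.1, size=64)

# Eval mode: deterministic, the full layer runs
eval_scores = activations @ weights

# Train mode with Dropout(0.5): zero half the units at random, scale the
# survivors by 2 (inverted dropout) so the expected activation is unchanged
mask = (rng.uniform(size=activations.shape) > 0.5) / 0.5
train_scores = (activations * mask) @ weights

# Same expectation, more per-example noise: metrics measured during
# training understate what the deployed (eval-mode) model achieves
print('mean eval/train:', eval_scores.mean(), train_scores.mean())
print('std  eval/train:', eval_scores.std(), train_scores.std())
```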

Predictions on the test data — CNN¶

  • Make predictions on the test set using the second CNN model.
  • Print the classification report and confusion matrix.
  • Final observations on the results.

Make predictions on the test data using the second model.¶

In [40]:
y_pred_cnn2 = np.argmax(cnn2.predict(X_test_cnn, verbose=0), axis=1)
print('Predictions complete. Sample:', y_pred_cnn2[:10])
Predictions complete. Sample: [1 7 2 9 0 9 1 8 4 4]

Note: Each entry of y_test_ohe is a one-hot vector. We convert back to integer labels to compute the classification report and confusion matrix.

In [41]:
y_test_labels_cnn = np.argmax(y_test_ohe, axis=1)
print('True labels (first 10):', y_test_labels_cnn[:10])
True labels (first 10): [1 7 2 9 0 9 1 8 4 4]

Write your final observations on the performance of the model on the test data.¶

In [42]:
print('=' * 60)
print('   Classification Report — CNN Model 2')
print('=' * 60)
print(classification_report(y_test_labels_cnn, y_pred_cnn2,
                             target_names=[str(i) for i in range(10)]))

cm_cnn2 = confusion_matrix(y_test_labels_cnn, y_pred_cnn2)
plt.figure(figsize=(10, 8))
sns.heatmap(cm_cnn2, annot=True, fmt='d', cmap='YlOrRd',
            xticklabels=range(10), yticklabels=range(10))
plt.title('Confusion Matrix — CNN Model 2', fontsize=14, fontweight='bold')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.tight_layout()
plt.show()
============================================================
   Classification Report — CNN Model 2
============================================================
              precision    recall  f1-score   support

           0       0.94      0.95      0.95      1814
           1       0.91      0.93      0.92      1828
           2       0.94      0.93      0.94      1803
           3       0.90      0.90      0.90      1719
           4       0.94      0.93      0.94      1812
           5       0.89      0.92      0.91      1768
           6       0.91      0.91      0.91      1832
           7       0.94      0.94      0.94      1808
           8       0.93      0.90      0.91      1812
           9       0.91      0.91      0.91      1804

    accuracy                           0.92     18000
   macro avg       0.92      0.92      0.92     18000
weighted avg       0.92      0.92      0.92     18000

[Figure: confusion matrix — CNN Model 2]

Final Observations (CNN):

  • CNN Model 2 achieves 92.22% test accuracy — an improvement of +4.92 points over CNN Model 1 and +14.89 points over the best ANN.
  • Every digit class has F1 ≥ 0.90. The classification report shows no single digit being particularly problematic — a well-rounded model.
  • The confusion matrix is strongly diagonal — systematic confusion is minimal.
  • CNN Model 2 is the recommended model. The preprocessing experiment in Section 5 tests whether we can push it further.


Section 5 — Preprocessing Experiment: Data-Driven Spatial Mask¶

The question¶

Each crop contains neighboring digits alongside the labeled center digit. The mean images from the EDA showed digit structure is concentrated in the center, with neighbor residue at the edges.

Can I improve CNN Model 2 by pre-attenuating those neighbors before training?

Why a data-driven mask and not a geometric one?¶

I tried several preprocessing approaches before landing on this one:

Approach | Problem | Outcome
Rectangular mask (columns 8-24) | Assumes a fixed horizontal band | Too rigid
Gaussian radial mask | Assumes circular falloff | Replaced by data-driven approach
DBSCAN clustering | Should find blobs without needing K | Failed — 32x32 is too small
Localization NN (bbox regression) | Learns digit position | −5% vs raw — noisy labels hurt
Mean image mask | No assumption — derived from the data | Tested below

How the mask is built¶

1. Background-normalize each image (invert if mean pixel < 128)
2. Invert so digit strokes become bright pixels
3. Average across all 42,000 training images
4. Normalize to [0, 1] → this is the mask

The mask shows me where pixel information consistently lives in this dataset. No hyperparameter to tune — the data decides.

In [43]:
# ── Step 1: Background normalize ─────────────────────────────────────────────
def normalize_background(images):
    """Invert images with dark backgrounds so all share light-BG / dark-digit convention."""
    out = images.copy().astype('float32')
    for i in range(len(out)):
        if out[i].mean() < 128:
            out[i] = 255.0 - out[i]
    return out

print('Background-normalizing training images...')
X_train_bg = normalize_background(X_train)
n_inv = int((X_train.reshape(len(X_train), -1).mean(axis=1) < 128).sum())
print(f'{n_inv}/{len(X_train)} images inverted ({100*n_inv/len(X_train):.1f}%)')

# ── Step 2-4: Build the data-driven mask ─────────────────────────────────────
X_train_inv = 255.0 - X_train_bg             # digit strokes become bright
mean_map    = X_train_inv.mean(axis=0)        # spatial mean across all images
data_mask   = ((mean_map - mean_map.min()) /  # normalize to [0, 1]
               (mean_map.max() - mean_map.min())).astype('float32')

print(f'\nMask built:')
print(f'  Center weight (16,16): {data_mask[16,16]:.4f}')
print(f'  Corner weight  (0, 0): {data_mask[0,0]:.4f}')
print(f'  Ratio center/corner  : {data_mask[16,16]/data_mask[0,0]:.1f}x')
Background-normalizing training images...
29166/42000 images inverted (69.4%)

Mask built:
  Center weight (16,16): 0.8621
  Corner weight  (0, 0): 0.3345
  Ratio center/corner  : 2.6x
In [44]:
# Visualize: mean map and data-driven mask
fig, axes = plt.subplots(1, 3, figsize=(14, 4))
fig.suptitle('Data-Driven Spatial Mask — Derived from Training Set',
             fontsize=13, fontweight='bold')

im0 = axes[0].imshow(mean_map, cmap='hot')
axes[0].set_title('Raw mean map\n(bright = digit strokes)', fontsize=10)
axes[0].axis('off'); plt.colorbar(im0, ax=axes[0])

im1 = axes[1].imshow(data_mask, cmap='hot', vmin=0, vmax=1)
axes[1].set_title('Data-driven mask\n(normalized to [0,1])', fontsize=10)
axes[1].axis('off'); plt.colorbar(im1, ax=axes[1])

# Show effect on a sample image
sample_bg = X_train_bg[0] / 255.0
sample_masked = sample_bg * data_mask
axes[2].imshow(sample_masked, cmap='gray', vmin=0, vmax=1)
axes[2].set_title(f'Sample image after masking\nLabel: {y_train[0]}', fontsize=10)
axes[2].axis('off')

plt.tight_layout(); plt.show()
[Figure: raw mean map, data-driven mask, and a masked sample image]
In [45]:
# Apply mask to all splits
def apply_data_mask(images_raw, mask):
    out = []
    for img in images_raw:
        norm = img.astype('float32')
        if norm.mean() < 128:
            norm = 255.0 - norm
        out.append((norm / 255.0) * mask)
    return np.array(out, dtype='float32')

print('Applying mask to all splits...')
X_train_dm = apply_data_mask(X_train, data_mask).reshape(-1, 32, 32, 1)
X_val_dm   = apply_data_mask(X_val,   data_mask).reshape(-1, 32, 32, 1)
X_test_dm  = apply_data_mask(X_test,  data_mask).reshape(-1, 32, 32, 1)
print(f'Done. Shape: {X_train_dm.shape}')
Applying mask to all splits...
Done. Shape: (42000, 32, 32, 1)
In [46]:
# Train CNN Model 2 on masked inputs — identical architecture, different input
reset()
cnn2_dm = build_cnn2()

callbacks_dm = [
    EarlyStopping(monitor='val_accuracy', patience=5,
                  restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                      patience=3, min_lr=1e-6, verbose=1),
]

history_dm = cnn2_dm.fit(
    X_train_dm, y_train_ohe,
    validation_data=(X_val_dm, y_val_ohe),
    batch_size=128, epochs=30,
    callbacks=callbacks_dm, verbose=1
)
Epoch 1/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 14s 28ms/step - accuracy: 0.3147 - loss: 1.9394 - val_accuracy: 0.2217 - val_loss: 2.6657 - learning_rate: 0.0010
Epoch 2/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.6258 - loss: 1.1400 - val_accuracy: 0.7119 - val_loss: 0.9416 - learning_rate: 0.0010
Epoch 3/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.7068 - loss: 0.9193 - val_accuracy: 0.7612 - val_loss: 0.7378 - learning_rate: 0.0010
Epoch 4/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step - accuracy: 0.7422 - loss: 0.8170 - val_accuracy: 0.7839 - val_loss: 0.6934 - learning_rate: 0.0010
Epoch 5/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.7686 - loss: 0.7426 - val_accuracy: 0.8176 - val_loss: 0.5978 - learning_rate: 0.0010
Epoch 6/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.7849 - loss: 0.6973 - val_accuracy: 0.8276 - val_loss: 0.5539 - learning_rate: 0.0010
Epoch 7/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - accuracy: 0.7952 - loss: 0.6526 - val_accuracy: 0.8319 - val_loss: 0.5381 - learning_rate: 0.0010
Epoch 8/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - accuracy: 0.8027 - loss: 0.6328 - val_accuracy: 0.8204 - val_loss: 0.5817 - learning_rate: 0.0010
Epoch 9/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.8111 - loss: 0.6007 - val_accuracy: 0.7840 - val_loss: 0.6830 - learning_rate: 0.0010
Epoch 10/30
323/329 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8173 - loss: 0.5926
Epoch 10: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.8177 - loss: 0.5847 - val_accuracy: 0.8334 - val_loss: 0.5432 - learning_rate: 0.0010
Epoch 11/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step - accuracy: 0.8418 - loss: 0.5003 - val_accuracy: 0.8684 - val_loss: 0.4332 - learning_rate: 5.0000e-04
Epoch 12/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - accuracy: 0.8492 - loss: 0.4800 - val_accuracy: 0.8858 - val_loss: 0.3803 - learning_rate: 5.0000e-04
Epoch 13/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.8517 - loss: 0.4723 - val_accuracy: 0.8778 - val_loss: 0.4039 - learning_rate: 5.0000e-04
Epoch 14/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - accuracy: 0.8559 - loss: 0.4542 - val_accuracy: 0.8793 - val_loss: 0.4010 - learning_rate: 5.0000e-04
Epoch 15/30
324/329 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.8594 - loss: 0.4388
Epoch 15: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - accuracy: 0.8605 - loss: 0.4391 - val_accuracy: 0.8778 - val_loss: 0.4160 - learning_rate: 5.0000e-04
Epoch 16/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.8729 - loss: 0.3983 - val_accuracy: 0.9032 - val_loss: 0.3250 - learning_rate: 2.5000e-04
Epoch 17/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.8741 - loss: 0.3827 - val_accuracy: 0.8985 - val_loss: 0.3427 - learning_rate: 2.5000e-04
Epoch 18/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step - accuracy: 0.8775 - loss: 0.3741 - val_accuracy: 0.8949 - val_loss: 0.3497 - learning_rate: 2.5000e-04
Epoch 19/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 5s 11ms/step - accuracy: 0.8809 - loss: 0.3680 - val_accuracy: 0.9068 - val_loss: 0.3165 - learning_rate: 2.5000e-04
Epoch 20/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.8820 - loss: 0.3608 - val_accuracy: 0.8984 - val_loss: 0.3497 - learning_rate: 2.5000e-04
Epoch 21/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - accuracy: 0.8829 - loss: 0.3548 - val_accuracy: 0.9090 - val_loss: 0.3129 - learning_rate: 2.5000e-04
Epoch 22/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - accuracy: 0.8869 - loss: 0.3462 - val_accuracy: 0.9113 - val_loss: 0.3069 - learning_rate: 2.5000e-04
Epoch 23/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.8870 - loss: 0.3428 - val_accuracy: 0.9000 - val_loss: 0.3376 - learning_rate: 2.5000e-04
Epoch 24/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.8900 - loss: 0.3350 - val_accuracy: 0.9088 - val_loss: 0.3132 - learning_rate: 2.5000e-04
Epoch 25/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step - accuracy: 0.8934 - loss: 0.3248 - val_accuracy: 0.9163 - val_loss: 0.2959 - learning_rate: 2.5000e-04
Epoch 26/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.8918 - loss: 0.3207 - val_accuracy: 0.9167 - val_loss: 0.2941 - learning_rate: 2.5000e-04
Epoch 27/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.8945 - loss: 0.3148 - val_accuracy: 0.9140 - val_loss: 0.2991 - learning_rate: 2.5000e-04
Epoch 28/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step - accuracy: 0.8972 - loss: 0.3090 - val_accuracy: 0.9192 - val_loss: 0.2845 - learning_rate: 2.5000e-04
Epoch 29/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - accuracy: 0.8975 - loss: 0.3064 - val_accuracy: 0.9176 - val_loss: 0.2913 - learning_rate: 2.5000e-04
Epoch 30/30
329/329 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - accuracy: 0.9001 - loss: 0.2981 - val_accuracy: 0.9096 - val_loss: 0.3238 - learning_rate: 2.5000e-04
Restoring model weights from the end of the best epoch: 28.
In [47]:
# Compare: raw CNN2 vs masked CNN2
y_pred_dm  = np.argmax(cnn2_dm.predict(X_test_dm, verbose=0), axis=1)
y_true     = np.argmax(y_test_ohe, axis=1)
acc_raw    = np.mean(y_pred_cnn2 == y_true)
acc_dm     = np.mean(y_pred_dm   == y_true)

# Side-by-side training curves
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
fig.suptitle('CNN Model 2 — Raw inputs vs Data-Driven Mask',
             fontsize=13, fontweight='bold')
axes[0].plot(history_cnn2.history['accuracy'],     color='steelblue', label='Raw train')
axes[0].plot(history_cnn2.history['val_accuracy'], color='steelblue', linestyle='--', label='Raw val')
axes[0].plot(history_dm.history['accuracy'],       color='seagreen',  label='Mask train')
axes[0].plot(history_dm.history['val_accuracy'],   color='seagreen',  linestyle='--', label='Mask val')
axes[0].set_title('Accuracy'); axes[0].set_xlabel('Epoch')
axes[0].legend(); axes[0].grid(True, alpha=0.3)

axes[1].plot(history_cnn2.history['loss'],     color='steelblue', label='Raw train')
axes[1].plot(history_cnn2.history['val_loss'], color='steelblue', linestyle='--', label='Raw val')
axes[1].plot(history_dm.history['loss'],       color='seagreen',  label='Mask train')
axes[1].plot(history_dm.history['val_loss'],   color='seagreen',  linestyle='--', label='Mask val')
axes[1].set_title('Loss'); axes[1].set_xlabel('Epoch')
axes[1].legend(); axes[1].grid(True, alpha=0.3)

plt.tight_layout(); plt.show()

print(f'CNN Model 2 — Raw inputs      : {acc_raw:.4f} ({acc_raw:.2%})')
print(f'CNN Model 2 — Data-driven mask: {acc_dm:.4f} ({acc_dm:.2%})')
print(f'Delta                         : {acc_dm - acc_raw:+.4f} ({(acc_dm-acc_raw)*100:+.2f}%)')
[Figure: accuracy and loss curves — raw inputs vs data-driven mask]
CNN Model 2 — Raw inputs      : 0.9222 (92.22%)
CNN Model 2 — Data-driven mask: 0.8711 (87.11%)
Delta                         : -0.0511 (-5.11%)

Result: Raw 92.22% — Masked 87.11% — Delta −5.11%

The mask made things worse. Here's why I think that happened:

1. The mask clips real signal, not just noise. The mean image shows where digits appear on average. But 69.4% of images were already background-inverted, and individual digits still vary in position and size. The mask dims pixels that sometimes carry genuine information.

2. CNN Model 2 already figures this out on its own. Getting 92.22% on raw inputs means the conv filters independently developed center-focus during training. The mask makes explicit what the CNN already learned — just less precisely, since the mask is a single fixed template while the CNN's learned filters respond to the actual content of each image.

3. Preprocessing is a one-way street. Once I multiply a pixel by the mask weight, that information is gone permanently. A CNN trained on raw inputs can choose to ignore edge pixels. The masked CNN can't choose to recover them.

Takeaway: for this architecture and dataset, the CNN is a better spatial filter than anything I can design manually. Raw inputs win. I'm including this experiment because it proves that empirically rather than just assuming it.
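The "one-way street" point can be made concrete with a tiny numpy sketch (a hypothetical image and mask, not the notebook's data): once a pixel is multiplied by a zero or near-zero mask weight, dividing the mask back out is undefined or noise-amplifying, so the information is genuinely gone.

```python
import numpy as np

rng  = np.random.default_rng(1)
img  = rng.integers(0, 256, size=(32, 32)).astype('float32') / 255.0  # toy image
mask = rng.uniform(size=(32, 32)).astype('float32')
mask[0, 0] = 0.0   # a fully suppressed pixel, like a far corner of the crop

masked = img * mask

# Naive "unmasking" divides by the mask: undefined where the mask is zero,
# and tiny weights would amplify any later quantization noise
with np.errstate(divide='ignore', invalid='ignore'):
    recovered = masked / mask

print('finite recovered pixels:', int(np.isfinite(recovered).sum()), '/', recovered.size)
print('pixel (0,0): original =', float(img[0, 0]), '| recovered =', recovered[0, 0])
```

A CNN trained on raw inputs can learn to down-weight edge pixels while still consulting them when useful; the masked CNN has no way to get the suppressed values back.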

In [48]:
np.random.seed(99)
sample_idx    = np.random.choice(len(X_test), size=20, replace=False)
sample_probs  = cnn2.predict(X_test_cnn[sample_idx], verbose=0)
sample_preds  = np.argmax(sample_probs, axis=1)
sample_confs  = np.max(sample_probs, axis=1)
sample_true   = y_test_labels_cnn[sample_idx]

fig, axes = plt.subplots(4, 5, figsize=(15, 13))
fig.suptitle('CNN Model 2 — Predictions on 20 Random Test Images\n'
             'Green = correct  |  Red = incorrect',
             fontsize=14, fontweight='bold')

for i, ax in enumerate(axes.flat):
    ax.imshow(X_test[sample_idx[i]], cmap='gray')
    correct = sample_preds[i] == sample_true[i]
    color   = 'green' if correct else 'red'
    for spine in ax.spines.values():
        spine.set_edgecolor(color)
        spine.set_linewidth(4)
    ax.set_title(
        f'True: {sample_true[i]}   Pred: {sample_preds[i]}\n'
        f'Conf: {sample_confs[i]:.1%}',
        fontsize=9, color=color
    )
    ax.set_xticks([]); ax.set_yticks([])

plt.tight_layout(); plt.show()

n_correct = (sample_preds == sample_true).sum()
print(f'Correct: {n_correct}/20')
for i in range(20):
    if sample_preds[i] != sample_true[i]:
        print(f'  Wrong — True: {sample_true[i]}  Pred: {sample_preds[i]}  '
              f'Conf: {sample_confs[i]:.1%}')
[Figure: 20 random test images with predictions, correct in green, wrong in red]
Correct: 19/20
  Wrong — True: 2  Pred: 7  Conf: 54.0%

Observation:

  • 19 out of 20 random test images predicted correctly.
  • The one wrong prediction:
    • True: 2 → Predicted: 7 with 54.0% confidence — a low-confidence error. A 2 and a 7 can share similar diagonal stroke structure in degraded street imagery.
  • Low-confidence wrong predictions are the model's honest signal of uncertainty. High-confidence errors (>85%) are more concerning — they appear in the full error analysis in Section 8.

Section 7 — Full Model Comparison¶

All models were trained on identical data splits with the same random seeds.

In [49]:
# Compute test accuracies
def test_acc(model, X_te, y_te_ohe):
    y_pred = np.argmax(model.predict(X_te, verbose=0), axis=1)
    y_true = np.argmax(y_te_ohe, axis=1)
    return np.mean(y_pred == y_true)

results = [
    ('ANN Model 1', '2 layers (64→32)',                  max(history_ann1.history['accuracy']),
     max(history_ann1.history['val_accuracy']),          test_acc(ann1, X_test_ann, y_test_ohe),
     'Shallow baseline — no spatial awareness'),
    ('ANN Model 2', '5 layers + Dropout + BatchNorm',    max(history_ann2.history['accuracy']),
     max(history_ann2.history['val_accuracy']),          test_acc(ann2, X_test_ann, y_test_ohe),
     'Deeper — still no spatial structure'),
    ('CNN Model 1', '2 conv blocks',                     max(history_cnn1.history['accuracy']),
     max(history_cnn1.history['val_accuracy']),          test_acc(cnn1, X_test_cnn, y_test_ohe),
     'Spatial features work — overfits without Dropout'),
    ('CNN Model 2', '4 conv blocks + BatchNorm + Dropout', max(history_cnn2.history['accuracy']),
     max(history_cnn2.history['val_accuracy']),          test_acc(cnn2, X_test_cnn, y_test_ohe),
     'Best — depth + regularization + clean train/val gap'),
]

df = pd.DataFrame(results,
    columns=['Model','Architecture','Train Acc','Val Acc','Test Acc','Key characteristic'])
for col in ['Train Acc','Val Acc','Test Acc']:
    df[col] = df[col].map('{:.2%}'.format)
print(df.to_string(index=False))
      Model                        Architecture Train Acc Val Acc Test Acc                                  Key characteristic
ANN Model 1                    2 layers (64→32)    64.02%  63.86%   63.57%             Shallow baseline — no spatial awareness
ANN Model 2      5 layers + Dropout + BatchNorm    77.59%  79.73%   77.33%                 Deeper — still no spatial structure
CNN Model 1                       2 conv blocks    99.08%  95.17%   87.30%    Spatial features work — overfits without Dropout
CNN Model 2 4 conv blocks + BatchNorm + Dropout    96.26%  96.88%   92.22% Best — depth + regularization + clean train/val gap
In [50]:
# Bar chart
models  = ['ANN 1', 'ANN 2', 'CNN 1', 'CNN 2']
t_accs  = [test_acc(ann1, X_test_ann, y_test_ohe),
            test_acc(ann2, X_test_ann, y_test_ohe),
            test_acc(cnn1, X_test_cnn, y_test_ohe),
            test_acc(cnn2, X_test_cnn, y_test_ohe)]
colors  = ['#5b9bd5','#2e75b6','#ed7d31','#c55a11']

fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(models, t_accs, color=colors, edgecolor='white', width=0.5)
for bar, acc in zip(bars, t_accs):
    ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+0.003,
            f'{acc:.1%}', ha='center', va='bottom', fontweight='bold', fontsize=11)
ax.set_ylabel('Test Accuracy'); ax.set_ylim(0, 1.05)
ax.set_title('Test Accuracy by Model', fontsize=14, fontweight='bold')
ax.axhline(0.9, color='grey', linestyle=':', linewidth=1, alpha=0.7)
ax.grid(axis='y', alpha=0.3)
plt.tight_layout(); plt.show()
[Figure: test accuracy bar chart by model]

Section 8 — Error Analysis: Misclassified Images¶

Metrics tell us how many errors the model makes. Inspecting the actual wrong predictions tells us why — which is far more actionable.

In [51]:
y_probs_all = cnn2.predict(X_test_cnn, verbose=0)   # predict once, reuse below
y_pred_all  = np.argmax(y_probs_all, axis=1)
y_true_all  = np.argmax(y_test_ohe, axis=1)

wrong_idx = np.where(y_pred_all != y_true_all)[0]
print(f'Misclassified: {len(wrong_idx)}/{len(y_true_all)} ({100*len(wrong_idx)/len(y_true_all):.1f}%)')

np.random.seed(7)
sample_wrong = np.random.choice(wrong_idx, size=min(20, len(wrong_idx)), replace=False)

fig, axes = plt.subplots(4, 5, figsize=(15, 13))
fig.suptitle('CNN Model 2 — Misclassified Test Images\n'
             'True label → Predicted label (confidence)',
             fontsize=13, fontweight='bold')

for i, ax in enumerate(axes.flat):
    if i >= len(sample_wrong):
        ax.axis('off'); continue
    idx  = sample_wrong[i]
    conf = float(y_probs_all[idx][y_pred_all[idx]])
    ax.imshow(X_test[idx], cmap='gray')
    ax.set_title(f'True:{y_true_all[idx]} → Pred:{y_pred_all[idx]}\nConf:{conf:.1%}',
                 fontsize=9, color='red')
    for spine in ax.spines.values():
        spine.set_edgecolor('red'); spine.set_linewidth(3)
    ax.set_xticks([]); ax.set_yticks([])

plt.tight_layout(); plt.show()
Misclassified: 1401/18000 (7.8%)
[Figure: 20 misclassified test images with true/predicted labels and confidence]

Observation:

  • Most errors occur on images that are genuinely hard — degraded quality, extreme lighting, or heavy occlusion from neighboring digits.
  • Low confidence wrong predictions (~55–65%): the model was uncertain — honest mistakes on ambiguous inputs.
  • High confidence wrong predictions (>85%): the model was wrong but sure. These typically involve visually deceptive images (a heavily stylized 7 that reads as a 1, or a degraded 8 resembling a 3).
  • The 7.8% error rate reflects genuine visual ambiguity in real street photography — not systematic model failure.
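The low/mid/high confidence split described above can be sketched as a small helper. The probabilities here are synthetic for illustration; in the notebook the real inputs would be y_probs_all and the integer true labels.

```python
import numpy as np

def triage_errors(probs, y_true, low=0.65, high=0.85):
    """Count wrong predictions by confidence bucket."""
    preds = probs.argmax(axis=1)
    confs = probs.max(axis=1)
    wrong = preds != y_true
    return {'low':  int((wrong & (confs <  low)).sum()),
            'mid':  int((wrong & (confs >= low) & (confs <= high)).sum()),
            'high': int((wrong & (confs >  high)).sum())}

# Six synthetic 3-class predictions: 2 correct, 4 wrong at varying confidence
probs = np.array([[0.90, 0.05, 0.05],
                  [0.55, 0.40, 0.05],
                  [0.05, 0.90, 0.05],
                  [0.34, 0.33, 0.33],
                  [0.10, 0.20, 0.70],
                  [0.20, 0.75, 0.05]])
y_true = np.array([0, 1, 2, 2, 2, 0])
print(triage_errors(probs, y_true))   # {'low': 2, 'mid': 1, 'high': 1}
```

The 'high' bucket is the one worth auditing by hand: those are the confidently wrong cases.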

Final Conclusions¶

Model results summary:

Model | Train Acc | Val Acc | Test Acc
ANN Model 1 | 64.02% | 63.86% | 63.57%
ANN Model 2 | 77.59% | 79.73% | 77.33%
CNN Model 1 | 99.08% | 95.17% | 87.30%
CNN Model 2 | 96.26% | 96.88% | 92.22%

1. CNNs massively outperform ANNs — +14.89 points on test accuracy. ANNs flatten the image and throw away all spatial structure before processing. CNNs keep it, using filters that detect edges, strokes, and curves. That difference alone accounts for most of the accuracy gap.
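The spatial-structure argument can be made concrete with a toy numpy experiment (illustrative only, not the notebook's models): shifting an image by one pixel shifts a convolution's feature map by one pixel, while the flattened pixel vector a dense layer sees becomes almost uncorrelated position-by-position.

```python
import numpy as np

rng     = np.random.default_rng(2)
img     = rng.uniform(size=(10, 10))
shifted = np.roll(img, 1, axis=1)   # every pixel moved one column right

def conv2d_valid(x, k):
    """Naive valid-mode 2D cross-correlation (illustration only)."""
    kh, kw = k.shape
    h, w = x.shape[0] - kh + 1, x.shape[1] - kw + 1
    out = np.empty((h, w))
    for i in range(h):
        for j in range(w):
            out[i, j] = (x[i:i+kh, j:j+kw] * k).sum()
    return out

kernel  = rng.uniform(size=(3, 3))
f_orig  = conv2d_valid(img, kernel)
f_shift = conv2d_valid(shifted, kernel)

# The shifted image's feature map is the original map shifted by one column:
# the convolution still "sees" the same pattern, just translated
print('conv is shift-equivariant:', np.allclose(f_shift[:, 1:], f_orig[:, :-1]))

# The flattened vectors a dense layer would compare barely agree per position
print('flattened correlation:', np.corrcoef(img.ravel(), shifted.ravel())[0, 1])
```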

2. Deeper models with regularization consistently win. In both families, the deeper regularized model beat the shallower one. Depth gives capacity; Dropout and BatchNorm make sure that capacity generalizes rather than memorizes.

3. CNN Model 1 showed exactly why Dropout in the dense head matters. Without it: 99.08% train vs 87.30% test — an 11.78-point gap. Textbook overfitting. Conv layers share weights spatially, which limits how much they can memorize; dense layers have no such constraint. Dropout(0.5) in the head of CNN Model 2 closed that gap.

4. LeakyReLU matters in conv layers specifically. Standard ReLU permanently kills filters that produce negative pre-activations. LeakyReLU(negative_slope=0.1) keeps a 10% gradient so no filter ever gets stuck.
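A minimal numeric check of that gradient claim (toy values, not the trained network): for a negative pre-activation, ReLU's derivative is exactly 0, while LeakyReLU with negative_slope=0.1 still passes 10% of the gradient.

```python
import numpy as np

def relu_grad(z):
    # d/dz max(0, z): zero for negative inputs, so the unit gets no update
    return (z > 0).astype(float)

def leaky_relu_grad(z, negative_slope=0.1):
    # LeakyReLU keeps a small slope on the negative side: gradient never dies
    return np.where(z > 0, 1.0, negative_slope)

z = np.array([-2.0, -0.5, 0.3, 1.5])
print('ReLU  gradients:', relu_grad(z))        # negative inputs get 0
print('Leaky gradients:', leaky_relu_grad(z))  # negative inputs get 0.1
```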

5. MaxPooling with max (not average) is the right call. For detecting whether a feature is present, you want the strongest signal. Average dilutes it with surrounding background values.
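A toy 4x4 feature map makes the pooling point concrete (illustrative values only): a single strong stroke response survives max pooling but gets averaged away by mean pooling.

```python
import numpy as np

# One strong stroke response (0.9) surrounded by near-zero background
fmap = np.array([[0.0, 0.0, 0.1, 0.0],
                 [0.0, 0.9, 0.0, 0.0],
                 [0.0, 0.0, 0.0, 0.1],
                 [0.1, 0.0, 0.0, 0.0]])

def pool2x2(x, op):
    """Apply op (np.max or np.mean) over non-overlapping 2x2 windows."""
    return np.array([[op(x[i:i+2, j:j+2]) for j in range(0, x.shape[1], 2)]
                     for i in range(0, x.shape[0], 2)])

max_pooled = pool2x2(fmap, np.max)   # the 0.9 peak survives intact
avg_pooled = pool2x2(fmap, np.mean)  # the peak is diluted to 0.225
print('max pool:\n', max_pooled)
print('avg pool:\n', avg_pooled)
```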

6. Use the provided validation set — don't cut into training data. The H5 file came with 60,000 dedicated validation images. Using validation_split=0.2 instead would have wasted 8,400 training samples for no reason.

7. A clever preprocessing mask is still worse than a well-trained CNN. The data-driven spatial mask (center weight 0.86, corner weight 0.33) scored 87.11% vs 92.22% for raw inputs — a 5.11-point drop. CNN Model 2 already learns center-focus implicitly. Preprocessing is irreversible; neural networks learn.

8. The remaining errors make sense. 1,401 out of 18,000 images misclassified (7.80% error rate). Error analysis showed most wrong predictions are on genuinely hard images. High-confidence wrong predictions — like a 4 predicted as 1 at 92% confidence — are the most important failure mode to watch.


Best model: CNN Model 2 at 92.22% test accuracy.

One important caveat: this classifier reads one digit at a time from a pre-centered 32x32 crop. Reading a full house number like 128 in a real street photo would also need a digit detector to find and crop each digit, and a sequence reconstructor to put them in order. This notebook builds the classifier — the core piece — but not the full pipeline.