Skip to content

Commit d74290c

Browse files
committed
3 datasets, 4 models, 4 hardening techniques
1 parent 8c84bf0 commit d74290c

33 files changed

Lines changed: 1458 additions & 1592 deletions

manuscripts/Poison26/bin/train/AUDIOMNIST/MobileNet/MobileNet.py

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,12 @@ class DepthwiseSeparableConv(nn.Module):
99
def __init__(self, in_channels, out_channels, stride=1):
1010
super(DepthwiseSeparableConv, self).__init__()
1111

12-
# Depthwise layer with BN and ReLU6
1312
self.depthwise = nn.Sequential(
1413
nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=stride, padding=1, groups=in_channels, bias=False),
1514
nn.BatchNorm2d(in_channels),
1615
nn.ReLU6(inplace=True)
1716
)
1817

19-
# Pointwise layer with BN and ReLU6
2018
self.pointwise = nn.Sequential(
2119
nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False),
2220
nn.BatchNorm2d(out_channels),
@@ -32,10 +30,9 @@ def forward(self, x):
3230
# MobileNetV1 (Model Definition)
3331
# -------------------------
3432
class MobileNet(nn.Module):
35-
def __init__(self, one_batch=None, num_classes=1000):
33+
def __init__(self, one_batch=None, num_classes=10):
3634
super(MobileNet, self).__init__()
3735

38-
# Handle dynamic input sizes
3936
if one_batch is not None:
4037
_, in_channels, H, W = one_batch.shape
4138
self.input_channels = in_channels
@@ -44,18 +41,12 @@ def __init__(self, one_batch=None, num_classes=1000):
4441
self.input_channels = 3
4542
self.input_size = (3, 224, 224)
4643

47-
# -------------------------
48-
# Stem
49-
# -------------------------
5044
self.stem = nn.Sequential(OrderedDict([
5145
('conv1', nn.Conv2d(self.input_channels, 32, kernel_size=3, stride=2, padding=1, bias=False)),
5246
('bn1', nn.BatchNorm2d(32)),
5347
('relu1', nn.ReLU6(inplace=True)),
5448
]))
5549

56-
# -------------------------
57-
# Full MobileNetV1 Architecture
58-
# -------------------------
5950
layers = [
6051
DepthwiseSeparableConv(32, 64, stride=1),
6152
DepthwiseSeparableConv(64, 128, stride=2),
@@ -65,29 +56,19 @@ def __init__(self, one_batch=None, num_classes=1000):
6556
DepthwiseSeparableConv(256, 512, stride=2)
6657
]
6758

68-
# 5x repeating blocks of 512 channels
6959
for _ in range(5):
7060
layers.append(DepthwiseSeparableConv(512, 512, stride=1))
7161

72-
# Final expansion to 1024 channels
7362
layers.extend([
7463
DepthwiseSeparableConv(512, 1024, stride=2),
7564
DepthwiseSeparableConv(1024, 1024, stride=1)
7665
])
7766

78-
# Pack the layers into an nn.Sequential for cleaner forward pass
7967
self.features = nn.Sequential(*layers)
80-
81-
# -------------------------
82-
# Classifier Setup
83-
# -------------------------
8468
self.pool = nn.AdaptiveAvgPool2d(1)
8569
self.fc_input_features = self._get_flattened_feature_size(one_batch)
8670
self.fc = nn.Linear(self.fc_input_features, num_classes)
8771

88-
# -------------------------
89-
# Compute FC feature size dynamically
90-
# -------------------------
9172
def _get_flattened_feature_size(self, one_batch):
9273
was_training = self.training
9374
self.eval()
@@ -109,13 +90,10 @@ def _get_flattened_feature_size(self, one_batch):
10990

11091
return out_features
11192

112-
# -------------------------
113-
# Forward
114-
# -------------------------
11593
def forward(self, x):
11694
x = self.stem(x)
11795
x = self.features(x)
11896
x = self.pool(x)
11997
x = torch.flatten(x, 1)
12098
x = self.fc(x)
121-
return x
99+
return x

manuscripts/Poison26/bin/train/AUDIOMNIST/MobileNet/model_aug.py

Lines changed: 72 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -7,51 +7,46 @@
77

88
import torch
99
import torch.nn as nn
10+
import torch.nn.functional as F
1011
import torch.optim as optim
11-
from torch.utils.data import DataLoader, Dataset
12+
from torch.utils.data import DataLoader, Dataset, Subset
1213

1314
import librosa
1415
from sklearn.metrics import roc_auc_score, average_precision_score
1516

1617
import hashlib
1718
import csv
18-
19-
# ----------------------------
20-
# Import the 2D Vision Models
21-
# ----------------------------
22-
from RegNetX import RegNetX_400MF
23-
from MobileNet import MobileNet
24-
# from ConvNetX import ConvNeXt
19+
import random
2520

2621
# ----------------------------
2722
# Constants
2823
# ----------------------------
2924
SAMPLING_RATE = 16000
3025
NUM_CLASSES = 10
3126
MAX_AUDIO_LENGTH = 16000
32-
BATCH_SIZE = 32
3327

3428
# ----------------------------
3529
# Audio Preprocessing
3630
# ----------------------------
3731
def normalize_audio(x):
    """Peak-normalize *x* into [-1, 1]; an all-zero signal is returned unchanged."""
    peak = np.abs(x).max()
    if peak > 0:
        return x / peak
    return x
3934

4035
def pad_audio(audio, max_len=MAX_AUDIO_LENGTH):
    """Force *audio* to exactly *max_len* samples: truncate if longer,
    zero-pad at the end if shorter."""
    if len(audio) > max_len:
        return audio[:max_len]
    return np.pad(audio, (0, max_len - len(audio)), 'constant')
4237

4338
# ----------------------------
44-
# Dataset
39+
# Dataset & Wrapper
4540
# ----------------------------
46-
class AudioMNISTDataset(Dataset):
41+
class AudioMNISTBaseDataset(Dataset):
42+
"""Loads all audio into memory ONCE. Returns raw numpy arrays."""
4743
def __init__(self, data_path):
4844
self.data = []
4945
self.labels = []
5046

5147
wav_files = glob.glob(os.path.join(data_path, '*', '*.wav'))
52-
# Deterministic shuffle using md5 hash of path
5348
wav_files = sorted(wav_files, key=lambda x: hashlib.md5(x.encode()).hexdigest())
54-
self.wav_files = wav_files.copy() # store for TSV
49+
self.wav_files = wav_files.copy()
5550

5651
for audio_path in tqdm(wav_files, desc="Loading audio files"):
5752
audio, _ = librosa.load(audio_path, sr=SAMPLING_RATE)
@@ -65,12 +60,60 @@ def __len__(self):
6560
return len(self.data)
6661

6762
def __getitem__(self, idx):
68-
audio = torch.tensor(self.data[idx], dtype=torch.float32).unsqueeze(0)
69-
label = self.labels[idx]
70-
return audio, label
63+
return self.data[idx], self.labels[idx]
64+
65+
class AudioSubsetWrapper(Dataset):
    """Dataset adapter over a subset of raw numpy waveforms.

    Optionally applies on-the-fly waveform augmentation, then converts each
    sample to a ``(1, length)`` float32 tensor suitable for a 2D-conv stem.
    """

    def __init__(self, subset, augment=False):
        self.subset = subset
        self.augment = augment

    def __len__(self):
        return len(self.subset)

    def apply_augmentation(self, x):
        """Return an augmented copy of waveform *x*.

        Each transform fires independently with probability 0.5.
        """
        if random.random() < 0.5:
            # Additive Gaussian noise, clipped back into the valid range.
            x = np.clip(x + np.random.randn(len(x)) * 0.005, -1.0, 1.0)
        if random.random() < 0.5:
            # Circular time shift of up to ~200 samples in either direction.
            x = np.roll(x, np.random.randint(-200, 200))
        if random.random() < 0.5:
            # Random gain, clipped back into the valid range.
            x = np.clip(x * np.random.uniform(0.8, 1.2), -1.0, 1.0)
        return x

    def __getitem__(self, idx):
        sample, label = self.subset[idx]
        if self.augment:
            sample = self.apply_augmentation(sample)
        # Add a channel dimension: (length,) -> (1, length).
        tensor = torch.tensor(sample, dtype=torch.float32).unsqueeze(0)
        return tensor, label
89+
90+
def load_data(data_path, batch_size, augment_train=False, split_tsv="split_indices_standard.tsv"):
    """Build train/test DataLoaders over AudioMNIST with an 80/20 split.

    The base dataset is loaded once; the split is contiguous over the
    (hash-sorted, hence deterministic) file order. Augmentation is applied
    only to the training wrapper. The chosen split is written to *split_tsv*
    so runs are reproducible and auditable.

    Returns (train_loader, test_loader).
    """
    base_dataset = AudioMNISTBaseDataset(data_path)

    n_total = len(base_dataset)
    n_train = int(0.8 * n_total)
    train_indices = list(range(n_train))
    test_indices = list(range(n_train, n_total))

    # The wrapper isolates augmentation: only the train split mutates samples.
    train_loader = DataLoader(
        AudioSubsetWrapper(Subset(base_dataset, train_indices), augment=augment_train),
        batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(
        AudioSubsetWrapper(Subset(base_dataset, test_indices), augment=False),
        batch_size=batch_size, shuffle=False)

    # Persist which file landed in which split.
    with open(split_tsv, "w", newline="") as f:
        writer = csv.writer(f, delimiter="\t")
        writer.writerow(["index", "split", "label", "file_path"])
        for split_name, indices in (("train", train_indices), ("test", test_indices)):
            for idx in indices:
                writer.writerow([idx, split_name, base_dataset.labels[idx], base_dataset.wav_files[idx]])
    print(f"Saved split information to {split_tsv}")

    return train_loader, test_loader
71114

72115
# ----------------------------
73-
# Model Definition (Updated Wrapper)
116+
# Model Definition
74117
# ----------------------------
75118
class AudioMNISTModel(nn.Module):
76119
"""
@@ -97,41 +140,12 @@ def forward(self, x):
97140
x = x.view(x.size(0), *self.reshape_dims)
98141
return self.backbone(x)
99142

100-
# ----------------------------
101-
# Load Data
102-
# ----------------------------
103-
def load_data(data_path, batch_size, split_tsv="split_indices_model1.tsv"):
104-
dataset = AudioMNISTDataset(data_path)
105-
# Fixed 80/20 split (after deterministic shuffle)
106-
train_size = int(0.8 * len(dataset))
107-
train_indices = list(range(0, train_size))
108-
test_indices = list(range(train_size, len(dataset)))
109-
110-
train_dataset = torch.utils.data.Subset(dataset, range(0, train_size))
111-
test_dataset = torch.utils.data.Subset(dataset, range(train_size, len(dataset)))
112-
113-
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
114-
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
115-
116-
# --- Write split info to TSV ---
117-
with open(split_tsv, "w", newline="") as f:
118-
writer = csv.writer(f, delimiter="\t")
119-
writer.writerow(["index", "split", "label", "file_path"])
120-
for idx in train_indices:
121-
writer.writerow([idx, "train", dataset.labels[idx], dataset.wav_files[idx]])
122-
for idx in test_indices:
123-
writer.writerow([idx, "test", dataset.labels[idx], dataset.wav_files[idx]])
124-
print(f"Saved split information to {split_tsv}")
125-
126-
return train_loader, test_loader
127-
128143
# ----------------------------
129144
# Training loop
130145
# ----------------------------
131146
def train(model, train_loader, device, epochs=10, lr=0.001):
132147
criterion = nn.CrossEntropyLoss()
133148
optimizer = optim.Adam(model.parameters(), lr=lr)
134-
135149
model.to(device)
136150
model.train()
137151

@@ -144,7 +158,7 @@ def train(model, train_loader, device, epochs=10, lr=0.001):
144158
for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}", unit="batch"):
145159
images, labels = images.to(device), labels.to(device)
146160

147-
optimizer.zero_grad()
161+
optimizer.zero_grad(set_to_none=True)
148162
outputs = model(images)
149163
loss = criterion(outputs, labels)
150164
loss.backward()
@@ -160,7 +174,6 @@ def train(model, train_loader, device, epochs=10, lr=0.001):
160174
elapsed = time.time() - start_time
161175
print(f"Epoch {epoch+1} finished in {elapsed:.2f}s - Loss: {avg_loss:.4f}, Accuracy: {avg_acc:.4f}")
162176

163-
164177
# ----------------------------
165178
# Evaluation
166179
# ----------------------------
@@ -196,9 +209,8 @@ def evaluate_model(model, test_loader, device):
196209

197210
y_true = np.array(y_true)
198211
y_pred = np.array(y_pred)
199-
200-
# compute AUROC and AUPRC
201-
y_true_onehot = np.eye(10)[y_true]
212+
y_true_onehot = np.eye(NUM_CLASSES)[y_true]
213+
202214
auroc = roc_auc_score(y_true_onehot, y_pred, multi_class="ovr")
203215
auprc = average_precision_score(y_true_onehot, y_pred)
204216

@@ -207,42 +219,29 @@ def evaluate_model(model, test_loader, device):
207219
print(f"Test auROC: {auroc:.4f}")
208220
print(f"Test auPRC: {auprc:.4f}")
209221

210-
211222
# ----------------------------
212223
# Main
213224
# ----------------------------
214225
def main():
    """CLI entry point: train a MobileNet backbone on AudioMNIST with
    waveform augmentation enabled, save the weights, and report metrics
    on the clean (un-augmented) test split."""
    parser = argparse.ArgumentParser(description="AudioMNIST Augmented Training")
    parser.add_argument("--data", type=str, default="./data/AudioMNIST", help="Path to dataset")
    parser.add_argument("--output", type=str, default="audiomnist_aug.pt", help="Model output name")
    parser.add_argument("--batch-size", type=int, default=64)
    parser.add_argument("--epochs", type=int, default=10)
    args = parser.parse_args()

    # NOTE(review): this commit removed the module-level
    # `from MobileNet import MobileNet` while `MobileNet` is still referenced
    # below, which would raise NameError at runtime. Import it here so main()
    # keeps working — confirm against the module's actual import block.
    from MobileNet import MobileNet

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = AudioMNISTModel(backbone_class=MobileNet, num_classes=NUM_CLASSES)

    # Augmentation is enabled for the training split only; the test loader
    # always serves clean audio.
    train_loader, test_loader = load_data(args.data, args.batch_size,
                                          augment_train=True,
                                          split_tsv="split_indices_aug.tsv")

    train(model, train_loader, device, epochs=args.epochs)

    torch.save(model.state_dict(), args.output)
    print(f"Model saved to {args.output}")

    print("Model statistics on clean test dataset")
    evaluate_model(model, test_loader, device)

if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)