This demo uses the py21cmnet package (built on PyTorch) and the outputs of a real cosmological simulation (from 21cmFAST) to train a U-Net. The goal is to take a starting 2D matter field distribution and predict where in the simulation the baryonic field would be ionized. This in effect emulates cosmological radiative transfer, which is generally the most computationally expensive component of large-scale cosmological simulations. See also arXiv:2102.06713 for another recent implementation of this approach.
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import torch
from sklearn import metrics
import os
import py21cmnet
from py21cmnet.data import DATA_PATH
from py21cmnet.config import CONFIG_PATH
torch.set_default_dtype(torch.float32)
# can toggle between CPU, Apple-GPU, NVIDIA-GPU
torch.set_default_device('cpu')
#torch.set_default_device('mps')
#torch.set_default_device('cuda')
1. Visualize the data
Let's load the cosmological simulation data and visualize the problem we are tasked with. Here we will use a small dataset included with the py21cmnet package that comes from a real cosmological simulation: a single 128 x 128 x 128 3D simulation cube. We treat each of its 128 "slices" as the full dataset of 2D images, using 100 of these as our training set and the remaining 28 as our test set. In a production setting, we would use larger and more diverse datasets (e.g. varying the parameters of the simulation itself, including its random seed).
# load a dataset
fname = os.path.join(DATA_PATH, "train_21cmfast_basic.h5")
X, y = py21cmnet.utils.read_test_data(fname, ndim=2)
# only use the first channel in X and y
X = X[:, :1]
y = y[:, :1]
# visualize the data
fig, axes = plt.subplots(1, 2, figsize=(11, 6))
cax = axes[0].imshow(X[0,0].cpu())
axes[0].set_title('Density Field')
axes[0].set_xlabel('X [Mpc]'); axes[0].set_ylabel('Y [Mpc]')
fig.colorbar(cax, ax=axes[0], fraction=0.0473, pad=.01)
cax = axes[1].imshow(y[0,0].cpu(), cmap='bone')
axes[1].set_title('Neutral Fraction Field [0,1]')
axes[1].set_xlabel('X [Mpc]'); axes[1].set_ylabel('Y [Mpc]')
fig.colorbar(cax, ax=axes[1], fraction=0.0473, pad=.01);
Figure 1 | Above we show the dark matter overdensity field (left) from the simulation at a given snapshot in redshift, alongside the corresponding hydrogen neutral fraction field (right) at the same redshift. Notice the general correspondence between large values of delta (left) and ionized regions in x_HI (right): by eye we can see that a smoothed version of the left panel might look like the inverse of the right. Note that because we are plotting the neutral fraction field on the right, ionized regions correspond to x_HI = 0.
Note: delta can take on any non-negative value, whereas x_HI is bounded between [0, 1], with 0 implying the field is ionized and 1 implying the field is neutral. Also note that partial ionization is possible, so predicting x_HI is not strictly a binary classification problem, although due to the localized nature of ionization fronts we could make this assumption with minimal impact. We can confirm these ranges directly, as shown in the quick check below.
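As a quick sanity check (an aside, not part of the original pipeline), we can inspect the extrema of the X and y tensors loaded above and count how many pixels are partially ionized:
# inspect the field ranges described above
print("delta range: {:.3f} to {:.3f}".format(X.min().item(), X.max().item()))
print("x_HI range:  {:.3f} to {:.3f}".format(y.min().item(), y.max().item()))
# fraction of pixels that are partially ionized (strictly between 0 and 1)
partial = ((y > 0) & (y < 1)).float().mean()
print("partially ionized fraction: {:.3f}".format(partial.item()))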
Also notice that the images above are simulated with periodic boundary conditions: we can "roll" the maps left-right or up-down and they will still look continuous. This data augmentation technique will be important when training our model, as sketched below.
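For example, here is a minimal sketch using torch.roll (the Roll transform applied later performs this augmentation for us); periodicity guarantees a randomly shifted copy of a slice is an equally valid training image:
# roll a slice by a random (dy, dx) shift in both map dimensions
dy, dx = np.random.randint(0, 128, size=2)
X_rolled = torch.roll(X[0], shifts=(int(dy), int(dx)), dims=(-2, -1))
y_rolled = torch.roll(y[0], shifts=(int(dy), int(dx)), dims=(-2, -1))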
The task:
Our task is to develop a model that takes the left map as input and outputs the right map, with the outputs bounded between [0, 1].
2. Build and Train a 2D U-Net model
The model we will use is a deep convolutional neural network (CNN), specifically an auto-encoder variant known as the U-Net. The U-Net was successful in improving the suitability of CNNs for image segmentation; its key feature is the use of skip (or "residual") connections, which help the network retain information from earlier layers and improve performance.
Figure 2 | A basic diagram of a U-Net model (named for its "U" structure). The input image is downsampled into a lower-dimensional latent space (bottom), which is then upsampled back to its original dimensionality. Convolutions and non-linear activations are applied between layers to improve expressivity, and dropout is used for regularization. The final output is passed through a SoftMax to restrict it to the range [0, 1].
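To make the figure concrete, here is a minimal single-level U-Net sketch in plain PyTorch. This is illustrative only (the actual model below is built from py21cmnet's AutoEncoder config), and we use a sigmoid as a stand-in for the final transform that bounds the output to [0, 1]:
import torch.nn as nn

class MiniUNet(nn.Module):
    """Illustrative one-level U-Net: encode, pool, decode, with one skip connection."""
    def __init__(self, in_ch=1, out_ch=1, hidden=12):
        super().__init__()
        # encoder: convolution + activation at full resolution
        self.enc = nn.Sequential(
            nn.Conv2d(in_ch, hidden, 3, padding=1, padding_mode='circular'),
            nn.ReLU())
        self.pool = nn.MaxPool2d(2)
        # bottleneck convolution in the downsampled latent space
        self.bottleneck = nn.Sequential(
            nn.Conv2d(hidden, hidden, 3, padding=1, padding_mode='circular'),
            nn.ReLU())
        self.up = nn.Upsample(scale_factor=2)
        # decoder sees upsampled features concatenated with the skip features
        self.dec = nn.Conv2d(2 * hidden, out_ch, 3, padding=1, padding_mode='circular')

    def forward(self, x):
        skip = self.enc(x)                    # full-resolution features
        z = self.bottleneck(self.pool(skip))  # latent space (bottom of the "U")
        z = self.up(z)                        # back to full resolution
        z = torch.cat([z, skip], dim=1)       # skip connection
        return torch.sigmoid(self.dec(z))     # bound the output to [0, 1]

# sanity check: input and output shapes match
assert MiniUNet()(torch.randn(1, 1, 128, 128)).shape == (1, 1, 128, 128)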
Model specifications: here, we will use the pre-designed "vanilla" U-Net model that ships with the py21cmnet package. However, it can be customized in any way, as we show below by changing the number of input and output channels from 2 (the default) to 1. In this default model there are two convolution-and-pooling steps in the encoder and two convolution-and-upsampling steps in the decoder.
# load a vanilla model with skip connections
params = py21cmnet.utils.load_autoencoder_params(os.path.join(CONFIG_PATH, "autoencoder.yaml"),
os.path.join(CONFIG_PATH, "autoencoder2d_defaults.yaml"))
# modify for the dataset we are using with only 1 input (density field) and output (ionization field) channel
params['encoder_layers'][0]['conv_layers'][0]['conv_kwargs']['in_channels'] = 1
params['decoder_layers'][-1]['conv_layers'][-1]['conv_kwargs']['out_channels'] = 1
params['final_transforms'] = params['final_transforms'][:1]
model = py21cmnet.models.AutoEncoder(**params)
Here is what the first block of the encoder looks like:
model.encoder[0]
Encoder(
  (model): Sequential(
    (0): ConvNd(
      (model): Sequential(
        (0): Conv2d(1, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=circular)
        (1): ReLU()
        (2): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): ConvNd(
      (model): Sequential(
        (0): Conv2d(12, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=circular)
        (1): ReLU()
        (2): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
  )
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
Next we need to split the full dataset into a training set and a testing set. Of the 128 slices, we will select a random 100 as the training set and the other 28 as the test set.
# split the data into test and train
np.random.seed(0)
select = np.zeros(len(X), dtype=bool)
select[np.random.choice(np.arange(len(X)), 100, replace=False)] = True
X_train = X[select]
y_train = y[select]
X_test = X[~select]
y_test = y[~select]
Next we put the training and test data into a special BoxDataset object and stick that into a DataLoader. Note that in this case we can keep all of the data in memory (hence the utils.load_dummy dummy loader), but in cases where we can't, the DataLoader will instead stream the training data for each mini-batch.
# load data into a DataLoader: we augment the images by including a random X & Y "roll"
ds_train = py21cmnet.dataset.BoxDataset(X_train, y_train,
py21cmnet.utils.load_dummy,
transform=py21cmnet.dataset.Roll(ndim=2))
dl_train = torch.utils.data.DataLoader(ds_train, batch_size=1)
# note that we do not perform augmentation on test set!
ds_test = py21cmnet.dataset.BoxDataset(X_test, y_test,
py21cmnet.utils.load_dummy)
dl_test = torch.utils.data.DataLoader(ds_test, batch_size=len(X_test))
# define an accuracy function
def acc_fn(pred, true):
# binarize the classes
pred = pred.round()
true = true.round()
return (pred == true).sum() / pred.numel()
%%time
# train the model with the Adam optimizer for five epochs
info = py21cmnet.utils.train(model, dl_train, torch.nn.MSELoss(reduction='mean'), torch.optim.Adam, verbose=False,
optim_kwargs=dict(lr=0.01), Nepochs=5, valid_dloader=dl_test, acc_fn=acc_fn)
CPU times: user 19.9 s, sys: 31.4 s, total: 51.3 s
Wall time: 18.6 s
Below we show the training loss as a function of mini-batch, and the accuracy against the test set over epochs.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
axes[0].plot(info['train_loss'], c='k')
axes[0].set_xlabel('mini-batch'); axes[0].set_ylabel('training loss')
axes[1].plot(info['valid_acc'], c='k')
axes[1].set_xlabel('epoch'); axes[1].set_ylabel('validation accuracy')
Figure 3 | The training loss per mini-batch, which converges to a stable floor after a couple of epochs, and the corresponding accuracy against the validation (or test) set, which also roughly converges after a single epoch. This early convergence is probably a function of the small dataset and small model used in this demonstration.
To better assess visually how the model is performing, we can plot its outputs.
with torch.no_grad():
y_pred = model(X_test)
fig, axes = plt.subplots(1, 2, figsize=(11, 6))
cax = axes[0].imshow(y_pred[0,0].cpu(), cmap='bone')
axes[0].set_title('Model Prediction')
axes[0].set_xlabel('X [Mpc]'); axes[0].set_ylabel('Y [Mpc]')
fig.colorbar(cax, ax=axes[0], fraction=0.0473, pad=.01)
cax = axes[1].imshow(y_test[0,0].cpu(), cmap='bone')
axes[1].set_title('Test Set')
axes[1].set_xlabel('X [Mpc]');
fig.colorbar(cax, ax=axes[1], fraction=0.0473, pad=.01);
Figure 4 | The predicted neutral field (left) and the truth (right) for an image in the validation set.
We can see that the model output has a "fuzziness" relative to the test data, which comes from the model's inability to fully capture the sharpness of the ionization fronts. If we round both the predictions and the data to either 0 or 1 (i.e. asking only whether each pixel is mostly ionized or mostly neutral), we see better agreement.
fig, axes = plt.subplots(1, 2, figsize=(11, 6))
cax = axes[0].imshow(y_pred[0,0].round().cpu(), cmap='bone')
axes[0].set_title('Rounded Model Prediction')
axes[0].set_xlabel('X [Mpc]'); axes[0].set_ylabel('Y [Mpc]')
fig.colorbar(cax, ax=axes[0], fraction=0.0473, pad=.01)
cax = axes[1].imshow(y_test[0,0].round().cpu(), cmap='bone')
axes[1].set_title('Rounded Test Set')
axes[1].set_xlabel('X [Mpc]');
fig.colorbar(cax, ax=axes[1], fraction=0.0473, pad=.01);
Figure 5 | The predicted class, ionized or neutral (left), and the true class (right) for an image in the validation set.
In this case, the confusion matrix and AUC look like this:
conf_mat = metrics.confusion_matrix(y_test.round().ravel(), y_pred.round().ravel())
disp = metrics.ConfusionMatrixDisplay(conf_mat, )
disp.plot()
Figure 6 | Treating the problem as a binary classification task, we can construct a confusion matrix, shown above.
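As an aside, we can read off from the confusion matrix the same rates that drive the ROC curve below (here the "positive" class is the neutral phase, x_HI = 1):
# unpack the 2x2 matrix: rows are true class, columns are predicted class
tn, fp, fn, tp = conf_mat.ravel()
print("binary accuracy:     {:.3f}".format((tn + tp) / conf_mat.sum()))
print("true positive rate:  {:.3f}".format(tp / (tp + fn)))
print("false positive rate: {:.3f}".format(fp / (fp + tn)))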
# get roc and auc
fpr, tpr, thresh = metrics.roc_curve(y_test.round().ravel(), y_pred.ravel())
auc = metrics.auc(fpr, tpr)
plt.figure(figsize=(5,5))
plt.plot(fpr, tpr)
plt.plot([0,1],[0,1], c='k', ls='--')
plt.title("AUC = {:.2f}".format(auc))
Figure 7 | The corresponding ROC curve, with an Area-Under-Curve (AUC) of 0.96, showing that our model performs quite well in the limit of treating the problem as a binary classification task.
In other words, although the model is merely adequate at predicting the continuous neutral fraction field, it is fairly good at assigning a binary class to the field (either mostly neutral or mostly ionized). Nevertheless, the model could undoubtedly be improved with a larger dataset and a deeper network.
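As a hedged sketch of one such extension, assuming the nested params layout shown earlier (encoder_layers/decoder_layers, each holding conv_layers with conv_kwargs), one could widen every convolution before rebuilding the model. This is untested against other configs, and any normalization-layer settings in the config may need matching updates:
# double each convolution's channel width, keeping the single input
# and output channels fixed (assumes the params structure used above)
for block in params['encoder_layers'] + params['decoder_layers']:
    for conv in block['conv_layers']:
        kw = conv['conv_kwargs']
        if kw['in_channels'] > 1:
            kw['in_channels'] *= 2
        if kw['out_channels'] > 1:
            kw['out_channels'] *= 2
wider_model = py21cmnet.models.AutoEncoder(**params)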