I found a neat library for astronomy datasets. astroNN is a collection of Keras-driven neural networks for astronomy data.
Getting Started
Install AstroNN
# The PyPI distribution is named "scikit-learn"; the "sklearn" alias is deprecated.
pip install astroNN matplotlib scikit-learn
Create a Jupyter notebook
jupyter notebook
Paste the following into the first input cell (the one labeled `In [ ]:`) and run it:
%matplotlib inline%config InlineBackend.figure_format='retina'# import everything we need firstfrom tensorflow.keras import utilsimport numpy as npfrom sklearn.model_selection import train_test_splitimport pylab as pltfrom astroNN.models import Galaxy10CNNfrom astroNN.datasets import galaxy10from astroNN.datasets.galaxy10 import galaxy10cls_lookup, galaxy10_confusion# To load images and labels (will download automatically at the first time)# First time downloading location will be ~/.astroNN/datasets/images, labels = galaxy10.load_data()# To convert the labels to categorical 10 classeslabels = utils.to_categorical(labels, 10)# Select 10 of the images to inspectimg = Noneplt.ion()print('===================Data Inspection===================')for counter, i in enumerate(range(np.random.randint(0, labels.shape[0], size=10).shape[0])): img = plt.imshow(images[i]) plt.title('Class {}: {} \n Random Demo images {} of 10'.format(np.argmax(labels[i]), galaxy10cls_lookup(labels[i]), counter+1)) plt.draw() plt.pause(2.)plt.close('all')print('===============Data Inspection Finished===============')# To convert to desirable typelabels = labels.astype(np.float32)images = images.astype(np.float32)# Split the dataset into training set and testing settrain_idx, test_idx = train_test_split(np.arange(labels.shape[0]), test_size=0.1)train_images, train_labels, test_images, test_labels = images[train_idx], labels[train_idx], images[test_idx], labels[test_idx]# To create a neural network instancegalaxy10net = Galaxy10CNN()# set maximium epochs the neural network can run, set 5 to get quick resultgalaxy10net.max_epochs = 5# To train the nerual net# astroNN will normalize the data by defaultgalaxy10net.train(train_images, train_labels)# print model summary before traininggalaxy10net.keras_model.summary()# After the training, you can test the neural net performance# Please notice predicted_labels are labels predicted from neural network. 
test_labels are ground truth from the datasetpredicted_labels = galaxy10net.test(test_images)# Convert predicted_labels to classprediction_class = np.argmax(predicted_labels, axis=1)# Convert test_labels to classtest_class = np.argmax(test_labels, axis=1)# Prepare a confusion matrixconfusion_matrix = np.zeros((10,10))# create the confusion matrixfor counter, i in enumerate(prediction_class): confusion_matrix[i, test_class[counter]] += 1# Plot the confusion matrixgalaxy10_confusion(confusion_matrix)