This project was originally centered on the open-source PyBrain python module. The auroral analysis is a neural network that uses backpropagation on an O(1024)-neuron network to attempt the detection of aurora in small night-sky images. However, I had difficulties overcoming what may have been a memory problem with PyBrain, though I never had the time to completely understand the issue.

TensorFlow

In early 2016 I moved to TensorFlow, an open-source machine learning library developed by Google. Most of the work in setting this up was converting my dataset to the mnist idx format. After that, I used an initial steepest descent approach on 50 test images to bring the predictions up to 90% accuracy, followed by 500 steps of adaptive stochastic gradient training (ADAM) on clusters of 50 test images to reach 96% accuracy after step 100, 98% accuracy after step 300, and 100% accuracy over the final dataset after step 500.

I probably won't spend the time putting this online because, as it turned out, not much original work was needed to get great performance. Following the advanced section of the TensorFlow tutorial "Deep MNIST for Experts" should be sufficient!

Original Pybrain code

I split the code into three segments: initializing the network, training the network, and testing the network. Fortunately, a library of night-sky images exists in static jpg form at USask Aurora Webcam.

Currently the neural network does not fully converge. My suspicion is that too few neurons exist for good convergence. The to-do here is to tweak pybrain's neural convergence algorithms from the default to more advanced routines, allowing for larger networks that are more prone to divergence. The notable bug encountered here was the lack of ordering upon unpickling a saved neural object. See the code for the reference.

Initialization Script


#!/usr/bin/python
#Initialization script: build an untrained pybrain network and pickle it.
from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets import SupervisedDataSet
#This dataset is the standard supervised learning class
#Must feed it input and target (output) values.
from pybrain.supervised.trainers import BackpropTrainer
import cPickle

#For a 64x32 image we have 2048 pixels with 3 values each.
#That is 6144 data points.
#1024 produced 224Mb network with double the internal channels
#neuronnumber = 6144
neuronnumber = 512
innerneurons = 511
#First create the network: input layer sized for the image vector,
#one hidden layer, and a single output unit (the aurora score).
mynetwork = buildNetwork(neuronnumber, innerneurons, 1, fast=False)
#We can set a fast network but it looks like 
#having too many inner neurons produces a divergent network.
#So I added fewer to give convergent numbers. We'll see how it works.

#more advanced layering is possible...check the docs

#Persist the untrained network. The file handle is closed even if
#dump() raises. NOTE: text mode ('w') is deliberate -- binary-saved
#pybrain networks fail to unpickle (see the training script's note
#on pybrain bug #57) -- so do not switch this to 'wb'.
with open('auroral_network.pkl', 'w') as fileObject:
	cPickle.dump(mynetwork, fileObject)

Training Script


#!/usr/bin/python
from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets import SupervisedDataSet
#This dataset is the standard supervised learning class
#Must feed it input and target (output) values.
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.utilities import percentError
import sys
import os

import cPickle
from PIL import Image

# Length of the flattened per-image input vector fed to the network.
input_neurons = 512
# Module-level dataset handle; create_dataset() replaces this with a
# SupervisedDataSet (0 is just a "not yet created" placeholder).
aurora_ds = 0

def create_dataset():
	"""Initialize the module-level SupervisedDataSet.

	Each sample has `input_neurons` (512) inputs and one target value.
	"""
	global aurora_ds
	aurora_ds = SupervisedDataSet(input_neurons, 1)

def add_image(imagename,aurora):
	print "Opening image " + imagename
	im = Image.open(imagename)
	cr = [r for (r,g,b) in im.getdata()]
	cg = [g for (r,g,b) in im.getdata()]
	cb = [b for (r,g,b) in im.getdata()]
	dset = range(input_neurons)
	for x in xrange(input_neurons):
		if ( (x%3) == 0 ):
			dset[x] = float(cr[4*x])
		elif ( (x%3) == 1):
			dset[x] = float(cg[4*x])
		else:
			dset[x] = float(cb[4*x])
	aurora_ds.addSample(dset,(aurora,))

def trainondataset():
	#Train the network based on the dataset previously created
	fileObject = open('auroral_network.pkl','r')
	mynetwork = cPickle.load(fileObject)
	fileObject.close()

#Need to add this to fix pickling error (bug #57 on github)
	mynetwork.sorted = False
	mynetwork.sortModules()
#Also, this completely fails if the file is binary saved. So don't do that...



	mynetwork = buildNetwork(input_neurons, 2000, 1)
	trainer = BackpropTrainer(mynetwork, dataset=aurora_ds, momentum=0.1, weightdecay=0.1)
	maxval=1000
	for x in range(0,maxval):
		trainer.trainEpochs( 120 )
		print "On iteration " + str(x) + " out of " + str(maxval)
		print mynetwork.activate(aurora_ds['input'][0])
		print mynetwork.activate(aurora_ds['input'][130])
		print mynetwork.activate(aurora_ds['input'][470])
		print mynetwork.activate(aurora_ds['input'][490])
	#And, save the trained dataset back into the file.
	fileObject2 = open('auroral_network.pkl','w')
	cPickle.dump(mynetwork,fileObject2)
	fileObject2.close()

if __name__ == '__main__':
	print "Creating dataset."
	create_dataset()
	print "Dataset created. Adding images."
	for filename in os.listdir('have_aurora'):
		print filename
		add_image("have_aurora/"+filename,10)
	for filename in os.listdir('none_aurora'):
		print filename
		add_image("none_aurora/"+filename,-10)

	print "Images added to DB. Training..."
	trainondataset()

Testing Script


#!/usr/bin/python
from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets import SupervisedDataSet
#This dataset is the standard supervised learning class
#Must feed it input and target (output) values.
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.utilities import percentError
import sys
import os
import cPickle
from PIL import Image

# Length of the flattened per-image input vector fed to the network.
input_neurons = 512
# Module-level dataset handle; create_dataset() replaces this with a
# SupervisedDataSet (0 is just a "not yet created" placeholder).
aurora_ds = 0

def create_dataset():
	"""Create the module-level SupervisedDataSet used for testing.

	Each sample has `input_neurons` (512) inputs and one target value.
	"""
	global aurora_ds
	aurora_ds = SupervisedDataSet(input_neurons, 1)

def add_image(imagename,aurora):
	print "Opening image " + imagename
	im = Image.open(imagename)
	cr = [r for (r,g,b) in im.getdata()]
	cg = [g for (r,g,b) in im.getdata()]
	cb = [b for (r,g,b) in im.getdata()]
	dset = range(input_neurons)
	for x in xrange(input_neurons):
		if ( (x%3) == 0 ):
			dset[x] = float(cr[4*x])
		elif ( (x%3) == 1):
			dset[x] = float(cg[4*x])
		else:
			dset[x] = float(cb[4*x])
	
	aurora_ds.addSample(dset,(aurora,))

def testdata():
	#Train the network based on the dataset previously created
	fileObject = open('auroral_network.pkl','r')
	mynetwork = cPickle.load(fileObject)
	fileObject.close()
	for x in xrange(50):
		print mynetwork.activate(aurora_ds['input'][10*x])

if __name__ == '__main__':
	print "Creating dataset."
	create_dataset()
	print "Dataset created. Processing image (only the first will be tested)."

	for filename in os.listdir('have_aurora'):
		add_image("have_aurora/"+filename,1)
	for filename in os.listdir('none_aurora'):
		add_image("none_aurora/"+filename,0)

	testdata()