/*
* MIT License
*
* Copyright (c) 2009-2016 Ignacio Calderon <https://github.com/kronenthaler>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package libai.nn.supervised;

import java.util.Arrays;
import java.util.Random;

import libai.common.functions.Function;
import libai.common.matrix.Column;
import libai.common.matrix.Matrix;
import libai.nn.supervised.backpropagation.Backpropagation;
import libai.nn.supervised.backpropagation.StandardBackpropagation;

/**
* Multi-Layer Perceptron (MLP). The MLP was one of the first neural network
* architectures trained with the generalized delta rule (backpropagation).
* This implementation supports multiple backpropagation variants via the
* {@link Backpropagation} interface. Check the package
* {@code libai.nn.supervised.backpropagation} for more details about the
* supported implementations.
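*
* <p>A minimal usage sketch, assuming {@code patterns} and {@code answers}
* are {@code Column} arrays of matching length and {@code Sigmoid} stands
* for any differentiable {@link libai.common.functions.Function}
* implementation available in the build:</p>
* <pre>{@code
* // 2 inputs, 3 hidden neurons, 1 output; the input layer applies no function
* MultiLayerPerceptron mlp = new MultiLayerPerceptron(
*         new int[]{2, 3, 1},
*         new Function[]{null, new Sigmoid(), new Sigmoid()});
* mlp.train(patterns, answers, 0.2, 50000, 0, patterns.length, 1e-4);
* Column output = mlp.simulate(patterns[0]);
* }</pre>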
*
* @author kronenthaler
*/
public class MultiLayerPerceptron extends SupervisedLearning {
private static final long serialVersionUID = 3155220303024711102L;
private final Matrix[] W;
private final Column[] Y, b, u; // per layer: u = W*Y + b
private final int[] nperlayer; // number of neurons per layer, including the input layer
private final int layers;
private final Function[] func;
private final transient Backpropagation trainer;
/**
* Constructor. Creates a MultiLayerPerceptron with {@code nperlayer.length}
* layers. The number of neurons per layer is given by {@code nperlayer},
* where {@code nperlayer[0]} is the size of the input layer. Each layer
* {@code i} applies the output function {@code funcs[i]}; these functions
* must be differentiable. The training algorithm is standard backpropagation.
*
* @param nperlayer Number of neurons per layer, including the input layer.
* @param funcs Functions to apply per layer; {@code funcs[0]} may be null
* since no function is applied to the input layer.
*/
public MultiLayerPerceptron(int[] nperlayer, Function[] funcs) {
this(nperlayer, funcs, new StandardBackpropagation());
}
/**
* Constructor. Creates a MultiLayerPerceptron with {@code nperlayer.length}
* layers. The number of neurons per layer is given by {@code nperlayer},
* where {@code nperlayer[0]} is the size of the input layer. Each layer
* applies the output function {@code funcs[i]}; these functions must be
* differentiable. A different implementation of the backpropagation
* algorithm can be provided via the {@code trainer} object.
*
* @param nperlayer Number of neurons per layer, including the input layer.
* @param funcs Functions to apply per layer; {@code funcs[0]} may be null
* since no function is applied to the input layer.
* @param trainer The backpropagation implementation to be used during
* training.
*/
public MultiLayerPerceptron(int[] nperlayer, Function[] funcs, Backpropagation trainer) {
this(nperlayer, funcs, trainer, getDefaultRandomGenerator());
}
/**
* Constructor. Creates a MultiLayerPerceptron with {@code nperlayer.length}
* layers. The number of neurons per layer is given by {@code nperlayer},
* where {@code nperlayer[0]} is the size of the input layer. Each layer
* applies the output function {@code funcs[i]}; these functions must be
* differentiable. The training algorithm is standard backpropagation. A
* random generator can be passed to initialize the matrices.
*
* @param nperlayer Number of neurons per layer, including the input layer.
* @param funcs Functions to apply per layer; {@code funcs[0]} may be null
* since no function is applied to the input layer.
* @param rand Random generator used for initializing the matrices.
*/
public MultiLayerPerceptron(int[] nperlayer, Function[] funcs, Random rand) {
this(nperlayer, funcs, new StandardBackpropagation(), rand);
}
/**
* Constructor. Creates a MultiLayerPerceptron with {@code nperlayer.length}
* layers. The number of neurons per layer is given by {@code nperlayer},
* where {@code nperlayer[0]} is the size of the input layer. Each layer
* applies the output function {@code funcs[i]}; these functions must be
* differentiable. A different backpropagation implementation can be
* provided along with a random generator to initialize the matrices.
*
* @param nperlayer Number of neurons per layer, including the input layer.
* @param funcs Functions to apply per layer; {@code funcs[0]} may be null
* since no function is applied to the input layer.
* @param trainer The backpropagation implementation to be used during
* training.
* @param rand Random generator used for initializing the matrices.
*/
public MultiLayerPerceptron(int[] nperlayer, Function[] funcs, Backpropagation trainer, Random rand) {
super(rand);
this.nperlayer = nperlayer;
func = funcs;
this.trainer = trainer;
layers = nperlayer.length;
W = new Matrix[layers];//position zero reserved
b = new Column[layers];//position zero reserved
Y = new Column[layers];//position zero reserved for the input pattern
u = new Column[layers];//position zero reserved
initialize();
}
/**
* Initializes the weight matrices, bias vectors, and auxiliary buffers.
*/
private void initialize() {
Y[0] = new Column(nperlayer[0]);
for (int i = 1; i < layers; i++) {
W[i] = new Matrix(nperlayer[i], nperlayer[i - 1]);
b[i] = new Column(nperlayer[i]);
W[i].fill(true, random); // fill randomly
b[i].fill(true, random); // fill randomly
u[i] = new Column(W[i].getRows());
Y[i] = new Column(u[i].getRows());
}
}
/**
* Trains the network using the configured backpropagation implementation
* (standard backpropagation by default). Each pattern is propagated from
* the input layer to the output layer, where the error is computed. The
* error is then propagated backwards to the first hidden layer, computing
* the local gradients d[i] of each layer (backpropagation). Finally, the
* weights and biases are updated using the generalized delta rule, where
* beta is the momentum term (zero for plain backpropagation):<br>
* W[i] = W[i] + beta*(W[i] - Wprev[i]) - (1 - beta)*alpha*d[i]*Y[i-1]^t<br>
* b[i] = b[i] + beta*(b[i] - bprev[i]) - (1 - beta)*alpha*d[i]<br>
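*
* <p>A usage sketch: training on the first half of a data set, assuming
* {@code mlp} is an initialized network and {@code patterns} and
* {@code answers} are {@code Column} arrays of matching length:</p>
* <pre>{@code
* mlp.train(patterns, answers, 0.1, 10000, 0, patterns.length / 2, 1e-4);
* }</pre>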
*
* @param patterns The patterns to be learned.
* @param answers The expected answers.
* @param alpha The learning rate.
* @param epochs The maximum number of iterations.
* @param offset The index of the first pattern to use.
* @param length How many patterns will be used.
* @param minerror The minimal error expected.
*/
@Override
public void train(Column[] patterns, Column[] answers, double alpha, int epochs, int offset, int length, double minerror) {
validatePreconditions(patterns, answers, epochs, offset, length, minerror);
initializeProgressBar(epochs);
// initialize the trainer with the set of matrices required
trainer.initialize(this, nperlayer, func, W, Y, b, u);
// train the network, i.e., update the weights W and biases b according to the outputs
trainer.train(patterns, answers, alpha, epochs, offset, length, minerror);
if (progress != null) {
progress.setValue(progress.getMaximum());
}
}
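/**
* Propagates the pattern through the network and returns the output of
* the last layer. Note that the returned column is the network's internal
* output buffer, not a copy.
*
* @param pattern The pattern to evaluate.
* @return The output of the last layer.
*/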
@Override
public Column simulate(Column pattern) {
simulate(pattern, null);
return Y[layers - 1];
}
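/**
* Propagates the pattern through the network: Y[0] = pattern, then
* Y[i] = F_i(W[i]*Y[i-1] + b[i]) for each subsequent layer i. If
* {@code result} is not null, the output of the last layer is copied
* into it.
*
* @param pattern The pattern to evaluate.
* @param result The column to receive the output, or null to skip the copy.
*/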
@Override
public void simulate(Column pattern, Column result) {
// Y[0] = pattern
pattern.copy(Y[0]);
// Y[i] = F_i(W[i]*Y[i-1] + b[i])
for (int j = 1; j < layers; j++) {
W[j].multiply(Y[j - 1], u[j]);
u[j].add(b[j], u[j]);
u[j].apply(func[j], Y[j]);
}
if (result != null) {
Y[layers - 1].copy(result);
}
}
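/**
* Returns a copy of the array of weight matrices. The copy is shallow:
* the {@link Matrix} instances are the network's own, so changes to them
* affect the network. Position zero is reserved and unused.
*
* @return A shallow copy of the weight matrices, one per layer.
*/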
public Matrix[] getWeights() {
return Arrays.copyOf(W, W.length);
}
}