/*
* MIT License
*
* Copyright (c) 2009-2016 Ignacio Calderon <https://github.com/kronenthaler>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package libai.nn.supervised;
import libai.common.matrix.Column;
import libai.nn.NeuralNetwork;
import java.util.ArrayList;
import java.util.PriorityQueue;
import java.util.Random;
/**
* Radial Basis Function network (RBF). A hybrid neural network with 3 layers
* (1 input, 1 hidden, 1 output). The hidden layer is trained using a
* stochastic clustering algorithm: k-means. The output layer is trained using
* the Adaline rule. The k-means algorithm sets the positions of the "centers"
* of the radial basis functions; as this process is independent of the
* expected output, a highly efficient algorithm can be used. This
* implementation uses only Gaussian functions as radial basis functions.
*
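* <p>
* A minimal usage sketch (hypothetical data; only constructors and methods
* visible in this class are assumed): a 1-4-1 network fitted to a few sine
* samples.
* <pre>{@code
* RBF net = new RBF(new int[]{1, 4, 1}); // 1 input, 4 radial units, 1 output
* Column[] patterns = new Column[10];
* Column[] answers = new Column[10];
* for (int i = 0; i < 10; i++) {
*     patterns[i] = new Column(1);
*     patterns[i].position(0, 0, i / 10.0);
*     answers[i] = new Column(1);
*     answers[i].position(0, 0, Math.sin(i / 10.0));
* }
* net.train(patterns, answers, 0.01, 1000, 0, patterns.length, 1.e-5);
* Column prediction = net.simulate(patterns[0]);
* }</pre>
*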
* @author kronenthaler
*/
public class RBF extends Adaline {
private static final long serialVersionUID = 6772562276994202439L;
protected final int nperlayer[];//{#inputs,#Neurons,#outputs}
protected final double[] sigma;
private Column centers[];
/**
* Constructor. Receives an array with the number of neurons per layer.
* Layer[0] is the input layer. Layer[1] is the hidden layer and represents
* the number of radial basis functions to use. Layer[2] is the output
* layer.
*
* @param nperlayer Neurons Per Layer.
*/
public RBF(int[] nperlayer) {
this(nperlayer, getDefaultRandomGenerator());
}
/**
* Constructor. Receives an array with the number of neurons per layer.
* Layer[0] is the input layer. Layer[1] is the hidden layer and represents
* the number of radial basis functions to use. Layer[2] is the output
* layer.
*
* @param nperlayer Neurons Per Layer.
* @param rand Random generator used for creating matrices
*/
public RBF(int[] nperlayer, Random rand) {
super(nperlayer[1], nperlayer[2], rand); // Adaline inputs = hidden units, outputs = network outputs
this.nperlayer = nperlayer;
sigma = new double[nperlayer[1]];
}
/**
* Train the network using a hybrid scheme. First, the centers of the radial
* basis functions are set using the k-means algorithm. After that, the radius
* (sigma) of each function is calculated from its p nearest neighbors, where
* p is the number of inputs. Then the outputs of the hidden layer are
* precalculated and used as input for the Adaline training.
*
* @param patterns The patterns to be learned.
* @param answers The expected answers.
* @param alpha The learning rate.
* @param epochs The maximum number of iterations
* @param offset The first pattern position
* @param length How many patterns will be used.
* @param minerror The minimal error expected.
*/
@Override
public void train(Column[] patterns, Column[] answers, double alpha, int epochs, int offset, int length, double minerror) {
validatePreconditions(patterns, answers, epochs, offset, length, minerror);
initializeProgressBar(epochs);
//apply k-means to the patterns
centers = kmeans(nperlayer[1], patterns, offset, length);
//calculate sigmas. p-closest neighbors, p is the input dimension
PriorityQueue<Double>[] neighbors = new PriorityQueue[nperlayer[1]];
for (int i = 0; i < nperlayer[1]; i++) {
neighbors[i] = new PriorityQueue<>();
}
for (int i = 0; i < nperlayer[1] - 1; i++) {
for (int j = i + 1; j < nperlayer[1]; j++) {
double current = Math.sqrt(euclideanDistance2(centers[i], centers[j]));
neighbors[i].add(current);
neighbors[j].add(current);
}
}
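//each queue now holds the distances from center i to every other center;
//sigma[i] is the average of the p closest ones (p = input dimension)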
for (int i = 0; i < nperlayer[1]; i++) {
double acum = 0;
for (int j = 0; j < nperlayer[0] && !neighbors[i].isEmpty(); j++) {
acum += neighbors[i].poll();
}
sigma[i] = acum / nperlayer[0];
}
//precalculate the outputs of the hidden layer for each pattern
Column Y[] = new Column[patterns.length];
for (int j = 0; j < length; j++) {
Y[j + offset] = new Column(nperlayer[1]);
simulateNoChange(patterns[j + offset], Y[j + offset]);
}
//Train the adaline network, but keep the weights and biases in this instance.
final Adaline temp = new Adaline(nperlayer[1], nperlayer[2], random);
temp.W = W;
temp.b = b;
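//W and b are shared by reference, so training temp updates this RBF's output layer in place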
temp.setPlotter(plotter);
temp.setProgressBar(progress);
temp.train(Y, answers, alpha, epochs, offset, length, minerror);
if (progress != null) {
progress.setValue(progress.getMaximum());
}
}
/**
* k-means clustering algorithm. Finds k centroids for the given cloud of
* patterns by repeatedly assigning each pattern to its closest centroid and
* moving each centroid to the mean of its partition. The centroids found
* will most likely not be any of the patterns in the cloud.
*
* @param k Number of centroids to find.
* @param patterns The cloud of patterns
* @param offset Initial position of the cloud.
* @param length How many patterns are in the cloud.
* @return An array with the centroids.
*/
private Column[] kmeans(int k, Column[] patterns, int offset, int length) {
Column[] ctemp = new Column[k];
ArrayList<Integer>[] partitions = new ArrayList[k];
Column aux = new Column(patterns[0].getRows());
Column aux1 = new Column(patterns[0].getRows());
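//seed each centroid with a randomly chosen pattern from the [offset, offset + length) window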
for (int i = 0; i < k; i++) {
ctemp[i] = new Column(patterns[0].getRows());
int index = random.nextInt(length) + offset;
patterns[index].copy(ctemp[i]);
partitions[i] = new ArrayList<>();
}
while (true) {
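//one k-means iteration: assign every pattern to its closest centroid, then recompute the centroids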
double min;
double current;
for (int l = 0; l < k; l++) {
partitions[l].clear();
}
for (int i = 0; i < length; i++) {
int j = 0;
min = Double.MAX_VALUE;
for (int l = 0; l < k; l++) {
current = euclideanDistance2(patterns[i + offset], ctemp[l]);
if (current < min) {
min = current;
j = l;
}
}
partitions[j].add(i + offset);
}
boolean exit = true;
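//update step: move each centroid to the mean of its partition; reseed empty partitions with a random pattern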
for (int i = 0; i < k; i++) {
int total = 0;
aux1.setValue(0);
for (int j = 0; j < partitions[i].size(); j++) {
aux1.add(patterns[partitions[i].get(j)], aux1);
total++;
}
if (total == 0) {
//empty partition: take a random pattern as the new centroid
ctemp[i] = new Column(patterns[0].getRows());
int index = random.nextInt(length) + offset;
patterns[index].copy(ctemp[i]);
exit = false;
} else {
aux1.multiply(1.0 / (double) total, aux);
if (!aux.equals(ctemp[i])) {
aux.copy(ctemp[i]);
exit = false;
}
}
}
if (exit) {
return ctemp;
}
}
}
@Override
public Column simulate(Column pattern) {
Column y = new Column(nperlayer[2]);
simulate(pattern, y);
return y;
}
@Override
public void simulate(Column pattern, Column result) {
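//forward pass: compute the hidden Gaussian activations, then apply the Adaline output layer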
Column aux = new Column(nperlayer[1]);
simulateNoChange(pattern, aux);
super.simulate(aux, result);
}
/**
* Calculate the distance from the pattern to each centroid and apply the
* Gaussian function to that distance, leaving the result in
* <code>result</code>.
*
* @param pattern Pattern to evaluate.
* @param result The column vector where the result is stored.
*/
private void simulateNoChange(Column pattern, Column result) {
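//activation of hidden unit i: Gaussian of the squared distance from the pattern to center i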
for (int i = 0; i < nperlayer[1]; i++) {
double current = euclideanDistance2(pattern, centers[i]);
result.position(i, 0, NeuralNetwork.gaussian(current, sigma[i]));
}
}
}