% ELMAN NET DOCUMENTATION EXAMPLE
%
% DELAYED FEEDBACK FROM HIDDEN LAYER
%
% COMMENTS
%
% Unlike other time-series functions (e.g., timedelaynet,
% narnet and narxnet), the Elman net uses delayed hidden-layer
% feedback instead of input delays and/or output feedback.
% Therefore, there is no direct way to use significant lags of
% the target autocorrelation or target/input crosscorrelation
% functions to guide the choice of hidden layer feedback
% delays, FD.
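%
% For contrast, here is a minimal sketch of the lag analysis
% that IS available for narnet/narxnet designs (assumes the
% toolbox function nncorr; 1.96/sqrt(N) is the usual large-N
% 95% significance approximation):
%
% zt = zscore(cell2mat(T),1); % standardized target series
% autocorrt = nncorr(zt,zt,N-1,'biased'); % lags -(N-1):(N-1)
% sigthresh = 1.96/sqrt(N); % ~95% significance level
% siglags = find(abs(autocorrt(N+1:end)) >= sigthresh)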
%
% In addition, there are no guaranteed a priori guidelines
% for estimating good values for the number of hidden nodes,
% H. A useful rule of thumb is to avoid overfitting, i.e.,
% having more unknown weights than training equations.
% When that is not possible, overtraining can be mitigated
% with validation-set early stopping or objective-function
% regularization.
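%
% A minimal sketch of the regularization alternative (assumes
% the 'mse' performance function exposes the regularization
% parameter; the 0.1 ratio is an illustrative guess, not a
% recommendation):
%
% net = elmannet(FD,H);
% net.performFcn = 'mse';
% net.performParam.regularization = 0.1; % penalize weight magnitudes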
%
% The default values of FD and H used in the Elman net
% documentation example lead to severe overfitting. However,
% mitigation via validation set stopping prevents the overfit
% net from being overtrained.
%
% Multiple trials using the default values of FD and H
% (not shown here) demonstrate the dependence of a good
% design on a fortunate random draw of initial weights.
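%
% A minimal sketch of the multi-trial remedy (10 trials is an
% arbitrary illustrative choice):
%
% bestvperf = Inf;
% for k = 1:10
%     rng(k); % reproducible initial weights
%     neti = elmannet(FD,H);
%     [Xs,Xi,Ai,Ts] = preparets(neti,X,T);
%     [neti,tri] = train(neti,Xs,Ts,Xi,Ai);
%     if tri.best_vperf < bestvperf % keep best validation result
%         bestvperf = tri.best_vperf;
%         net = neti;
%     end
% end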
%
% Final remarks about the documentation example
% (See help and/or doc elmannet)
%
% [X,T] = simpleseries_dataset;
% net = elmannet(1:2,10); % Default values
% [Xs,Xi,Ai,Ts] = preparets(net,X,T);
% net = train(net,Xs,Ts,Xi,Ai); % Default random data division
% view(net);
% Y = net(Xs,Xi,Ai);
% perf = perform(net,Y,Ts) % Depends on initial state of RNG
%
% 1. No attempt to vary FD or H to improve performance.
% 2. No attempt to mitigate a possibly unfortunate choice
%    of initial random weights.
% 3. Use of the default random data-division function,
%    'dividerand', which destroys input, hidden-state and
%    output time-series correlations (see the divideblock
%    sketch after this list).
% 4. No final test-set evaluation in terms of NMSE or R^2.
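%
% A minimal sketch of the divideblock remedy for item 3
% (divideblock keeps each subset temporally contiguous):
%
% net = elmannet(FD,H);
% net.divideFcn = 'divideblock'; % train/val/test in time order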
% NOTE: Ending semicolons are purposely omitted below to display output
close all, clear all, clc, plt = 0;
tic
[X,T] = simpleseries_dataset;
whos
% Name Size Bytes Class
% T 1x100 6800 cell
% X 1x100 6800 cell
[ I N ] = size(X) % [ 1 100 ]
[ O N ] = size(T) % [ 1 100 ]
% No. of training equations
Ntrneq = 0.7*N*O % 70 (0.7 is the default training fraction)
FD = 1:2 % Feedback delays
ND = length(FD) % 2 = No. of delays
H = 10 % 10 = No. hidden nodes
% Network properties "b"efore Configuration
% rng(4151941) % Example of an unfortunate random draw
rng(0)
netb = elmannet(FD,H) % No semicolon
Ib = netb.inputs{1}.size % 0 = Input dimension
H = netb.layers{1}.size % 10
Ob = netb.outputs{1,2}.size % 0 = Output dimension
netb.inputWeights{1,1}.size % [ 10 0 ]
netb.biases{1}.size % 10
netb.layerWeights{1,1}.size % [ 10 20 ]
Nwb = netb.numWeightElements % 210 No. of weights
Nwb = (Ib + ND*H + 1)*H + (H + 1)*Ob
% = (0 + 2*10 + 1)*10 + (10+1)*0 = 210
IW11b = netb.IW{1,1} % H x Ib, [ 10x0]
b1b = netb.b{1} % H x1 [10x1]
b2b = netb.b{2} % Ob x1 [ 0x1]
LW11b = netb.LW{1,1} % H x (ND*H) [10x20]
LW21b = netb.LW{2,1} % Ob x H [ 0x10]
view(netb)
% Net properties After Configuration
net = configure(netb,X,T);
I = net.inputs{1}.size % 1 = Input dimension
O = net.outputs{1,2}.size % 1 = Output dimension
Nw = net.numWeightElements % 231
Nw = (I + ND*H + 1)*H + (H + 1)*O
% = (1 + 2*10 + 1)*10 + (10+1)*1 = 231
IW11 = net.IW{1,1} % H x I [ 10x1 ]
b1 = net.b{1} % H x1 [10x1]
b2 = net.b{2} % O x1 [1x1]
LW11 = net.LW{1,1} % H x (ND*H) [10x20]
LW21 = net.LW{2,1} % O x H [ 1x10]
view(net)
% Number of estimation degrees of freedom
Ndof = Ntrneq-Nw % -161 < 0 ===> OVERFITTING !!!
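% Since Nw = ND*H^2 + (I+O+1)*H + O, the largest H that keeps
% Ndof >= 0 follows from the positive root of Ntrneq = Nw.
% A minimal sketch (Hub is local notation, not a toolbox quantity):
Hub = floor(max(roots([ND, I+O+1, O-Ntrneq]))) % 5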
[Xs,Xi,Ai,Ts] = preparets(net,X,T);
whos X T Xs Xi Ai Ts
% Name Size Bytes Class
%
% Ai 2x2 416 cell
% T 1x100 6800 cell
% Ts 1x98 6664 cell
% X 1x100 6800 cell
% Xi 1x0 0 cell
% Xs 1x98 6664 cell
[net,tr,Ys,Xf,Af] = train(net,Xs,Ts,Xi,Ai);
view(net);
whos X T Xs Xi Ai Ts Ys Xf Af
% Name Size Bytes Class Attributes
%
% Af 0x0 0 double
% Xf 1x98 6664 cell
% Ys 1x98 6664 cell
Y = net(Xs,Xi,Ai);
isequal(Ys,Y) % 1
perf = perform(net,Ys,Ts) % 0.0054 mse(ts-ys)
stopcrit = tr.stop % Val stop
bestepoch = tr.best_epoch % 94
ts = cell2mat(Ts);
MSE00 = var(ts,1) % 0.0444 = reference MSE of the naive constant model
ys = cell2mat(Ys);
MSE = mse(ts-ys) % 0.0054
NMSE = MSE/MSE00 % 0.1215
R2 = 1- NMSE % 0.8785
tstrn = ts(tr.trainInd);
tsval = ts(tr.valInd);
tstst = ts(tr.testInd);
ystrn00 = mean(tstrn,2); % Naive constant (mean) reference output
% Final Model Evaluation
R2trn = 1-tr.perf(tr.best_epoch)/mse(tstrn-ystrn00) % 0.8824
R2val = 1-tr.vperf(tr.best_epoch)/mse(tsval-ystrn00) % 0.8948
R2tst = 1-tr.tperf(tr.best_epoch)/mse(tstst-ystrn00) % 0.8261
toc % ~13 sec
Hope this helps.
Greg