java

How can I correct the rule of updating the strategic vector in the Linear Reward Inaction algorithm (LRI)

```Problem : I work on the Linear Reward Inaction algorithm (LRI),I have a problem in the rule of updating the strategic vector.Normally only one element of the vector must increase and the others must decrease except this is not my case. Here is the code I can not correct the error.
package LRI_algorithm;
import java.awt.List;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Random;
public class Janvier_2017_stp {
static double[][] MatResult = new double[150][150];
static double[][] graph = new double[150][150];
static double[][] graphProb = new double[150][150];
static double[][] graphtemps = new double[150][150];
static int tL = 142;
static int tC = 142;
// ////////////////////////////////////////////////////////////////
public static double CalculEsperance (int[] path)
{
structuredonneesofficielles.stdproba.TraitementMatriceproba(graphProb, tL, tC, graphProb, tL, tC);
structuredonneesofficielles.matrice_temps_VF.TraitementMatriceNouvelleTransition(graphtemps, tL, tC, graphtemps, tL, tC);
double costtemps = 0.0F;
double costprob2 = 1.0F;
double costprob1=1.0F;
double cost=0.0F;
double somme=0;
double i=cost;
for (int j=0;j<path.length-1;j++)
{
costtemps=costtemps +graphtemps [ path[j]][ path[j+1]]; // somme le temps
costprob2=costprob2 *(1-graphProb [path[j]][ path[j+1]]);// produit (1-p)
}
costprob1=costprob2 *graphProb [ path[path.length-2]][ path[path.length-1]] * (1.0F/(1.0F- (graphProb [ path[path.length-2]][ path[path.length-1]])));// produit p(1-p)
cost=cost+ costprob2*costtemps+costprob1*costtemps;
while (i <= path.length-1) {
somme = somme + i;
i = i + 1;
}
return cost;
}
// //////////////////////////////////////////////////////////////////////////
public static double Utilite (double esperance_max, double esperance, double esperance_min)
{
double U=0;
U=Math.abs ((esperance_max-esperance)/(esperance_max- esperance_min));
//U=Math.abs ((esperance_max-esperance));
return U;
}
public static void initializeMatrix(double[][] MatResult, double[][] MatResult1)
{
int nbL1 = 142;
int nbC1 = 142;
double[][] M_trans = new double[150][150];
structuredonneesofficielles.stdproba.TraitementMatriceproba(MatResult, tL, tC, graph, tL, tC);
structuredonneesofficielles.transition_nouvelle.TraitementMatriceNouvelleTransition(M_trans, nbL1, nbC1,MatResult1, nbL1, nbC1);
}
public static int[] chemin_aleatoire(int depth, int start, double[][] MatResult, double[][] MatResult1,double [] esperance_param) {
int nbL1 = 142;
int nbC1 = 142;
int state = start; // current state
int[] save = new int[nbL1];
int taille_save = 0;
save[taille_save] = state;
taille_save++;
double esperance_min=100;
double esperance_max=0;
double esperance=0;
int nextState;
int steps = 0;
for (int tour=0; tour<50; tour++){
while (state > 0 && steps < depth) {
steps++;
double r = Math.random();
double sum = 0.0;
boolean stop = false;
int j =0;
//for (int j = 0; j < nbC1; j++) {
while ((j<nbC1) & (stop==false)) {
if (MatResult1[state][j] == 0) {
} else {
sum += MatResult1[state][j];
if ((r < sum)) {
save[taille_save] = j;
state = j;
//System.out.println(state+"|"+r);
taille_save++;
stop = true;
}
}
j++;
}
}
// debut calcul du gain
esperance= CalculEsperance(save);
esperance_param[1]= esperance;
esperance_min= esperance_param[2];
esperance_max= esperance_param[0];
if (esperance<esperance_min)
{
esperance_min=esperance;
}
if (esperance>esperance_max)
{
esperance_max=esperance;
}
esperance_param[0]=esperance_max;
esperance_param[2]= esperance_min;
}
if (esperance<esperance_min)
{
esperance_min=esperance;
}
if (esperance>esperance_max)
{
esperance_max=esperance;
}
double Utilite_save= Utilite(esperance_max,esperance, esperance_min);
esperance_param[0]=esperance_max;
esperance_param[2]= esperance_min;
return save;
}
public static int[] strategie_aleatoire(int depth, int start, double[][] MatResult, double[][] MatResult1,double [] esperance_param) {
int nbL1 = 142;
int nbC1 = 142;
//int origine=98;
int state = start; // current state
int[] save = new int[nbL1];
int taille_save = 0;
save[taille_save] = state;
taille_save++;
double esperance_min=100;
double esperance_max=0;
double esperance=0;
int nextState;
int steps = 0;
while (state > 0 && steps < depth) {
steps++;
double r = Math.random();
double sum = 0.0;
boolean stop = false;
int j =0;
//for (int j = 0; j < nbC1; j++) {
while ((j<nbC1) & (stop==false)) {
if (MatResult1[state][j] == 0) {
} else {
sum += MatResult1[state][j];
if ((r < sum)) {
save[taille_save] = j;
state = j;
//System.out.println(state+"|"+r);
taille_save++;
stop = true;
}
}
j++;
};
}
// debut calcul du gain
esperance= CalculEsperance(save);
esperance_param[1]= esperance;
esperance_min= esperance_param[2];
esperance_max= esperance_param[0];
if (esperance<esperance_min)
{
esperance_min=esperance;
}
if (esperance>esperance_max)
{
esperance_max=esperance;
}
esperance_param[0]=esperance_max;
esperance_param[2]= esperance_min;
double Utilite_save= Utilite(esperance_max,esperance, esperance_min);
esperance_param[0]=esperance_max;
esperance_param[2]= esperance_min;
// fin calcul du gain
double b = 0.05;
// parcourir save
for (int p = 0; p < depth - 1; p++) {
state = save[p];
nextState = save[p + 1];
double currentValueStateNextState = MatResult1[state][nextState];
double[] nextValuesMatResult1 = new double[nbC1];
double sum = 0.0;
// D'abord calculer les nouvelles valeurs
for (int k = 0; k < nbC1; k++) {
// D'abord on distribue les pénalités
nextValuesMatResult1[k] = MatResult1[state][k];
if ((k != nextState) & (nextValuesMatResult1[k] != 0)){
nextValuesMatResult1[k] = nextValuesMatResult1[k]
- ((nextValuesMatResult1[k] * b *Utilite_save)/ (1 - currentValueStateNextState));
if (nextValuesMatResult1[k] < 0) {
nextValuesMatResult1[k] = 0;
}
} else {
if ((k == nextState) & (nextValuesMatResult1[nextState] != 0)) {
if ((nextValuesMatResult1[nextState] <1)) {
// Distribution des récompenses
nextValuesMatResult1[nextState] += b *Utilite_save;
if (nextValuesMatResult1[nextState] > 1) {
nextValuesMatResult1[nextState] = 1;
}
}
}
}
}
// Normalize the sum, this case should happen only when the cumulated numerical error exceeds 1
// The following instruction are proposed to correct the computation of probabilities.
try{
String nomfichier = "Lri_Janvier_Dom.txt";
File nomFichier = new File(nomfichier);
PrintWriter fich;
fich = new PrintWriter(new BufferedWriter(new FileWriter(nomfichier, true)));
for (int k = 0; k < nbC1; k++) {
if(nextValuesMatResult1[k]!=0){
//System.out.println (+k+"|"+nextValuesMatResult1[k]+"|");
fich.print(+k+"|"+nextValuesMatResult1[k]+"|");
}
fich.println();
}
fich.flush();
fich.close();
}
catch (Exception e) {
}
// Gérer le résidu au-dessus de 1 ou au-dessous de 0
// Trier les valeurs de nextValuesMatResult1 et de MatResult1[state] et comparer leur ordre
// si l'ordre change alors mettre à jour les valeur de MatResult1[state] avec ceux de nextValuesMatResult1
// ordre est par choix, choisi comme ascendant du plus petit au plus grand.
int[] ordreNextValuesMatResult1 = ordre(nextValuesMatResult1);
//ordreNextValuesMatResult1
int[] ordreMatResult1State = ordre(MatResult1[state]);
boolean ordreEqual = true;
for(int i=0; i < ordreNextValuesMatResult1.length;i++)
{
if(ordreNextValuesMatResult1[i] != ordreMatResult1State[i])
{
ordreEqual = false;
break;
}
}
// mettre à jour uniquement si l'ordre est different
if(ordreEqual == false)
{
for (int k = 0; k < nbC1; k++) {
MatResult1[state][k] = nextValuesMatResult1[k];
}
}
// fin de mise à jour
}
for (int k = 1; k < taille_save - 1; k++) {
//System.out.println(+save[k] + "|" + MatResult1[save[k - 1]][save[k]]);
// printStream1.print(+save[k] + " ");
// printStream2.print(MatResult1[save[k-1]][save[k]]+" " );
}
return save;
}
public static void printPath(int[] save) {
double[][] M_trans = new double[150][150];
double[][] MatResult1 = new double[150][150];
int taille_save = save.length;
int nbL1 = 142;
int nbC1 = 142;
structuredonneesofficielles.transition_nouvelle
.TraitementMatriceNouvelleTransition(M_trans, nbL1, nbC1,
MatResult1, nbL1, nbC1);
System.out.println(save[0]);
for (int k = 1; k < taille_save - 1; k++) {
System.out.println(save[k]);
}
}
public static int selectChild(int[] path, int depth) {
int child;
if (depth < path.length)
child = path[depth];
else
child = -1; // -1 stand for the stop node
return child;
}
// test de convergence !!
public static boolean comparePaths(int[] path1, int[] path2)
{
if(path1.length != path2.length) return false;
for(int i=0; i < path1.length; i++)
{
if(path1[i] != path2[i]) return false;
}
return true;
}
public static void main(String args[]) {
// Record starting time
long startTime = System.currentTimeMillis();
for(int startingPoints=98; startingPoints <99; startingPoints++) // tous les noeuds du graphe
{
try{
String nomfichier = "thierry_eval_prob_apres_convergence.txt";
File nomFichier = new File(nomfichier);
PrintWriter fich;
fich = new PrintWriter(new BufferedWriter(new FileWriter(nomfichier, true)));
double[][] MatResult = new double[150][150];
double[][] MatResult1 = new double[150][150];
int depth =4; // la profondeur
int drop = 1; // la descente (je descends de drop pas)
int maxLength =1; // nb tronçons
int nbSimulations =5;
float averageLearningSteps = 0;
for (int simulationIndex = 0; simulationIndex < nbSimulations; simulationIndex++) {
initializeMatrix(MatResult, MatResult1);
float totalLearningSteps = 0;
int currentNodeIndex = 0;
int start = startingPoints; // position d'entrée dans l'orbite
int[] foundPath = new int[maxLength];
while ((start != -1) && (currentNodeIndex < maxLength))
{
foundPath[currentNodeIndex] = start;
double[]esperance_param = new double[3];
int[] save = strategie_aleatoire(depth, start, MatResult, MatResult1,esperance_param);
totalLearningSteps++;
int[] save_aux;
int nbC1 = 142;
boolean converged = false;
//Le programme a été testé jusqu'ici
//Le programme a été testé jusqu'ici et fait 10 iteration pour voir s'il a convergé ou pas
int numberMinimalIteration = 10;
int numberOfIteration = 0;
while((!converged))
{
save_aux = strategie_aleatoire(depth, start, MatResult, MatResult1,esperance_param);
totalLearningSteps++;
converged = comparePaths(save, save_aux);
//System.out.println("****converged="+ converged);
save = save_aux;
numberOfIteration++;
}
for (int i = 0; i < nbC1; i++) {
if(MatResult1[start][i]!=0){
System.out.print(+i+"|"+MatResult1[start][i]+"|");
fich.print(i+"|"+MatResult1[start][i]+"|");
System.out.print("\n");
}
}
System.out.println();
for (int i = currentNodeIndex + 1; i < currentNodeIndex + drop; i++) {
foundPath[i] = save[i - currentNodeIndex];
}
start = selectChild(save, drop);
currentNodeIndex += (drop);
}
for (int i = 0; i < maxLength-1; i++) {
}
/* ces deux lignes nous donnes les itinéraires*/
System.out.println("Simulation"+ simulationIndex );
fich.println();
fich.println("Simulation="+ simulationIndex );
//System.out.println("****************The found path#"+ simulationIndex + " is :******************");
// printPath(foundPath);
//System.out.println("*");
// System.out.println("The total learning steps neede for this path is : "+totalLearningSteps);
// averageLearningSteps += (float)(totalLearningSteps/nbSimulations);
}
// System.out.println("_____________________The average learning step needed by simulation is : " + averageLearningSteps+ "_________________________");
fich.println("*");
fich.flush();
fich.close();
}
catch (Exception e) {
} //System.out.println(startingPoints+","+averageLearningSteps);
}
// Get elapsed time
long endTime = System.currentTimeMillis();
long elapsedTime = endTime - startTime;
System.out.println("Execution finished after " + elapsedTime + "ms");
}
// Renvoie un tableau representant les index des élements trié du plus petit au plus grand
// à part d'un tableau double
static int[] ordre(double[ ] a) {
double[] a_aux = a.clone();
int currentMinIndex = 0;
int[] indexOrdonnes = new int[a.length];
int j=0;
while(j < a.length)
{
double currentMin = 2;
for(int i=0; i < a_aux.length; i++)
{
if(a_aux[i] < currentMin) {currentMinIndex = i; currentMin = a_aux[i];}
}
indexOrdonnes[j] = currentMinIndex;
a_aux[currentMinIndex] = 3;
j++;
}
return indexOrdonnes;
}
}```

Database Users
RDBMS discuss
javascript
java
csharp
php
android
javascript
java
csharp
php
python
android
jquery
ruby
ios
html
Mobile App
Mobile App
Mobile App