Участник:Easik/NBLINDESC — различия между версиями

Материал из DISCOPAL
Перейти к: навигация, поиск
Строка 2: Строка 2:
  
 
[Не проходит по точности] Версия 1. Реализовал Q-Learning со стохастическим спуском к оптимальному решению.
 
[Не проходит по точности] Версия 1. Реализовал Q-Learning со стохастическим спуском к оптимальному решению.
 +
Upd: пробовал сделать через Climbing Hill (есть такой тэг к задаче), но даже если оптимизировать вектор вероятностей грубо, подсчёт мат ожидания на его единственной оптимальной (задача выпуклой оптимизации) реализации занимает слишком много времени, чтобы пройти ограничение времени задачи.
  
 
'''C \ gcc'''
 
'''C \ gcc'''

Версия 01:23, 17 декабря 2020

[Не проходит по точности] Версия 1. Реализовал Q-Learning со стохастическим спуском к оптимальному решению. Upd: пробовал сделать через Climbing Hill (есть такой тэг к задаче), но даже если оптимизировать вектор вероятностей грубо, подсчёт мат ожидания на его единственной оптимальной (задача выпуклой оптимизации) реализации занимает слишком много времени, чтобы пройти ограничение времени задачи.

C \ gcc

#include <stdio.h>
#include <stdlib.h>  /* rand(), srand() */
#include <string.h>  /* strlen() */
#include <assert.h>  /* assert() */
#include <stdbool.h> /* bool */
#include <time.h>    /* time() */
 
#define DEBUG false
#define ACTIONS 4           /* Possible actions: North, South, East, West */
#define EPOCHS 500000       /* Number of learning paths to make*/
#define INIT_ALPHA 0.000001   /* Initial learning rate */
#define MAX_ALPHA 0.000001 // 1.0 
#define INIT_EPSILON 50     /* Exploration rate (out of 100) */
#define THRESHOLD 0.95
#define MAX_TRAIN_STEPS 50  /* Max steps in a single path in train */
#define MAX_TEST_STEPS 20   /* Max steps in a single path in test */
#define ERROR_EPS 1e-5      /* -16 */
 
char GRID[8][9];
double PROBABILITIES[ACTIONS];
int PATH_COUNT[ACTIONS];
 
int N, M;
int FINISHED = 0;
 
/* Move dir:    N   S  E   W */
int X_MOVE[] = { 0, 0, 1, -1};
int Y_MOVE[] = {-1, 1, 0,  0};
 
int in_grid(int cur_x, int cur_y);
double move_recurs(int cur_x, int cur_y, int finish_x, int finish_y, int steps, double prob);
 
int main() {
 
    int i, j, k;
    int T;
    time_t t;
    int start_x, start_y;
    int finish_x, finish_y;
    int cur_x, cur_y;
 
    int rand_num;
    int steps, move_idx;
    double penalty, penalty_sum;
    double reward, max_reward;
    double tmp;
    double expected_time;
 
    double alpha;
    int epsilon;
 
    bool prob_normalized = false;
 
    /* Intialize random number generator */
    srand((unsigned)time(&t));
 
    /* Debug */
    /*
    for (i = 0; i < 5; i++) {
 
        printf("%d\n", rand() % 100);
 
    }
    */
 
    scanf("%d", &T);
    for (i = 0; i < T; i++) {
 
        /* Read a test case */
        scanf("%d %d", &N, &M);
        for (j = 0; j < N; j++) {
 
            scanf("%s", GRID[j]);
            // printf(">>> %s\n", GRID[j]);
 
            /* Find start & finish coordinates */
            for (k = 0; k < M; k++) {
 
                if (GRID[j][k] == '@') {
 
                    start_x = k;
                    start_y = j;
 
                } else if (GRID[j][k] == '*') {
 
                    finish_x = k;
                    finish_y = j;
 
                }
 
            }
 
            /* Start == Finish */
            if ((start_x == finish_x) & (start_y == finish_y)) return 0;
 
        }
 
        /* Debug */
        if (DEBUG) printf("[%d, %d] (%d, %d) ---> (%d, %d)\n", N, M, start_y, start_x, finish_y, finish_x);
 
        /* Init probabilities */
        for (j = 0; j < ACTIONS; j++) {
 
            PROBABILITIES[j] = 0;
 
        }
 
        alpha = INIT_ALPHA;
        epsilon = INIT_EPSILON;
 
        /* Q-learning: learning optimal probabilities of moves */
        for (j = 0; j < 2 * EPOCHS; j++) {
 
            /* Reset path count */
            for (k = 0; k < ACTIONS; k++) PATH_COUNT[k] = 0;
            steps = 0;
 
            cur_x = start_x;
            cur_y = start_y;
 
            if (DEBUG) printf("Moves: ");
 
            /* Walk while end is not reached or max steps taken */
            while (steps <= MAX_TRAIN_STEPS) {
 
                rand_num = rand() % 100;
 
                /* Exploration move */
                /* Epsilon decay: INIT_EPSILON * (EPOCHS - j) */
                if (rand_num <= epsilon) {
 
                    rand_num = rand() % 100;
 
                    /* North move */
                    if (rand_num < 25) {
 
                        move_idx = 0;
 
                    /* South move */
                    } else if ((rand_num >= 25) && (rand_num < 50)) {
 
                        move_idx = 1;
 
                    /* East move */
                    } else if ((rand_num >= 50) && (rand_num < 75)) {
 
                        move_idx = 2;
 
                    /* West move */
                    } else if (rand_num >= 75) {
 
                        move_idx = 3;
 
                    }
 
                /* Exploitation move */
                } else {
 
                    /* Find move with max reward */
                    /*
                    move_idx = -1;
                    max_reward = -1;
                    for (k = 0; k < 4; k++) {
 
                        if (PROBABILITIES[k] >= max_reward) {
 
                            max_reward = PROBABILITIES[k];
                            move_idx = k;
 
                        }
 
                    }
                    assert(move_idx >= 0);
                    */
 
                    rand_num = rand() % 100;
 
                    /* Sample move from probabilities distribution */
 
                    tmp = 100 / (PROBABILITIES[0] + PROBABILITIES[1] + PROBABILITIES[2] + PROBABILITIES[3]);
                    if (rand_num < (int)(tmp * PROBABILITIES[0])) {
 
                        move_idx = 0;
 
                    } else if ((rand_num >= (int)(tmp * PROBABILITIES[0])) && 
                        (rand_num < (int)(tmp * (PROBABILITIES[0] + PROBABILITIES[1])))) {
 
                        move_idx = 1;
 
                    } else if ((rand_num >= (int)(tmp * (PROBABILITIES[0] + PROBABILITIES[1]))) && 
                        (rand_num < (int)(tmp * (PROBABILITIES[0] + PROBABILITIES[1] + PROBABILITIES[2])))) {
 
                        move_idx = 2;
 
                    } else if (rand_num >= (int)(tmp * (PROBABILITIES[0] + PROBABILITIES[1] + PROBABILITIES[2]))) {
 
                        move_idx = 3;
 
                    }
 
                }
 
                if (DEBUG) printf("%d", move_idx);
 
                /* Move if it's valid */
                if ((in_grid(cur_x + X_MOVE[move_idx], cur_y + Y_MOVE[move_idx])) && 
                    (GRID[cur_y + Y_MOVE[move_idx]][cur_x + X_MOVE[move_idx]] != '#')) {
 
                    cur_x += X_MOVE[move_idx];
                    cur_y += Y_MOVE[move_idx];
 
                }
 
                /* Increment taken step count & total steps taken */
                PATH_COUNT[move_idx]++;
                steps++;
 
                /* Check if end is reached */
                if ((cur_x == finish_x) && (cur_y == finish_y)) {
 
                    break;
 
                }
 
            }
 
            if (DEBUG) printf("\n");
            // printf("[Epoch=%d]", j);
 
            /* Update optimal probabilities */
            for (k = 0; k < ACTIONS; k++) {
 
                reward = PATH_COUNT[k] * (1.0 / steps) / steps;
                PROBABILITIES[k] = (1 - alpha) * PROBABILITIES[k] + alpha * reward;
                if (j == EPOCHS - 1) printf("%.16lf (%d)\n", PROBABILITIES[k], PATH_COUNT[k]);
                // printf(" %d", PATH_COUNT[k]);
 
            }
 
            /* Update hyperparameters */
            //alpha += (MAX_ALPHA - INIT_ALPHA);
            alpha = INIT_ALPHA + (j / EPOCHS) * (MAX_ALPHA - INIT_ALPHA);
            epsilon = (int)(INIT_EPSILON * (EPOCHS - j) / EPOCHS);
 
            // printf("\n");
 
        }
 
        /* Threshold: if some move probability greater than threshold, set this probability to 1 */
        for (j = 0; j < ACTIONS; j++) {
 
            printf("%.16lf ? %.16lf\n", PROBABILITIES[j], THRESHOLD);
            if (PROBABILITIES[j] >= THRESHOLD) {
 
                PROBABILITIES[j] = 1.0;
                for (k = 0; k < j; k++) PROBABILITIES[k] = 0.0;
                for (k = j + 1; k < ACTIONS; k++) PROBABILITIES[k] = 0.0;
                prob_normalized = true;
 
            }
 
        }
 
        /* Normalize probabilities */
        if (!prob_normalized) {
 
            tmp = 0;
            for (j = 0; j < ACTIONS; j++) tmp += PROBABILITIES[j];
 
            for (j = 0; j < ACTIONS; j++) PROBABILITIES[j] /= tmp;
 
        }
 
        /*
        PROBABILITIES[0] = 0.0026367051954495;
        PROBABILITIES[1] = 0.5808374592488428;
        PROBABILITIES[2] = 0.3932525182143020;
        PROBABILITIES[3] = 0.0232733173414058;
        */
 
        /*
        PROBABILITIES[0] = 0.0000000000000000;
        PROBABILITIES[1] = 0.6057445290748531;
        PROBABILITIES[2] = 0.3784352058231522;
        PROBABILITIES[3] = 0.0158202651019948;
        */
 
        /* Debug */
        printf("Normalized probabilities: %.16lf %.16lf %.16lf %.16lf\n", PROBABILITIES[0], PROBABILITIES[1],
                                                            PROBABILITIES[2], PROBABILITIES[3]);
 
        expected_time = 0;
 
        /* Calculate expectancy time to get to finish given optimal probabilities */
        for (j = 0; j < ACTIONS; j++) {
 
            cur_x = start_x;
            cur_y = start_y;
 
            if (PROBABILITIES[j] > ERROR_EPS) {
 
                /* Move if it's valid */
                if ((in_grid(cur_x + X_MOVE[j], cur_y + Y_MOVE[j])) && 
                    (GRID[cur_y + Y_MOVE[j]][cur_x + X_MOVE[j]] != '#')) {
 
                    cur_x += X_MOVE[j];
                    cur_y += Y_MOVE[j];
 
                }
 
                printf("j=%d %d %d -(%lf)-> %d %d\n", j, cur_x, cur_y, PROBABILITIES[j], finish_x, finish_y);
                expected_time += move_recurs(cur_x, cur_y, finish_x, finish_y, 1, PROBABILITIES[j]);
 
            }
 
        }
 
        printf("%.16lf\n", expected_time);
 
        printf("Finished: %d", FINISHED);
 
    }
 
    return 0;
 
}
 
/* Check if positioned in grid */
int in_grid(int cur_x, int cur_y) {
    // 1 0
    // n = 1 m = 2
 
    //printf("%d %d %d %d", cur_x, cur_y, N, M);
    int res = (0 <= cur_x) && (cur_x < M) && (0 <= cur_y) && (cur_y < N);
    //printf(" %d\n", res);
    return res;
 
}
 
double move_recurs(int cur_x, int cur_y, int finish_x, int finish_y, int steps, double prob) {
 
    int j;
    double expected_time = 0.0;
 
    /* Max steps taken */
    /*
    if (steps > MAX_TEST_STEPS) {
 
        return 0;
 
    }
    */
 
    /* Expectancy is too low to continue search */
    if (prob * steps < ERROR_EPS) {
 
        //printf("%.16lf < %.16lf\n", prob * steps < ERROR_EPS);
        return prob * steps;
 
    }
 
    /* Check if end is reached */
    if ((cur_x == finish_x) && (cur_y == finish_y)) {
 
        //printf("Valid sub-expectancy: %.16lf\n", prob * steps);
        FINISHED++;
        return prob * steps;
 
    }
 
    /* Calculate expectancy time to get to finish given optimal probabilities */
    for (j = 0; j < ACTIONS; j++) {
 
        if (PROBABILITIES[j] > ERROR_EPS) {
 
            /* Move if it's valid */
            if ((in_grid(cur_x + X_MOVE[j], cur_y + Y_MOVE[j])) && 
                (GRID[cur_y + Y_MOVE[j]][cur_x + X_MOVE[j]] != '#')) {
 
                cur_x += X_MOVE[j];
                cur_y += Y_MOVE[j];
 
            }
 
            expected_time += move_recurs(cur_x, cur_y, finish_x, finish_y, steps + 1, prob * PROBABILITIES[j]);
 
        }
 
    }
 
    return expected_time;
 
}