-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.cpp
More file actions
77 lines (71 loc) · 1.78 KB
/
main.cpp
File metadata and controls
77 lines (71 loc) · 1.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#include <algorithm>
#include <cmath>
#include <iostream>
#include <utility>
#include <vector>

#include <matplot/matplot.h>
using namespace matplot;
// Reward signal: 1 when `money` is at least 100, 0 for any smaller amount.
int reward(int money) {
  return money >= 100 ? 1 : 0;
}
// Bounds-checked read: returns V[idx] when idx lies in [0, V.size()),
// and 0.0 for every index outside that range.
double at_or0(const std::vector<double> &V, int idx) {
  const int count = static_cast<int>(V.size());
  if (idx < 0 || idx >= count) {
    return 0.0;
  }
  return V[idx];
}
// Runs value iteration over capital levels 1..n for a betting process, then
// plots the converged value function and the greedy bet size per capital.
//
// Index i of every array corresponds to a capital of i + 1. From capital s a
// bet a in [1, min(s, n + 1 - s)] moves to s + a with weight p_h (collecting
// reward(s + a)) and to s - a with weight 1 - p_h. Capitals outside 1..n are
// treated as having value 0 via at_or0.
//
// Side effects: prints the number of sweeps to stdout, writes
// "value_function.png" and "policy.png", and calls show().
int main() {
  const int n = 99;                     // number of capital levels tracked (1..99)
  const double gamma = 1;               // discount applied to successor values
  const double p_h = 0.4;               // weight of the winning outcome
  const double epsdiff = 0.0000000001;  // stop once a sweep changes no value by more than this

  // Value estimates: V is read during a sweep, nextV receives the updates.
  std::vector<double> V(n, 0);
  std::vector<double> nextV(n, 0);
  std::vector<int> policy(n, 0);

  int iterations = 0;
  double diff = 0;
  do {
    iterations++;
    diff = 0;
    for (int i = 0; i < n; i++) {
      const int capital = i + 1;
      // The bet can neither exceed the capital nor overshoot n + 1.
      const int max_bet = std::min(capital, n + 1 - capital);
      // Every candidate value is >= 0, so -1 is always replaced by the first bet.
      double max_seen = -1;
      for (int a = 1; a <= max_bet; a++) {
        const double temp =
            p_h * (reward(capital + a) + gamma * at_or0(V, i + a)) +
            (1 - p_h) * (gamma * at_or0(V, i - a));
        // Strict '>' keeps the smallest bet among exactly equal values.
        // NOTE(review): near-ties that differ only by floating-point rounding
        // can still flip the recorded bet.
        if (temp > max_seen) {
          max_seen = temp;
          policy[i] = a;
        }
      }
      nextV[i] = max_seen;
      // V is untouched during the sweep, so the per-state change can be
      // tracked here instead of in a second pass.
      diff = std::max(diff, std::abs(nextV[i] - V[i]));
    }
    // The freshly computed values become the inputs of the next sweep.
    std::swap(V, nextV);
  } while (diff > epsdiff);

  // Horizontal axis: capital 1..n.
  std::vector<int> x(n);
  for (int i = 0; i < n; i++) {
    x[i] = i + 1;
  }

  // Terminate the line so the count is not glued to any later output.
  std::cout << iterations << '\n';

  // NOTE(review): figure(true) is presumably Matplot++'s quiet-mode figure;
  // confirm against the library documentation.
  auto f1 = figure(true);
  plot(x, V)->line_width(2);
  ylabel("Value Function");
  xlabel("Capital");

  auto f2 = figure(true);
  plot(x, policy, "-xr");
  xlabel("Capital");
  ylabel("Bet Size");
  title("Policy");

  // Re-select each figure before saving it.
  figure(f1);
  save("value_function.png");
  figure(f2);
  save("policy.png");
  show();
  return 0;
}