Iterative Policy Evaluation, for estimating $V \approx v_{\pi}$
def iterative_policy_evaluation(P, PO, V, theta: float, gamma: float = 1.0) -> float:
    """Evaluate a fixed policy in place by repeated sweeps over the states.

    Each sweep replaces ``V[s]`` for every state ``s`` in ``V`` with

        sum over a in PO[s], s_r in P[s][a] of
            PO[s][a] * s_r.prob * (s_r.r + gamma * V[s_r.s])

    and stops after the first sweep whose largest change is below ``theta``.
    Updates are done in place, so values refreshed earlier in a sweep are
    already used for states visited later in the same sweep.

    Args:
        P: ``P[s][a]`` is an iterable of outcome objects exposing ``prob``,
            ``r`` and ``s`` (probability, reward, successor state).
        PO: ``PO[s][a]`` is the weight the policy gives action ``a`` in
            state ``s``. Every key of ``V`` must be present; a state with
            no actions (empty mapping) is assigned the value 0.
        V: mapping state -> current value estimate; mutated in place. Every
            successor state referenced by ``P`` must be a key of ``V``.
        theta: positive convergence threshold on the per-sweep maximum change.
        gamma: discount applied to the successor value. The default ``1.0``
            is the undiscounted update; convergence is then not guaranteed
            for tasks that never terminate.

    Returns:
        The maximum absolute change seen in the final sweep (``< theta``).

    Raises:
        ValueError: if ``theta`` is not strictly positive (the stopping test
            ``delta < theta`` could then never succeed).
    """
    # `not theta > 0` also rejects NaN, for which `delta < theta` is always False.
    if not theta > 0:
        raise ValueError("theta must be a positive number")

    while True:
        # The largest change must be measured per sweep; carrying it over from
        # earlier sweeps would keep the loop from ever terminating.
        delta = 0.0
        for s in V:
            v = V[s]
            # Build the backup from zero so it replaces the old estimate
            # instead of being added on top of it.
            new_value = 0.0
            for a, action_weight in PO[s].items():
                for s_r in P[s][a]:
                    new_value += action_weight * s_r.prob * (s_r.r + gamma * V[s_r.s])
            V[s] = new_value
            delta = max(delta, abs(v - new_value))
        if delta < theta:
            return delta


# Model, policy and value table; empty here and meant to be filled in before
# evaluation (see the function docstring for the expected layout).
P = {}
PO = {}
V = {}

# Convergence threshold. It was referenced but never defined in this snippet,
# so a small positive tolerance is chosen here.
theta = 1e-6

# Largest value change observed in the final sweep.
delta = iterative_policy_evaluation(P, PO, V, theta)