# Iterative Policy Evaluation, for estimating $V \approx v_{\pi}$

def iterative_policy_evaluation(P, PO, V, theta=1e-6, gamma=1.0):
    """Estimate the state-value function of a fixed policy, updating V in place.

    Repeatedly sweeps over every state in ``V`` and replaces its value with
    the expected one-step return under the policy:

        V[s] = sum_a PO[s][a] * sum_t t.prob * (t.r + gamma * V[t.s])

    Sweeps are in place, so states visited later in a sweep already see the
    values updated earlier in that same sweep.

    Args:
        P: Transition model. ``P[s][a]`` is an iterable of transition records,
            each exposing ``.prob`` (probability), ``.r`` (reward) and ``.s``
            (successor state).
        PO: Policy. ``PO[s][a]`` is the probability of taking action ``a`` in
            state ``s``.
        V: Mapping from state to its current value estimate; mutated in place.
            Successor states that are not keys of ``V`` are treated as
            terminal states with value 0.
        theta: Convergence threshold; iteration stops after the first sweep
            whose largest absolute value change is below it. Must be > 0.
        gamma: Discount factor applied to successor values. The default of
            1.0 is the undiscounted case; with ``gamma == 1`` the iteration
            only converges if the policy is guaranteed to reach a terminal
            state.

    Returns:
        The largest absolute value change observed in the final sweep
        (0 when ``V`` is empty).

    Raises:
        ValueError: If ``theta`` is not strictly positive, since
            ``delta < theta`` could then never hold and the loop would spin
            forever.
    """
    if theta <= 0:
        raise ValueError("theta must be strictly positive")

    while True:
        # delta must restart at zero every sweep; carrying it over would make
        # the stopping test impossible to satisfy after one large update.
        delta = 0
        for s in V:
            v = V[s]
            # Build the new estimate from scratch rather than adding onto the
            # old one, so V[s] is the backup itself and not a running total.
            V[s] = sum(
                PO[s][a] * s_r.prob * (s_r.r + gamma * V.get(s_r.s, 0.0))
                for a in PO[s]
                for s_r in P[s][a]
            )
            delta = max(delta, abs(v - V[s]))
        if delta < theta:
            return delta


# Transition model: P[s][a] -> iterable of records with .prob, .r and .s.
P = {}
# Policy: PO[s][a] -> probability of choosing action a in state s.
PO = {}
# Value estimates: V[s] -> current estimate for state s.
V = {}
# Convergence threshold for the largest per-sweep value change.
theta = 1e-6

# Evaluate the policy; V is updated in place and delta holds the final
# sweep's largest change.
delta = iterative_policy_evaluation(P, PO, V, theta)