deep irl by c language
TRANSCRIPT
!"
!
$\tau$, $c_\tau(\theta)$
$$p_\theta(\tau) = \frac{1}{Z(\theta)} \exp\left(-c_\tau(\theta)\right)$$
$$r_\tau(\theta) = -c_\tau(\theta)$$
$$r_\tau(\theta) = \frac{1}{N} \sum_{t=0}^{N} r_{s_t, a_t}(\theta)$$
$$\tau = \begin{pmatrix} s_1, s_2, \cdots, s_N \\ a_1, a_2, \cdots, a_N \end{pmatrix}$$
$$c_\tau(\theta) = -r_\tau(\theta)$$
$s_t$: state $s$ at time $t$
$a_t$: action $a$ at time $t$
τ
! "
!"
!
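$\tau$, $c_\tau(\theta)$, $r_\tau(\theta) = -c_\tau(\theta)$
$$p_\theta(\tau) = \frac{1}{Z(\theta)} \exp\left(-c_\tau(\theta)\right) = \frac{1}{Z(\theta)} \exp\left(r_\tau(\theta)\right)$$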
!"#$%& '() (* '( ()
! "
!"
$$L(\theta) = \log p(\tau(s, a) \mid c) = \log \frac{1}{Z(\theta)} \exp\left(\frac{1}{N} \sum_{s,a}^{N} r_{s,a}(\theta)\right) \tag{1}$$
$$= \frac{1}{N} \sum_{s,a}^{N} r_{s,a}(\theta) - \log Z(\theta) \tag{2}$$
$"
θ
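$$\frac{\partial L(\theta)}{\partial \theta} = \frac{1}{N} \sum_{s,a} \frac{\partial}{\partial \theta} r_{s,a}(\theta) - \frac{\partial}{\partial \theta} \log Z(\theta) \tag{4}$$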
logZ(θ)
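$$\log Z(\theta) = \sum_{\theta} \frac{1}{N} \log \sum_{s,a} \exp\left(r_{s,a}(\theta)\right) p(\theta) = \sum_{\theta} \frac{1}{N} \sum_{s,a} r_{s,a}(\theta)\, p(\theta) \tag{5}$$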
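$$\frac{\partial}{\partial \theta} \log Z(\theta) = \sum_{\theta} \frac{1}{N} \sum_{s,a} \frac{\partial}{\partial \theta} r_{s,a}(\theta)\, p(\theta) = E_{\theta}\left[\frac{1}{N} \sum_{s,a} \frac{\partial}{\partial \theta} r_{s,a}(\theta)\right] \tag{6}$$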
! "
!"
$r_{s,a}(\theta) = \theta^{T} f(s, a)$, with $\theta$ and $f(s, a)$
$$\frac{\partial}{\partial \theta} r_{s,a}(\theta_i) = f(s, a) \tag{1}$$
$$\frac{\partial L(\theta)}{\partial \theta} = \frac{1}{N} \sum_{s,a} f(s, a) - \frac{1}{N} \sum_{s,a} E_{\theta} f(s, a) \tag{2}$$
$$= E_{s,a} f(s, a) - E_{s,a}\left[\hat{f}(s, a)\right], \qquad \hat{f}(s, a) = E_{\theta} f(s, a) \tag{3}$$
θ
! "
!"#$%& '()*+,(*-!.
'()*+,(*
'()*+,(*-!.
/#0(,!1.
23 23
!"#$%& '()%** +$!,-.$*, /#0%#,. 1%23$,0 45 6+7
! "