Deep IRL by C language

9
จಡձ

Upload: masato-nakai

Post on 22-Jan-2018

313 views

Category:

Data & Analytics


0 download

TRANSCRIPT

Maximum Entropy Deep Inverse Reinforcement Learning

!"#$%&'()*

!"

+!$,!-. /0 )'12

IRL (Inverse Reinforcement Learning)

!"#$%&

!"#$%& ' ()* !+,"%

! "

!"

!

Trajectory $\tau$ with cost $c_\tau(\theta)$:

$$p_\theta(\tau) = \frac{1}{Z(\theta)} \exp\left(-c_\tau(\theta)\right)$$

$$r_\tau(\theta) = -c_\tau(\theta)$$

$$r_\tau(\theta) = \frac{1}{N} \sum_{t=0}^{N} r_{s_t, a_t}(\theta)$$

$$\tau = \begin{pmatrix} s_1, s_2, \cdots, s_N \\ a_1, a_2, \cdots, a_N \end{pmatrix}$$

$c_\tau(\theta) = -r_\tau(\theta)$

$s_t$: state $s$ at time $t$

$a_t$: action $a$ at time $t$

τ

! "

!"

!

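Trajectory $\tau$, cost $c_\tau(\theta)$, reward $r_\tau(\theta) = -c_\tau(\theta)$:

$$p_\theta(\tau) = \frac{1}{Z(\theta)} \exp\left(-c_\tau(\theta)\right) = \frac{1}{Z(\theta)} \exp\left(r_\tau(\theta)\right)$$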

!"#$%& '() (* '( ()

! "

!"

$$L(\theta) = \log p(\tau(s, a) \mid c) = \log \left[ \frac{1}{Z(\theta)} \exp\left( \frac{1}{N} \sum_{s,a}^{N} r_{s,a}(\theta) \right) \right]$$

!"

$$= \frac{1}{N} \sum_{s,a}^{N} r_{s,a}(\theta) - \log Z(\theta)$$

$"

θ

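$$\frac{\partial L(\theta)}{\partial \theta} = \frac{1}{N} \sum_{s,a} \frac{\partial}{\partial \theta} r_{s,a}(\theta) - \frac{\partial}{\partial \theta} \log Z(\theta)$$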

logZ(θ)

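$$\log Z(\theta) = \sum_{\theta} \frac{1}{N} \left[ \log \prod_{s,a} \exp\left( r_{s,a}(\theta) \right) \right] p(\theta) = \sum_{\theta} \frac{1}{N} \sum_{s,a} r_{s,a}(\theta)\, p(\theta)$$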

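$$\frac{\partial}{\partial \theta} \log Z(\theta) = \sum_{\theta} \frac{1}{N} \sum_{s,a} \frac{\partial}{\partial \theta} r_{s,a}(\theta)\, p(\theta) = E_{\theta}\left[ \frac{1}{N} \sum_{s,a} \frac{\partial}{\partial \theta} r_{s,a}(\theta) \right]$$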

! "

!"

$r_{s,a}(\theta) = \theta^{T} f(s, a)$, where $\theta$ is the weight vector and $f(s, a)$ is the feature vector

$$\frac{\partial}{\partial \theta} r_{s,a}(\theta) = f(s, a)$$

$$\frac{\partial L(\theta)}{\partial \theta} = \frac{1}{N} \sum_{s,a} f(s, a) - \frac{1}{N} \sum_{s,a} E_{\theta} f(s, a)$$

#"

$$= E_{s,a} f(s, a) - E_{s,a}\left[ \hat{f}(s, a) \right], \qquad \hat{f}(s, a) = E_{\theta} f(s, a)$$

θ

! "

!"#$%& '()*+,(*-!.

'()*+,(*

'()*+,(*-!.

/#0(,!1.

23 23

!"#$%& '()%** +$!,-.$*, /#0%#,. 1%23$,0 45 6+7

! "

!"

!"

$\text{reward} \mathrel{+}= \alpha \cdot \dfrac{\partial L}{\partial \theta}$

!"#$%& '()%** +$!,-.$*, /#0%#, 1%23$,0 45 6+7

! "

!!" #!$%&' #!(

Network.grad $= \dfrac{\partial L}{\partial \theta}$

!"#$%& '()%** +$!,-.$*, /#0%#, 1%23$,0 45 677

!