@inproceedings{Supelec698,
  author    = {Edouard Klein and Matthieu Geist and Olivier Pietquin},
  title     = {Batch, Off-policy and Model-Free Apprenticeship Learning},
  booktitle = {IJCAI Workshop on Agents Learning Interactively from Human Teachers (ALIHT 2011)},
  year      = {2011},
  month     = jul,
  address   = {Barcelona (Spain)},
  note      = {6 pages},
  url       = {http://www.cs.utexas.edu/~bradknox/IJCAI-ALIHT11/Accepted_Papers.html},
  abstract  = {This paper addresses the problem of apprenticeship
learning, that is learning control policies from
demonstration by an expert. An efficient framework
for it is inverse reinforcement learning (IRL).
Based on the assumption that the expert maximizes
a utility function, IRL aims at learning the underlying
reward from example trajectories. Many IRL
algorithms assume that the reward function is linearly
parameterized and rely on the computation
of some associated feature expectations, which is
done through Monte Carlo simulation. However,
this assumes to have full trajectories for the expert
policy as well as at least a generative model for
intermediate policies. In this paper, we introduce
a temporal difference method, namely LSTD-$\mu$, to
compute these feature expectations. This allows extending
apprenticeship learning to a batch and off-policy
setting.},
}