author = {Edouard Klein and Matthieu Geist and Bilal Piot and Olivier Pietquin},
title = {Inverse Reinforcement Learning through Structured Classification},
year = {2012},
booktitle = {Advances in Neural Information Processing Systems (NIPS 2012)},
month = {December},
address = {Lake Tahoe (NV, USA)},
url = {http://papers.nips.cc/paper/4551-inverse-reinforcement-learning-through-structured-classification.pdf},
abstract = {This paper addresses the inverse reinforcement learning (IRL) problem, that is inferring a reward for which a demonstrated expert behavior is optimal. We introduce a new algorithm, SCIRL, whose principle is to use the so-called feature expectation of the expert as the parameterization of the score function of a multiclass classifier. This approach produces a reward function for which the expert policy is provably near-optimal. Contrary to most of existing IRL algorithms, SCIRL does not require solving a single time the direct RL problem. Moreover, up to the use of some heuristic, it may work with only trajectories sampled according to the expert behavior. This is illustrated on a car driving simulator.}