author = {Bilal PIOT and Matthieu Geist and Olivier Pietquin},
title = {Boosted and Reward-regularized Classification for Apprenticeship Learning},
year = {2014},
booktitle = {13th International Conference on Autonomous Agents and Multiagent Systems (AAMAS 2014)},
note = {(accepted, to appear)},
address = {Paris, France},
url = {http://www.metz.supelec.fr//metz/personnel/geist_mat/pdfs/supelec874.pdf},
abstract = {This paper deals with the problem of learning from demonstrations, where an agent called the apprentice tries to learn a behavior from demonstrations of another agent called the expert. To address this problem, we place ourselves into the Markov Decision Process (MDP) framework, which is well suited for sequential decision making problems. A way to tackle this problem is to reduce it to classification but doing so we do not take into account the MDP structure. Other methods which take into account the MDP structure need to solve MDPs which is a difficult task and/or need a choice of features which is problem-dependent. The main contribution of the paper is to extend a large margin approach, which is a classification method, by adding a regularization term which takes into account the MDP structure. The derived algorithm, called Reward-regularized Classification for Apprenticeship Learning (RCAL), does not need to solve MDPs. But, the major advantage is that it can be boosted: this avoids the choice of features, which is a drawback of parametric approaches. A state of the art experiment (Highway) and generic experiments (structured Garnets) are conducted to show the performance of RCAL compared to algorithms from the literature.}