Ofer Dekel, Ambuj Tewari, Raman Arora
Online Bandit Learning against an Adaptive Adversary: from Regret to Policy Regret
ICML, 2012.
% Conference paper entry. `ee` is not a standard BibTeX/biblatex field, so styles
% ignore it; the same link is therefore also provided in the standard `url` field.
% The title is single-braced so that bibliography styles may apply their own casing.
% NOTE(review): `pages` holds a single number (227) -- confirm whether this is a
% page number or a paper/article number before converting it to a range.
@inproceedings{ICML-2012-DekelTA,
  author    = {Dekel, Ofer and Tewari, Ambuj and Arora, Raman},
  title     = {Online Bandit Learning against an Adaptive Adversary: from Regret to Policy Regret},
  booktitle = {Proceedings of the 29th International Conference on Machine Learning},
  publisher = {icml.cc / Omnipress},
  pages     = {227},
  year      = {2012},
  url       = {http://icml.cc/2012/papers/749.pdf},
  ee        = {http://icml.cc/2012/papers/749.pdf},
}