% lloyd2015a -- conference paper by Lloyd and Ghahramani in the NeurIPS 28
% proceedings (2015), pages 829--837. The abstract is stored verbatim for
% tools that display it; standard bibliography styles do not print it.
%
% NOTE(review): `location` appears to hold the conference city rather than
% the publisher's city. biblatex treats `location` as the publisher's place,
% while some styles read it as the event venue -- confirm which toolchain
% consumes this file before renaming it (e.g. to `venue`).
% NOTE(review): the `~28` in `booktitle` is a hard-coded tie; a separate
% `volume` field would be the style-neutral form, but would change output.
@inproceedings{lloyd2015a,
  author    = {James Robert Lloyd and Zoubin Ghahramani},
  title     = {Statistical Model Criticism using Kernel Two Sample Tests},
  booktitle = {Advances in Neural Information Processing Systems~28},
  editor    = {Corinna Cortes and Neil D. Lawrence and Daniel D. Lee and Masashi Sugiyama and Roman Garnett},
  year      = {2015},
  pages     = {829--837},
  publisher = {Curran Associates, Inc.},
  location  = {Montreal, Canada},
  abstract  = {
We propose an exploratory approach to statistical model
criticism using maximum mean discrepancy (MMD) two sample
tests. Typical approaches to model criticism require a
practitioner to select a statistic by which to measure
discrepancies between data and a statistical model. MMD two
sample tests are instead constructed as an analytic
maximisation over a large space of possible statistics and
therefore automatically select the statistic which most shows
any discrepancy. We demonstrate on synthetic data that the
selected statistic, called the witness function, can be used to
identify where a statistical model most misrepresents the data
it was trained on. We then apply the procedure to real data
where the models being assessed are restricted Boltzmann
machines, deep belief networks and Gaussian process regression
and demonstrate the ways in which these models fail to capture
the properties of the data they are trained on.
},
}