% Journal article: Lee, Dietrich, Karniadakis, Kevrekidis (2019),
% Interface Focus 9(3), article number 20180083.
% The citation key is kept as exported so existing \cite commands still resolve.
@article{200266,
  author   = {Lee, Seungjoon and Dietrich, Felix and Karniadakis, George E. and Kevrekidis, Ioannis G.},
  title    = {Linking {Gaussian} process regression with data-driven manifold embeddings for nonlinear data fusion},
  journal  = {Interface Focus},
  year     = {2019},
  volume   = {9},
  number   = {3},
  pages    = {20180083},
  issn     = {2042-8898},
  doi      = {10.1098/rsfs.2018.0083},
  url      = {https://doi.org/10.1098/rsfs.2018.0083},
  language = {eng},
  keywords = {machine learning, multi-fidelity data, multi-resolution simulation},
  abstract = {In statistical modelling with Gaussian process regression, it has been shown that combining (few) high-fidelity data with (many) low-fidelity data can enhance prediction accuracy, compared to prediction based on the few high-fidelity data only. Such information fusion techniques for multi-fidelity data commonly approach the high-fidelity model fh(t) as a function of two variables (t, s), and then use fl(t) as the s data. More generally, the high-fidelity model can be written as a function of several variables (t, s1, s2, {\textellipsis}); the low-fidelity model fl and, say, some of its derivatives can then be substituted for these variables. In this paper, we will explore mathematical algorithms for multi-fidelity information fusion that use such an approach towards improving the representation of the high-fidelity function with only a few training data points. Given that fh may not be a simple function---and sometimes not even a function---of fl, we demonstrate that using additional functions of t, such as derivatives or shifts of fl, can drastically improve the approximation of fh through Gaussian processes. We also point out a connection with {\textquoteleft}embedology{\textquoteright} techniques from topology and dynamical systems. Our illustrative examples range from instructive caricatures to computational biology models, such as Hodgkin--Huxley neural oscillations.},
}