@article{kutz_deep_2017,
title = {Deep learning in fluid dynamics},
volume = {814},
issn = {0022-1120, 1469-7645},
url = {https://www.cambridge.org/core/journals/journal-of-fluid-mechanics/article/deep-learning-in-fluid-dynamics/F2EDDAB89563DE5157FC4B8342AD9C70},
doi = {10.1017/jfm.2016.803},
abstract = {It was only a matter of time before deep neural networks (DNNs) – deep learning – made their mark in turbulence modelling, or more broadly, in the general area of high-dimensional, complex dynamical systems. In the last decade, DNNs have become a dominant data mining tool for big data applications. Although neural networks have been applied previously to complex fluid flows, the article featured here (Ling et al., J. Fluid Mech., vol. 807, 2016, pp. 155–166) is the first to apply a true DNN architecture, specifically to Reynolds averaged Navier Stokes turbulence models. As one often expects with modern DNNs, performance gains are achieved over competing state-of-the-art methods, suggesting that DNNs may play a critically enabling role in the future of modelling complex flows.},
language = {en},
urldate = {2021-10-18},
journal = {Journal of Fluid Mechanics},
author = {Kutz, J. Nathan},
month = mar,
year = {2017},
publisher = {Cambridge University Press},
keywords = {computational methods, low-dimensional models, turbulence modelling},
pages = {1--4},
}
@article{lu_deeponet_2020,
title = {{DeepONet}: {Learning} nonlinear operators for identifying differential equations based on the universal approximation theorem of operators},
shorttitle = {{DeepONet}},
url = {http://arxiv.org/abs/1910.03193},
abstract = {While it is widely known that neural networks are universal approximators of continuous functions, a less known and perhaps more powerful result is that a neural network with a single hidden layer can approximate accurately any nonlinear continuous operator. This universal approximation theorem is suggestive of the potential application of neural networks in learning nonlinear operators from data. However, the theorem guarantees only a small approximation error for a sufficient large network, and does not consider the important optimization and generalization errors. To realize this theorem in practice, we propose deep operator networks (DeepONets) to learn operators accurately and efficiently from a relatively small dataset. A DeepONet consists of two sub-networks, one for encoding the input function at a fixed number of sensors \$x\_i, i=1,{\textbackslash}dots,m\$ (branch net), and another for encoding the locations for the output functions (trunk net). We perform systematic simulations for identifying two types of operators, i.e., dynamic systems and partial differential equations, and demonstrate that DeepONet significantly reduces the generalization error compared to the fully-connected networks. We also derive theoretically the dependence of the approximation error in terms of the number of sensors (where the input function is defined) as well as the input function type, and we verify the theorem with computational results. More importantly, we observe high-order error convergence in our computational tests, namely polynomial rates (from half order to fourth order) and even exponential convergence with respect to the training dataset size.},
urldate = {2021-10-05},
journal = {arXiv:1910.03193 [cs, stat]},
author = {Lu, Lu and Jin, Pengzhan and Karniadakis, George Em},
month = apr,
year = {2020},
eprint = {1910.03193},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
}
@article{li_fourier_2021,
title = {Fourier {Neural} {Operator} for {Parametric} {Partial} {Differential} {Equations}},
url = {http://arxiv.org/abs/2010.08895},
abstract = {The classical development of neural networks has primarily focused on learning mappings between finite-dimensional Euclidean spaces. Recently, this has been generalized to neural operators that learn mappings between function spaces. For partial differential equations (PDEs), neural operators directly learn the mapping from any functional parametric dependence to the solution. Thus, they learn an entire family of PDEs, in contrast to classical methods which solve one instance of the equation. In this work, we formulate a new neural operator by parameterizing the integral kernel directly in Fourier space, allowing for an expressive and efficient architecture. We perform experiments on Burgers' equation, Darcy flow, and Navier-Stokes equation. The Fourier neural operator is the first ML-based method to successfully model turbulent flows with zero-shot super-resolution. It is up to three orders of magnitude faster compared to traditional PDE solvers. Additionally, it achieves superior accuracy compared to previous learning-based solvers under fixed resolution.},
urldate = {2021-10-05},
journal = {arXiv:2010.08895 [cs, math]},
author = {Li, Zongyi and Kovachki, Nikola and Azizzadenesheli, Kamyar and Liu, Burigede and Bhattacharya, Kaushik and Stuart, Andrew and Anandkumar, Anima},
month = may,
year = {2021},
eprint = {2010.08895},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Mathematics - Numerical Analysis},
}
@article{Thuerey2021,
title = {Physics-based {Deep} {Learning}},
url = {http://arxiv.org/abs/2109.05237},
abstract = {This digital book contains a practical and comprehensive introduction of everything related to deep learning in the context of physical simulations. As much as possible, all topics come with hands-on code examples in the form of Jupyter notebooks to quickly get started. Beyond standard supervised learning from data, we'll look at physical loss constraints, more tightly coupled learning algorithms with differentiable simulations, as well as reinforcement learning and uncertainty modeling. We live in exciting times: these methods have a huge potential to fundamentally change what computer simulations can achieve.},
urldate = {2021-09-30},
journal = {arXiv:2109.05237 [physics]},
author = {Thuerey, Nils and Holl, Philipp and Mueller, Maximilian and Schnell, Patrick and Trost, Felix and Um, Kiwon},
month = sep,
year = {2021},
eprint = {2109.05237},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Physics - Computational Physics},
}
@inproceedings{Kristiadi2020,
title = {Being {Bayesian}, {Even} {Just} a {Bit}, {Fixes} {Overconfidence} in {ReLU} {Networks}},
url = {https://proceedings.mlr.press/v119/kristiadi20a.html},
language = {en},
urldate = {2021-09-20},
booktitle = {International {Conference} on {Machine} {Learning}},
publisher = {PMLR},
author = {Kristiadi, Agustinus and Hein, Matthias and Hennig, Philipp},
month = nov,
year = {2020},
volume = {119},
issn = {2640-3498},
pages = {5436--5446},
}
@article{Rubanova2019,
title = {Latent {ODEs} for {Irregularly}-{Sampled} {Time} {Series}},
url = {http://arxiv.org/abs/1907.03907},
abstract = {Time series with non-uniform intervals occur in many applications, and are difficult to model using standard recurrent neural networks (RNNs). We generalize RNNs to have continuous-time hidden dynamics defined by ordinary differential equations (ODEs), a model we call ODE-RNNs. Furthermore, we use ODE-RNNs to replace the recognition network of the recently-proposed Latent ODE model. Both ODE-RNNs and Latent ODEs can naturally handle arbitrary time gaps between observations, and can explicitly model the probability of observation times using Poisson processes. We show experimentally that these ODE-based models outperform their RNN-based counterparts on irregularly-sampled data.},
urldate = {2021-09-28},
journal = {arXiv:1907.03907 [cs, stat]},
author = {Rubanova, Yulia and Chen, Ricky T. Q. and Duvenaud, David},
month = jul,
year = {2019},
eprint = {1907.03907},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
}
@article{Krishnan2016,
title = {Structured {Inference} {Networks} for {Nonlinear} {State} {Space} {Models}},
url = {http://arxiv.org/abs/1609.09869},
abstract = {Gaussian state space models have been used for decades as generative models of sequential data. They admit an intuitive probabilistic interpretation, have a simple functional form, and enjoy widespread adoption. We introduce a unified algorithm to efficiently learn a broad class of linear and non-linear state space models, including variants where the emission and transition distributions are modeled by deep neural networks. Our learning algorithm simultaneously learns a compiled inference network and the generative model, leveraging a structured variational approximation parameterized by recurrent neural networks to mimic the posterior distribution. We apply the learning algorithm to both synthetic and real-world datasets, demonstrating its scalability and versatility. We find that using the structured approximation to the posterior results in models with significantly higher held-out likelihood.},
urldate = {2021-09-27},
journal = {arXiv:1609.09869 [cs, stat]},
author = {Krishnan, Rahul G. and Shalit, Uri and Sontag, David},
month = dec,
year = {2016},
eprint = {1609.09869},
archiveprefix = {arXiv},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning},
}
@article{Krishnan2015,
title = {Deep {Kalman} {Filters}},
url = {http://arxiv.org/abs/1511.05121},
abstract = {Kalman Filters are one of the most influential models of time-varying phenomena. They admit an intuitive probabilistic interpretation, have a simple functional form, and enjoy widespread adoption in a variety of disciplines. Motivated by recent variational methods for learning deep generative models, we introduce a unified algorithm to efficiently learn a broad spectrum of Kalman filters. Of particular interest is the use of temporal generative models for counterfactual inference. We investigate the efficacy of such models for counterfactual inference, and to that end we introduce the "Healing MNIST" dataset where long-term structure, noise and actions are applied to sequences of digits. We show the efficacy of our method for modeling this dataset. We further show how our model can be used for counterfactual inference for patients, based on electronic health record data of 8,000 patients over 4.5 years.},
urldate = {2021-07-19},
journal = {arXiv:1511.05121 [cs, stat]},
author = {Krishnan, Rahul G. and Shalit, Uri and Sontag, David},
month = nov,
year = {2015},
eprint = {1511.05121},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
}
@article{wu_deep_2019,
title = {Deep {Generative} {Markov} {State} {Models}},
url = {http://arxiv.org/abs/1805.07601},
abstract = {We propose a deep generative Markov State Model (DeepGenMSM) learning framework for inference of metastable dynamical systems and prediction of trajectories. After unsupervised training on time series data, the model contains (i) a probabilistic encoder that maps from high-dimensional configuration space to a small-sized vector indicating the membership to metastable (long-lived) states, (ii) a Markov chain that governs the transitions between metastable states and facilitates analysis of the long-time dynamics, and (iii) a generative part that samples the conditional distribution of configurations in the next time step. The model can be operated in a recursive fashion to generate trajectories to predict the system evolution from a defined starting state and propose new configurations. The DeepGenMSM is demonstrated to provide accurate estimates of the long-time kinetics and generate valid distributions for molecular dynamics (MD) benchmark systems. Remarkably, we show that DeepGenMSMs are able to make long time-steps in molecular configuration space and generate physically realistic structures in regions that were not seen in training data.},
urldate = {2021-09-27},
journal = {arXiv:1805.07601 [physics, stat]},
author = {Wu, Hao and Mardt, Andreas and Pasquali, Luca and Noé, Frank},
month = jan,
year = {2019},
eprint = {1805.07601},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Mathematics - Dynamical Systems, Mathematics - Probability, Physics - Data Analysis, Statistics and Probability, Statistics - Machine Learning},
}
@article{ha_hypernetworks_2016,
title = {{HyperNetworks}},
url = {http://arxiv.org/abs/1609.09106},
abstract = {This work explores hypernetworks: an approach of using a one network, also known as a hypernetwork, to generate the weights for another network. Hypernetworks provide an abstraction that is similar to what is found in nature: the relationship between a genotype - the hypernetwork - and a phenotype - the main network. Though they are also reminiscent of HyperNEAT in evolution, our hypernetworks are trained end-to-end with backpropagation and thus are usually faster. The focus of this work is to make hypernetworks useful for deep convolutional networks and long recurrent networks, where hypernetworks can be viewed as relaxed form of weight-sharing across layers. Our main result is that hypernetworks can generate non-shared weights for LSTM and achieve near state-of-the-art results on a variety of sequence modelling tasks including character-level language modelling, handwriting generation and neural machine translation, challenging the weight-sharing paradigm for recurrent networks. Our results also show that hypernetworks applied to convolutional networks still achieve respectable results for image recognition tasks compared to state-of-the-art baseline models while requiring fewer learnable parameters.},
urldate = {2021-09-24},
journal = {arXiv:1609.09106 [cs]},
author = {Ha, David and Dai, Andrew and Le, Quoc V.},
month = dec,
year = {2016},
eprint = {1609.09106},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning},
}
@article{chalvidal_go_2021,
title = {Go with the {Flow}: {Adaptive} {Control} for {Neural} {ODEs}},
shorttitle = {Go with the {Flow}},
url = {http://arxiv.org/abs/2006.09545},
abstract = {Despite their elegant formulation and lightweight memory cost, neural ordinary differential equations (NODEs) suffer from known representational limitations. In particular, the single flow learned by NODEs cannot express all homeomorphisms from a given data space to itself, and their static weight parameterization restricts the type of functions they can learn compared to discrete architectures with layer-dependent weights. Here, we describe a new module called neurally controlled ODE (N-CODE) designed to improve the expressivity of NODEs. The parameters of N-CODE modules are dynamic variables governed by a trainable map from initial or current activation state, resulting in forms of open-loop and closed-loop control, respectively. A single module is sufficient for learning a distribution on non-autonomous flows that adaptively drive neural representations. We provide theoretical and empirical evidence that N-CODE circumvents limitations of previous NODEs models and show how increased model expressivity manifests in several supervised and unsupervised learning problems. These favorable empirical results indicate the potential of using data- and activity-dependent plasticity in neural networks across numerous domains.},
urldate = {2021-09-24},
journal = {arXiv:2006.09545 [cs, eess, stat]},
author = {Chalvidal, Mathieu and Ricci, Matthew and VanRullen, Rufin and Serre, Thomas},
month = apr,
year = {2021},
eprint = {2006.09545},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Electrical Engineering and Systems Science - Systems and Control, Statistics - Machine Learning},
}
@techreport{Abadi2016,
title = {{TensorFlow}: {Large}-{Scale} {Machine} {Learning} on {Heterogeneous} {Distributed} {Systems}},
shorttitle = {{TensorFlow}},
url = {http://arxiv.org/abs/1603.04467},
abstract = {TensorFlow is an interface for expressing machine learning algorithms, and an implementation for executing such algorithms. A computation expressed using TensorFlow can be executed with little or no change on a wide variety of heterogeneous systems, ranging from mobile devices such as phones and tablets up to large-scale distributed systems of hundreds of machines and thousands of computational devices such as GPU cards. The system is flexible and can be used to express a wide variety of algorithms, including training and inference algorithms for deep neural network models, and it has been used for conducting research and for deploying machine learning systems into production across more than a dozen areas of computer science and other fields, including speech recognition, computer vision, robotics, information retrieval, natural language processing, geographic information extraction, and computational drug discovery. This paper describes the TensorFlow interface and an implementation of that interface that we have built at Google. The TensorFlow API and a reference implementation were released as an open-source package under the Apache 2.0 license in November, 2015 and are available at www.tensorflow.org.},
number = {1603.04467},
urldate = {2020-09-20},
author = {Abadi, Martín and Agarwal, Ashish and Barham, Paul and Brevdo, Eugene and Chen, Zhifeng and Citro, Craig and Corrado, Greg S. and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and Ghemawat, Sanjay and Goodfellow, Ian and Harp, Andrew and Irving, Geoffrey and Isard, Michael and Jia, Yangqing and Jozefowicz, Rafal and Kaiser, Lukasz and Kudlur, Manjunath and Levenberg, Josh and Mané, Dan and Monga, Rajat and Moore, Sherry and Murray, Derek and Olah, Chris and Schuster, Mike and Shlens, Jonathon and Steiner, Benoit and Sutskever, Ilya and Talwar, Kunal and Tucker, Paul and Vanhoucke, Vincent and Vasudevan, Vijay and Viégas, Fernanda and Vinyals, Oriol and Warden, Pete and Wattenberg, Martin and Wicke, Martin and Yu, Yuan and Zheng, Xiaoqiang},
month = mar,
year = {2016},
institution = {Google},
eprint = {1603.04467},
archiveprefix = {arXiv},
keywords = {Computer Science - Distributed, Parallel, and Cluster Computing, Computer Science - Machine Learning},
}
@article{Lee2021,
title = {Parameterized neural ordinary differential equations: applications to computational physics problems},
volume = {477},
shorttitle = {Parameterized neural ordinary differential equations},
url = {https://royalsocietypublishing.org/doi/full/10.1098/rspa.2021.0162},
doi = {10.1098/rspa.2021.0162},
abstract = {This work proposes an extension of neural ordinary differential equations (NODEs) by introducing an additional set of ODE input parameters to NODEs. This extension allows NODEs to learn multiple dynamics specified by the input parameter instances. Our extension is inspired by the concept of parameterized ODEs, which are widely investigated in computational science and engineering contexts, where characteristics of the governing equations vary over the input parameters. We apply the proposed parameterized NODEs (PNODEs) for learning latent dynamics of complex dynamical processes that arise in computational physics, which is an essential component for enabling rapid numerical simulations for time-critical physics applications. For this, we propose an encoder–decoder-type framework, which models latent dynamics as PNODEs. We demonstrate the effectiveness of PNODEs on benchmark problems from computational physics.},
number = {2253},
urldate = {2021-09-22},
journal = {Proceedings of the Royal Society A: Mathematical, Physical and Engineering Sciences},
author = {Lee, Kookjin and Parish, Eric J.},
month = sep,
year = {2021},
publisher = {Royal Society},
keywords = {autoencoders, deep learning, latent-dynamics learning, model reduction, neural ordinary differential equations, nonlinear manifolds},
pages = {20210162},
}
@article{Jiang2014,
title = {Closed-loop verification of medical devices with model abstraction and refinement},
volume = {16},
issn = {1433-2787},
url = {https://doi.org/10.1007/s10009-013-0289-7},
doi = {10.1007/s10009-013-0289-7},
abstract = {The design and implementation of software for medical devices is challenging due to the closed-loop interaction with the patient, which is a stochastic physical environment. The safety-critical nature and the lack of existing industry standards for verification make this an ideal domain for exploring applications of formal modeling and closed-loop analysis. The biggest challenge is that the environment model(s) have to be both complex enough to express the physiological requirements and general enough to cover all possible inputs to the device. In this effort, we use a dual chamber implantable pacemaker as a case study to demonstrate verification of software specifications of medical devices as timed-automata models in UPPAAL. The pacemaker model is based on the specifications and algorithm descriptions from Boston Scientific. The heart is modeled using timed automata based on the physiology of heart. The model is gradually abstracted with timed simulation to preserve properties. A manual Counter-Example-Guided Abstraction and Refinement (CEGAR) framework has been adapted to refine the heart model when spurious counter-examples are found. To demonstrate the closed-loop nature of the problem and heart model refinement, we investigated two clinical cases of Pacemaker Mediated Tachycardia and verified their corresponding correction algorithms in the pacemaker. Along with our tools for code generation from UPPAAL models, this effort enables model-driven design and certification of software for medical devices.},
number = {2},
journal = {International Journal on Software Tools for Technology Transfer},
author = {Jiang, Zhihao and Pajic, Miroslav and Alur, Rajeev and Mangharam, Rahul},
month = apr,
year = {2014},
pages = {191--213},
}
@incollection{Pintard2013,
address = {Berlin, Heidelberg},
title = {Fault {Injection} in the {Automotive} {Standard} {ISO} 26262: {An} {Initial} {Approach}},
volume = {7869},
isbn = {978-3-642-38789-0},
shorttitle = {Fault {Injection} in the {Automotive} {Standard} {ISO} 26262},
url = {http://link.springer.com/10.1007/978-3-642-38789-0_11},
urldate = {2019-03-05},
booktitle = {Dependable {Computing}},
publisher = {Springer Berlin Heidelberg},
author = {Pintard, Ludovic and Fabre, Jean-Charles and Kanoun, Karama and Leeman, Michel and Roy, Matthieu},
editor = {Vieira, Marco and Cunha, João Carlos},
year = {2013},
doi = {10.1007/978-3-642-38789-0_11},
pages = {126--133},
}
@inproceedings{Moradi2019,
address = {Berlin, Germany},
title = {Optimizing {Fault} {Injection} in {FMI} {Co}-simulation},
doi = {10.5555/3374138.3374170},
booktitle = {Proceedings of the 2019 {Summer} {Simulation} {Conference}},
publisher = {Society for Computer Simulation International},
author = {Moradi, Mehrdad and Gomes, Cláudio and Oakes, Bentley James and Denil, Joachim},
year = {2019},
pages = {12},
}
@article{San2019,
title = {An artificial neural network framework for reduced order modeling of transient flows},
volume = {77},
issn = {1007-5704},
url = {https://linkinghub.elsevier.com/retrieve/pii/S1007570419301364},
doi = {10.1016/j.cnsns.2019.04.025},
language = {en},
urldate = {2020-09-20},
journal = {Communications in Nonlinear Science and Numerical Simulation},
author = {San, Omer and Maulik, Romit and Ahmed, Mansoor},
month = oct,
year = {2019},
pages = {271--287},
}
@article{Xu2021,
title = {Infinitely {Deep} {Bayesian} {Neural} {Networks} with {Stochastic} {Differential} {Equations}},
url = {http://arxiv.org/abs/2102.06559},
abstract = {We perform scalable approximate inference in a continuous-depth Bayesian neural network family. In this model class, uncertainty about separate weights in each layer gives hidden units that follow a stochastic differential equation. We demonstrate gradient-based stochastic variational inference in this infinite-parameter setting, producing arbitrarily-flexible approximate posteriors. We also derive a novel gradient estimator that approaches zero variance as the approximate posterior over weights approaches the true posterior. This approach brings continuous-depth Bayesian neural nets to a competitive comparison against discrete-depth alternatives, while inheriting the memory-efficient training and tunable precision of Neural ODEs.},
urldate = {2021-09-20},
journal = {arXiv:2102.06559 [cs, stat]},
author = {Xu, Winnie and Chen, Ricky T. Q. and Li, Xuechen and Duvenaud, David},
month = aug,
year = {2021},
eprint = {2102.06559},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
}
@article{Hegde2018,
title = {Deep learning with differential {Gaussian} process flows},
url = {http://arxiv.org/abs/1810.04066},
abstract = {We propose a novel deep learning paradigm of differential flows that learn a stochastic differential equation transformations of inputs prior to a standard classification or regression function. The key property of differential Gaussian processes is the warping of inputs through infinitely deep, but infinitesimal, differential fields, that generalise discrete layers into a dynamical system. We demonstrate state-of-the-art results that exceed the performance of deep Gaussian processes and neural networks},
urldate = {2021-09-21},
journal = {arXiv:1810.04066 [cs, stat]},
author = {Hegde, Pashupati and Heinonen, Markus and Lähdesmäki, Harri and Kaski, Samuel},
month = oct,
year = {2018},
eprint = {1810.04066},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
}
@article{Liu2019,
title = {Neural {SDE}: {Stabilizing} {Neural} {ODE} {Networks} with {Stochastic} {Noise}},
shorttitle = {Neural {SDE}},
url = {http://arxiv.org/abs/1906.02355},
abstract = {Neural Ordinary Differential Equation (Neural ODE) has been proposed as a continuous approximation to the ResNet architecture. Some commonly used regularization mechanisms in discrete neural networks (e.g. dropout, Gaussian noise) are missing in current Neural ODE networks. In this paper, we propose a new continuous neural network framework called Neural Stochastic Differential Equation (Neural SDE) network, which naturally incorporates various commonly used regularization mechanisms based on random noise injection. Our framework can model various types of noise injection frequently used in discrete networks for regularization purpose, such as dropout and additive/multiplicative noise in each block. We provide theoretical analysis explaining the improved robustness of Neural SDE models against input perturbations/adversarial attacks. Furthermore, we demonstrate that the Neural SDE network can achieve better generalization than the Neural ODE and is more resistant to adversarial and non-adversarial input perturbations.},
urldate = {2021-09-21},
journal = {arXiv:1906.02355 [cs, stat]},
author = {Liu, Xuanqing and Xiao, Tesi and Si, Si and Cao, Qin and Kumar, Sanjiv and Hsieh, Cho-Jui},
month = jun,
year = {2019},
eprint = {1906.02355},
archiveprefix = {arXiv},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning, Statistics - Machine Learning},
}
@article{Oganesyan2020,
title = {Stochasticity in {Neural} {ODEs}: {An} {Empirical} {Study}},
shorttitle = {Stochasticity in {Neural} {ODEs}},
url = {http://arxiv.org/abs/2002.09779},
abstract = {Stochastic regularization of neural networks (e.g. dropout) is a wide-spread technique in deep learning that allows for better generalization. Despite its success, continuous-time models, such as neural ordinary differential equation (ODE), usually rely on a completely deterministic feed-forward operation. This work provides an empirical study of stochastically regularized neural ODE on several image-classification tasks (CIFAR-10, CIFAR-100, TinyImageNet). Building upon the formalism of stochastic differential equations (SDEs), we demonstrate that neural SDE is able to outperform its deterministic counterpart. Further, we show that data augmentation during the training improves the performance of both deterministic and stochastic versions of the same model. However, the improvements obtained by the data augmentation completely eliminate the empirical gains of the stochastic regularization, making the difference in the performance of neural ODE and neural SDE negligible.},
urldate = {2021-09-21},
journal = {arXiv:2002.09779 [cs, stat]},
author = {Oganesyan, Viktor and Volokhova, Alexandra and Vetrov, Dmitry},
month = jun,
year = {2020},
eprint = {2002.09779},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
}
@book{Kloeden1992,
title = {Numerical {Solution} of {Stochastic} {Differential} {Equations}},
isbn = {978-3-662-12616-5},
author = {Kloeden, Peter E and Platen, Eckhard},
publisher = {Springer},
year = {1992},
}
@book{platen2010numerical,
title = {Numerical solution of stochastic differential equations with jumps in finance},
volume = {64},
publisher = {Springer Science \& Business Media},
author = {Platen, Eckhard and Bruti-Liberati, Nicola},
year = {2010},
}
@article{Dandekar2021,
title = {Bayesian {Neural} {Ordinary} {Differential} {Equations}},
url = {http://arxiv.org/abs/2012.07244},
abstract = {Recently, Neural Ordinary Differential Equations has emerged as a powerful framework for modeling physical simulations without explicitly defining the ODEs governing the system, but instead learning them via machine learning. However, the question: "Can Bayesian learning frameworks be integrated with Neural ODE's to robustly quantify the uncertainty in the weights of a Neural ODE?" remains unanswered. In an effort to address this question, we primarily evaluate the following categories of inference methods: (a) The No-U-Turn MCMC sampler (NUTS), (b) Stochastic Gradient Hamiltonian Monte Carlo (SGHMC) and (c) Stochastic Langevin Gradient Descent (SGLD). We demonstrate the successful integration of Neural ODEs with the above Bayesian inference frameworks on classical physical systems, as well as on standard machine learning datasets like MNIST, using GPU acceleration. On the MNIST dataset, we achieve a posterior sample accuracy of 98.5\% on the test ensemble of 10,000 images. Subsequently, for the first time, we demonstrate the successful integration of variational inference with normalizing flows and Neural ODEs, leading to a powerful Bayesian Neural ODE object. Finally, considering a predator-prey model and an epidemiological system, we demonstrate the probabilistic identification of model specification in partially-described dynamical systems using universal ordinary differential equations. Together, this gives a scientific machine learning tool for probabilistic estimation of epistemic uncertainties.},
urldate = {2021-09-20},
journal = {arXiv:2012.07244 [cs]},
author = {Dandekar, Raj and Chung, Karen and Dixit, Vaibhav and Tarek, Mohamed and Garcia-Valadez, Aslan and Vemula, Krishna Vishal and Rackauckas, Chris},
month = mar,
year = {2021},
eprint = {2012.07244},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning},
}
@article{kidger_efficient_2021,
title = {Efficient and {Accurate} {Gradients} for {Neural} {SDEs}},
url = {http://arxiv.org/abs/2105.13493},
abstract = {Neural SDEs combine many of the best qualities of both RNNs and SDEs: memory efficient training, high-capacity function approximation, and strong priors on model space. This makes them a natural choice for modelling many types of temporal dynamics. Training a Neural SDE (either as a VAE or as a GAN) requires backpropagating through an SDE solve. This may be done by solving a backwards-in-time SDE whose solution is the desired parameter gradients. However, this has previously suffered from severe speed and accuracy issues, due to high computational cost and numerical truncation errors. Here, we overcome these issues through several technical innovations. First, we introduce the {\textbackslash}textit\{reversible Heun method\}. This is a new SDE solver that is {\textbackslash}textit\{algebraically reversible\}: eliminating numerical gradient errors, and the first such solver of which we are aware. Moreover it requires half as many function evaluations as comparable solvers, giving up to a \$1.98{\textbackslash}times\$ speedup. Second, we introduce the {\textbackslash}textit\{Brownian Interval\}: a new, fast, memory efficient, and exact way of sampling {\textbackslash}textit\{and reconstructing\} Brownian motion. With this we obtain up to a \$10.6{\textbackslash}times\$ speed improvement over previous techniques, which in contrast are both approximate and relatively slow. Third, when specifically training Neural SDEs as GANs (Kidger et al. 2021), we demonstrate how SDE-GANs may be trained through careful weight clipping and choice of activation function. This reduces computational cost (giving up to a \$1.87{\textbackslash}times\$ speedup) and removes the numerical truncation errors associated with gradient penalty. Altogether, we outperform the state-of-the-art by substantial margins, with respect to training speed, and with respect to classification, prediction, and MMD test metrics. 
We have contributed implementations of all of our techniques to the torchsde library to help facilitate their adoption.},
urldate = {2021-09-20},
eprint = {2105.13493},
eprinttype = {arXiv},
author = {Kidger, Patrick and Foster, James and Li, Xuechen and Lyons, Terry},
month = jun,
year = {2021},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Mathematics - Dynamical Systems, Statistics - Machine Learning},
}
@article{Jospin2020,
title = {Hands-on {Bayesian} {Neural} {Networks} -- a {Tutorial} for {Deep} {Learning} {Users}},
url = {http://arxiv.org/abs/2007.06823},
abstract = {Modern deep learning methods have equipped researchers and engineers with incredibly powerful tools to tackle problems that previously seemed impossible. However, since deep learning methods operate as black boxes, the uncertainty associated with their predictions is often challenging to quantify. Bayesian statistics offer a formalism to understand and quantify the uncertainty associated with deep neural networks predictions. This paper provides a tutorial for researchers and scientists who are using machine learning, especially deep learning, with an overview of the relevant literature and a complete toolset to design, implement, train, use and evaluate Bayesian neural networks.},
urldate = {2021-09-20},
eprint = {2007.06823},
eprinttype = {arXiv},
author = {Jospin, Laurent Valentin and Buntine, Wray and Boussaid, Farid and Laga, Hamid and Bennamoun, Mohammed},
month = jul,
year = {2020},
keywords = {62-02 (Primary), Computer Science - Machine Learning, G.3, I.2.6, Statistics - Machine Learning},
}
@article{betancourt_conceptual_2018,
title = {A {Conceptual} {Introduction} to {Hamiltonian} {Monte} {Carlo}},
url = {http://arxiv.org/abs/1701.02434},
abstract = {Hamiltonian Monte Carlo has proven a remarkable empirical success, but only recently have we begun to develop a rigorous understanding of why it performs so well on difficult problems and how it is best applied in practice. Unfortunately, that understanding is confined within the mathematics of differential geometry which has limited its dissemination, especially to the applied communities for which it is particularly important. In this review I provide a comprehensive conceptual account of these theoretical foundations, focusing on developing a principled intuition behind the method and its optimal implementations rather of any exhaustive rigor. Whether a practitioner or a statistician, the dedicated reader will acquire a solid grasp of how Hamiltonian Monte Carlo works, when it succeeds, and, perhaps most importantly, when it fails.},
urldate = {2021-09-20},
eprint = {1701.02434},
eprinttype = {arXiv},
author = {Betancourt, Michael},
month = jul,
year = {2018},
keywords = {Statistics - Methodology},
}
@inproceedings{Denil2017,
address = {San Diego, CA, USA},
series = {{TMS}/{DEVS} '17},
title = {The experiment model and validity frame in {M\&S}},
abstract = {Modelling and Simulation approaches use system models to conduct simulation experiments. Experimental frames have been applied in this context to formally define a system's context. During the creation of an experimental frame for a simple spring model it becomes clear that experimental frames in their current definition lack certain properties and omit relevant information. Our approach describes the process of capturing the context of models and systems to provide truly reproducible experiment descriptions. The information, captured as experimental setups, can then be used for different purposes and in different scenarios, in particular for checking the validity of a model, the discovery of suitable models for the design of a system, and for calibrating models.},
urldate = {2021-06-04},
booktitle = {Proceedings of the {Symposium} on {Theory} of {Modeling} \& {Simulation}},
publisher = {Society for Computer Simulation International},
author = {Denil, Joachim and Klikovits, Stefan and Mosterman, Pieter J. and Vallecillo, Antonio and Vangheluwe, Hans},
month = apr,
year = {2017},
keywords = {cyber-physical systems (CPS), model-based systems engineering (MBSE), multi-paradigm modelling (MPM)},
pages = {1--12},
}
@inproceedings{Morrill2021,
title = {Neural {Rough} {Differential} {Equations} for {Long} {Time} {Series}},
url = {https://proceedings.mlr.press/v139/morrill21b.html},
language = {en},
urldate = {2021-09-03},
booktitle = {Proceedings of the 38th {International} {Conference} on {Machine} {Learning}},
series = {Proceedings of {Machine} {Learning} {Research}},
volume = {139},
issn = {2640-3498},
publisher = {PMLR},
author = {Morrill, James and Salvi, Cristopher and Kidger, Patrick and Foster, James},
month = jul,
year = {2021},
pages = {7829--7838},
}
@article{Morrill2021a,
title = {Neural {Controlled} {Differential} {Equations} for {Online} {Prediction} {Tasks}},
url = {http://arxiv.org/abs/2106.11028},
abstract = {Neural controlled differential equations (Neural CDEs) are a continuous-time extension of recurrent neural networks (RNNs), achieving state-of-the-art (SOTA) performance at modelling functions of irregular time series. In order to interpret discrete data in continuous time, current implementations rely on non-causal interpolations of the data. This is fine when the whole time series is observed in advance, but means that Neural CDEs are not suitable for use in {\textbackslash}textit\{online prediction tasks\}, where predictions need to be made in real-time: a major use case for recurrent networks. Here, we show how this limitation may be rectified. First, we identify several theoretical conditions that interpolation schemes for Neural CDEs should satisfy, such as boundedness and uniqueness. Second, we use these to motivate the introduction of new schemes that address these conditions, offering in particular measurability (for online prediction), and smoothness (for speed). Third, we empirically benchmark our online Neural CDE model on three continuous monitoring tasks from the MIMIC-IV medical database: we demonstrate improved performance on all tasks against ODE benchmarks, and on two of the three tasks against SOTA non-ODE benchmarks.},
urldate = {2021-09-03},
eprint = {2106.11028},
eprinttype = {arXiv},
author = {Morrill, James and Kidger, Patrick and Yang, Lingyi and Lyons, Terry},
month = jun,
year = {2021},
keywords = {Computer Science - Machine Learning},
}
@article{Kidger2020a,
title = {Neural {Controlled} {Differential} {Equations} for {Irregular} {Time} {Series}},
url = {http://arxiv.org/abs/2005.08926},
abstract = {Neural ordinary differential equations are an attractive option for modelling temporal dynamics. However, a fundamental issue is that the solution to an ordinary differential equation is determined by its initial condition, and there is no mechanism for adjusting the trajectory based on subsequent observations. Here, we demonstrate how this may be resolved through the well-understood mathematics of {\textbackslash}emph\{controlled differential equations\}. The resulting {\textbackslash}emph\{neural controlled differential equation\} model is directly applicable to the general setting of partially-observed irregularly-sampled multivariate time series, and (unlike previous work on this problem) it may utilise memory-efficient adjoint-based backpropagation even across observations. We demonstrate that our model achieves state-of-the-art performance against similar (ODE or RNN based) models in empirical studies on a range of datasets. Finally we provide theoretical results demonstrating universal approximation, and that our model subsumes alternative ODE models.},
urldate = {2021-08-27},
eprint = {2005.08926},
eprinttype = {arXiv},
author = {Kidger, Patrick and Morrill, James and Foster, James and Lyons, Terry},
month = nov,
year = {2020},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
}
@article{Kidger2020,
title = {``{Hey}, that's not an {ODE}'': {Faster} {ODE} {Adjoints} with 12 {Lines} of {Code}},
shorttitle = {``{Hey}, that's not an {ODE}''},
url = {http://arxiv.org/abs/2009.09457},
abstract = {Neural differential equations may be trained by backpropagating gradients via the adjoint method, which is another differential equation typically solved using an adaptive-step-size numerical differential equation solver. A proposed step is accepted if its error, {\textbackslash}emph\{relative to some norm\}, is sufficiently small; else it is rejected, the step is shrunk, and the process is repeated. Here, we demonstrate that the particular structure of the adjoint equations makes the usual choices of norm (such as \$L{\textasciicircum}2\$) unnecessarily stringent. By replacing it with a more appropriate (semi)norm, fewer steps are unnecessarily rejected and the backpropagation is made faster. This requires only minor code modifications. Experiments on a wide range of tasks---including time series, generative modeling, and physical control---demonstrate a median improvement of 40\% fewer function evaluations. On some problems we see as much as 62\% fewer function evaluations, so that the overall training time is roughly halved.},
urldate = {2021-01-29},
eprint = {2009.09457},
eprinttype = {arXiv},
author = {Kidger, Patrick and Chen, Ricky T. Q. and Lyons, Terry},
month = sep,
year = {2020},
keywords = {Computer Science - Machine Learning, Mathematics - Classical Analysis and ODEs},
}
@article{Razavi2012,
title = {Review of surrogate modeling in water resources},
volume = {48},
issn = {0043-1397},
url = {http://doi.wiley.com/10.1029/2011WR011527},
doi = {10.1029/2011WR011527},
language = {en},
number = {7},
urldate = {2021-03-12},
journal = {Water Resources Research},
author = {Razavi, Saman and Tolson, Bryan A. and Burn, Donald H.},
month = jul,
year = {2012},
}
@article{Westermann2019,
title = {Surrogate modelling for sustainable building design -- {A} review},
volume = {198},
issn = {0378-7788},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0378778819302877},
doi = {10.1016/j.enbuild.2019.05.057},
language = {en},
urldate = {2021-03-12},
journal = {Energy and Buildings},
author = {Westermann, Paul and Evins, Ralph},
month = sep,
year = {2019},
pages = {170--186},
}
@article{Forrester2009,
title = {Recent advances in surrogate-based optimization},
volume = {45},
issn = {0376-0421},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0376042108000766},
doi = {10.1016/j.paerosci.2008.11.001},
language = {en},
number = {1--3},
urldate = {2020-12-10},
journal = {Progress in Aerospace Sciences},
author = {Forrester, Alexander I. J. and Keane, Andy J.},
month = jan,
year = {2009},
pages = {50--79},
}
@inproceedings{Viana2010,
address = {Montreal, Quebec, Canada},
title = {Making the {Most} {Out} of {Surrogate} {Models}: {Tricks} of the {Trade}},
isbn = {978-0-7918-4409-0},
shorttitle = {Making the {Most} {Out} of {Surrogate} {Models}},
url = {https://asmedigitalcollection.asme.org/IDETC-CIE/proceedings/IDETC-CIE2010/44090/587/340462},
doi = {10.1115/DETC2010-28813},
urldate = {2021-03-12},
booktitle = {Volume 1: 36th {Design} {Automation} {Conference}, {Parts} {A} and {B}},
publisher = {ASMEDC},
author = {Viana, Felipe A. C. and Gogu, Christian and Haftka, Raphael T.},
month = jan,
year = {2010},
pages = {587--598},
}
@incollection{Koziel2020,
address = {Cham},
title = {Basics of {Data}-{Driven} {Surrogate} {Modeling}},
isbn = {978-3-030-38925-3, 978-3-030-38926-0},
url = {http://link.springer.com/10.1007/978-3-030-38926-0_2},
language = {en},
urldate = {2020-12-10},
booktitle = {Performance-{Driven} {Surrogate} {Modeling} of {High}-{Frequency} {Structures}},
publisher = {Springer International Publishing},
author = {Koziel, Slawomir and Pietrenko-Dabrowska, Anna},
year = {2020},
doi = {10.1007/978-3-030-38926-0_2},
pages = {23--58},
}
@article{massaroli_differentiable_2021,
title = {Differentiable {Multiple} {Shooting} {Layers}},
url = {http://arxiv.org/abs/2106.03885},
abstract = {We detail a novel class of implicit neural models. Leveraging time-parallel methods for differential equations, Multiple Shooting Layers (MSLs) seek solutions of initial value problems via parallelizable root-finding algorithms. MSLs broadly serve as drop-in replacements for neural ordinary differential equations (Neural ODEs) with improved efficiency in number of function evaluations (NFEs) and wall-clock inference time. We develop the algorithmic framework of MSLs, analyzing the different choices of solution methods from a theoretical and computational perspective. MSLs are showcased in long horizon optimal control of ODEs and PDEs and as latent models for sequence generation. Finally, we investigate the speedups obtained through application of MSL inference in neural controlled differential equations (Neural CDEs) for time series classification of medical data.},
urldate = {2021-08-18},
eprint = {2106.03885},
eprinttype = {arXiv},
author = {Massaroli, Stefano and Poli, Michael and Sonoda, Sho and Suzuki, Taiji and Park, Jinkyoo and Yamashita, Atsushi and Asama, Hajime},
month = jun,
year = {2021},
keywords = {Computer Science - Machine Learning, Mathematics - Dynamical Systems, Mathematics - Optimization and Control, Statistics - Machine Learning},
}
@book{meyn_markov_1993,
address = {London},
title = {Markov {Chains} and {Stochastic} {Stability}},
isbn = {978-1-4471-3269-1, 978-1-4471-3267-7},
url = {http://link.springer.com/10.1007/978-1-4471-3267-7},
urldate = {2021-08-30},
publisher = {Springer London},
author = {Meyn, Sean P. and Tweedie, Richard L.},
year = {1993},
doi = {10.1007/978-1-4471-3267-7},
}
@article{tzen_neural_2019,
title = {Neural {Stochastic} {Differential} {Equations}: {Deep} {Latent} {Gaussian} {Models} in the {Diffusion} {Limit}},
shorttitle = {Neural {Stochastic} {Differential} {Equations}},
url = {http://arxiv.org/abs/1905.09883},
abstract = {In deep latent Gaussian models, the latent variable is generated by a time-inhomogeneous Markov chain, where at each time step we pass the current state through a parametric nonlinear map, such as a feedforward neural net, and add a small independent Gaussian perturbation. This work considers the diffusion limit of such models, where the number of layers tends to infinity, while the step size and the noise variance tend to zero. The limiting latent object is an It{\textbackslash}{\textasciicircum}o diffusion process that solves a stochastic differential equation (SDE) whose drift and diffusion coefficient are implemented by neural nets. We develop a variational inference framework for these {\textbackslash}textit\{neural SDEs\} via stochastic automatic differentiation in Wiener space, where the variational approximations to the posterior are obtained by Girsanov (mean-shift) transformation of the standard Wiener process and the computation of gradients is based on the theory of stochastic flows. This permits the use of black-box SDE solvers and automatic differentiation for end-to-end inference. Experimental results with synthetic data are provided.},
urldate = {2021-08-30},
eprint = {1905.09883},
eprinttype = {arXiv},
author = {Tzen, Belinda and Raginsky, Maxim},
month = oct,
year = {2019},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
}
@article{beintema_nonlinear_2021,
title = {Nonlinear state-space identification using deep encoder networks},
url = {http://arxiv.org/abs/2012.07697},
abstract = {Nonlinear state-space identification for dynamical systems is most often performed by minimizing the simulation error to reduce the effect of model errors. This optimization problem becomes computationally expensive for large datasets. Moreover, the problem is also strongly non-convex, often leading to sub-optimal parameter estimates. This paper introduces a method that approximates the simulation loss by splitting the data set into multiple independent sections similar to the multiple shooting method. This splitting operation allows for the use of stochastic gradient optimization methods which scale well with data set size and has a smoothing effect on the non-convex cost function. The main contribution of this paper is the introduction of an encoder function to estimate the initial state at the start of each section. The encoder function estimates the initial states using a feed-forward neural network starting from historical input and output samples. The efficiency and performance of the proposed state-space encoder method is illustrated on two well-known benchmarks where, for instance, the method achieves the lowest known simulation error on the Wiener--Hammerstein benchmark.},
urldate = {2021-07-07},
eprint = {2012.07697},
eprinttype = {arXiv},
author = {Beintema, Gerben and Toth, Roland and Schoukens, Maarten},
month = apr,
year = {2021},
keywords = {Computer Science - Machine Learning, Electrical Engineering and Systems Science - Systems and Control},
}
@article{Gear1984b,
title = {Solving {Ordinary} {Differential} {Equations} with {Discontinuities}},
volume = {10},
issn = {0098-3500},
url = {http://doi.acm.org/10.1145/356068.356071},
doi = {10.1145/356068.356071},
number = {1},
journal = {ACM Transactions on Mathematical Software},
author = {Gear, C. W. and {\O}sterby, Ole},
month = jan,
year = {1984},
pages = {23--44},
}
@article{Betts1998,
title = {Survey of {Numerical} {Methods} for {Trajectory} {Optimization}},
volume = {21},
url = {https://doi.org/10.2514/2.4231},
doi = {10.2514/2.4231},
number = {2},
urldate = {2021-06-08},
journal = {Journal of Guidance, Control, and Dynamics},
author = {Betts, John T.},
year = {1998},
pages = {193--207},
}
@incollection{Beutler2005,
address = {Berlin, Heidelberg},
title = {Variational {Equations}},
isbn = {978-3-540-26870-3},
url = {https://doi.org/10.1007/3-540-26870-7_5},
booktitle = {Methods of {Celestial} {Mechanics}: {Volume} {I}: {Physical}, {Mathematical}, and {Numerical} {Principles}},
publisher = {Springer Berlin Heidelberg},
author = {Beutler, Gerhard},
year = {2005},
doi = {10.1007/3-540-26870-7_5},
pages = {175--207},
}
@article{Karniadakis2021,
title = {Physics-informed machine learning},
volume = {3},
issn = {2522-5820},
url = {https://www.nature.com/articles/s42254-021-00314-5},
doi = {10.1038/s42254-021-00314-5},
abstract = {Despite great progress in simulating multiphysics problems using the numerical discretization of partial differential equations (PDEs), one still cannot seamlessly incorporate noisy data into existing algorithms, mesh generation remains complex, and high-dimensional problems governed by parameterized PDEs cannot be tackled. Moreover, solving inverse problems with hidden physics is often prohibitively expensive and requires different formulations and elaborate computer codes. Machine learning has emerged as a promising alternative, but training deep neural networks requires big data, not always available for scientific problems. Instead, such networks can be trained from additional information obtained by enforcing the physical laws (for example, at random points in the continuous space-time domain). Such physics-informed learning integrates (noisy) data and mathematical models, and implements them through neural networks or other kernel-based regression networks. Moreover, it may be possible to design specialized network architectures that automatically satisfy some of the physical invariants for better accuracy, faster training and improved generalization. Here, we review some of the prevailing trends in embedding physics into machine learning, present some of the current capabilities and limitations and discuss diverse applications of physics-informed learning both for forward and inverse problems, including discovering hidden physics and tackling high-dimensional problems.},
language = {en},
number = {6},
urldate = {2021-05-31},
journal = {Nature Reviews Physics},
author = {Karniadakis, George Em and Kevrekidis, Ioannis G. and Lu, Lu and Perdikaris, Paris and Wang, Sifan and Yang, Liu},
month = may,
year = {2021},
pages = {422--440},
}
@misc{avizienis_fundamental_2001,
title = {Fundamental {Concepts} of {Dependability}},
author = {Avizienis, Algirdas and Laprie, Jean-Claude and Randell, Brian},
month = apr,
year = {2001},
note = {Technical report; institution to be confirmed (circulated as a {LAAS}-{CNRS}/{UCLA}/{Newcastle} report)},
}
@misc{grosse_csc321_nodate,
title = {{CSC321} {Lecture} 10: {Automatic} {Differentiation}},
language = {en},
author = {Grosse, Roger},
note = {Lecture notes, {CSC321}, University of Toronto},
}
@article{Baydin2018,
title = {Automatic differentiation in machine learning: a survey},
shorttitle = {Automatic differentiation in machine learning},
volume = {18},
number = {153},
url = {http://arxiv.org/abs/1502.05767},
abstract = {Derivatives, mostly in the form of gradients and Hessians, are ubiquitous in machine learning. Automatic differentiation (AD), also called algorithmic differentiation or simply "autodiff", is a family of techniques similar to but more general than backpropagation for efficiently and accurately evaluating derivatives of numeric functions expressed as computer programs. AD is a small but established field with applications in areas including computational fluid dynamics, atmospheric sciences, and engineering design optimization. Until very recently, the fields of machine learning and AD have largely been unaware of each other and, in some cases, have independently discovered each other's results. Despite its relevance, general-purpose AD has been missing from the machine learning toolbox, a situation slowly changing with its ongoing adoption under the names "dynamic computational graphs" and "differentiable programming". We survey the intersection of AD and machine learning, cover applications where AD has direct relevance, and address the main implementation techniques. By precisely defining the main differentiation techniques and their interrelationships, we aim to bring clarity to the usage of the terms "autodiff", "automatic differentiation", and "symbolic differentiation" as these are encountered more and more in machine learning settings.},
urldate = {2021-05-12},
journal = {Journal of Machine Learning Research},
eprint = {1502.05767},
eprinttype = {arXiv},
author = {Baydin, Atilim Gunes and Pearlmutter, Barak A. and Radul, Alexey Andreyevich and Siskind, Jeffrey Mark},
year = {2018},
pages = {1--43},
keywords = {68W30, 65D25, 68T05, Computer Science - Machine Learning, Computer Science - Symbolic Computation, G.1.4, I.2.6, Statistics - Machine Learning},
}
@misc{mehlhorn_cycle_nodate,
title = {Cycle {Bases} in {Graphs}: {Structure}, {Algorithms}, {Applications}, {Open} {Problems}},
language = {en},
author = {Mehlhorn, Kurt},
note = {Lecture slides},
}
@article{Berry2013,
title = {Time-{Scale} {Separation} from {Diffusion}-{Mapped} {Delay} {Coordinates}},
volume = {12},
url = {https://epubs.siam.org/doi/10.1137/12088183X},
doi = {10.1137/12088183X},
abstract = {It has long been known that the method of time-delay embedding can be used to reconstruct nonlinear dynamics from time series data. A less-appreciated fact is that the induced geometry of time-delay coordinates increasingly biases the reconstruction toward the stable directions as delays are added. This bias can be exploited, using the diffusion maps approach to dimension reduction, to extract dynamics on desired time scales from high-dimensional observed data. We demonstrate the technique on a wide range of examples, including data generated by a model of meandering spiral waves and video recordings of a liquid-crystal experiment.},
number = {2},
urldate = {2021-04-21},
journal = {SIAM Journal on Applied Dynamical Systems},
author = {Berry, T. and Cressman, J. R. and Greguri{\'c}-Feren{\v{c}}ek, Z. and Sauer, T.},
month = jan,
year = {2013},
pages = {618--649},
}
@inproceedings{Wegenkittl1997,
title = {Visualizing the behaviour of higher dimensional dynamical systems},
booktitle = {Proceedings of {IEEE} {Visualization} '97},
publisher = {IEEE},
author = {Wegenkittl, Rainer and L{\"o}ffelmann, Helwig and Gr{\"o}ller, Eduard},
year = {1997},
pages = {119--125},
}
@article{Bartolovic2020,
title = {Phase {Space} {Projection} of {Dynamical} {Systems}},
volume = {39},
issn = {1467-8659},
url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/cgf.13978},
doi = {10.1111/cgf.13978},
abstract = {Dynamical systems are commonly used to describe the state of time-dependent systems. In many engineering and control problems, the state space is high-dimensional making it difficult to analyze and visualize the behavior of the system for varying input conditions. We present a novel dimensionality reduction technique that is tailored to high-dimensional dynamical systems. In contrast to standard general purpose dimensionality reduction algorithms, we use energy minimization to preserve properties of the flow in the high-dimensional space. Once the projection operator is optimized, further high-dimensional trajectories are projected easily. Our 3D projection maintains a number of useful flow properties, such as critical points and flow maps, and is optimized to match geometric characteristics of the high-dimensional input, as well as optional user constraints. We apply our method to trajectories traced in the phase spaces of second-order dynamical systems, including finite-sized objects in fluids, the circular restricted three-body problem and a damped double pendulum. We compare the projections with standard visualization techniques, such as PCA, t-SNE and UMAP, and visualize the dynamical systems with multiple coordinated views interactively, featuring a spatial embedding, projection to subspaces, our dimensionality reduction and a seed point exploration tool.},
language = {en},
number = {3},
urldate = {2021-04-20},
journal = {Computer Graphics Forum},
author = {Bartolovic, Nemanja and Gross, Markus and G{\"u}nther, Tobias},
year = {2020},
keywords = {scientific visualization, visualization techniques},
pages = {253--264},
}
@article{Narendra1992,
title = {Neural networks and dynamical systems},
volume = {6},
issn = {0888-613X},
url = {https://www.sciencedirect.com/science/article/pii/0888613X9290014Q},
doi = {10.1016/0888-613X(92)90014-Q},
abstract = {Models for the identification and control of nonlinear dynamical systems using neural networks were introduced by Narendra and Parthasarathy in 1990, and methods for the adjustment of model parameters were also suggested. Simulation results of simple nonlinear systems were presented to demonstrate the feasibility of the schemes proposed. The concepts introduced at that time are investigated in this paper in greater detail. In particular, a number of questions that arise when the methods are applied to more complex systems are addressed. These include nonlinear systems of higher order as well as multivariable systems. The effect of using simpler models for both identification and control are discussed, and a new controller structure containing a linear part in addition to a multilayer neural network is introduced.},
language = {en},
number = {2},
urldate = {2021-04-14},
journal = {International Journal of Approximate Reasoning},
author = {Narendra, Kumpati S. and Parthasarathy, Kannan},
month = feb,
year = {1992},
keywords = {backpropagation, control, dynamic backpropagation, dynamical systems, identification, neural networks},
pages = {109--131},
}
@inproceedings{behrmann_invertible_nodate,
title = {Invertible {Residual} {Networks}},
language = {en},
booktitle = {Proceedings of the 36th international conference on machine learning},
series = {Proceedings of {Machine} {Learning} {Research}},
volume = {97},
publisher = {PMLR},
author = {Behrmann, Jens and Grathwohl, Will and Chen, Ricky T. Q. and Duvenaud, David and Jacobsen, Joern-Henrik},
year = {2019},
pages = {573--582},
}
@inproceedings{Behrmann2019,
series = {Proceedings of machine learning research},
title = {Invertible residual networks},
volume = {97},
url = {http://proceedings.mlr.press/v97/behrmann19a.html},
abstract = {We show that standard ResNet architectures can be made invertible, allowing the same model to be used for classification, density estimation, and generation. Typically, enforcing invertibility requires partitioning dimensions or restricting network architectures. In contrast, our approach only requires adding a simple normalization step during training, already available in standard frameworks. Invertible ResNets define a generative model which can be trained by maximum likelihood on unlabeled data. To compute likelihoods, we introduce a tractable approximation to the Jacobian log-determinant of a residual block. Our empirical evaluation shows that invertible ResNets perform competitively with both state-of-the-art image classifiers and flow-based generative models, something that has not been previously achieved with a single architecture.},
booktitle = {Proceedings of the 36th international conference on machine learning},
publisher = {PMLR},
author = {Behrmann, Jens and Grathwohl, Will and Chen, Ricky T. Q. and Duvenaud, David and Jacobsen, Joern-Henrik},
editor = {Chaudhuri, Kamalika and Salakhutdinov, Ruslan},
month = jun,
year = {2019},
pages = {573--582},
}
@inproceedings{Ott2021,
title = {{ResNet} after all: {Neural} {ODEs} and their numerical solution},
url = {https://openreview.net/forum?id=HxzSxSxLOJZ},
booktitle = {International conference on learning representations},
author = {Ott, Katharina and Katiyar, Prateek and Hennig, Philipp and Tiemann, Michael},
year = {2021},
}
@article{Arnold2021,
title = {State–space modeling for control based on physics-informed neural networks},
volume = {101},
issn = {0952-1976},
url = {https://www.sciencedirect.com/science/article/pii/S0952197621000427},
doi = {10.1016/j.engappai.2021.104195},
abstract = {Dynamic system models, based on partial differential equations (PDEs), are often unsuitable for direct use in control or state estimation purposes, due to the high computational cost arising from the necessity to apply sophisticated numerical methods for a solution, such as semi-discretization, also known as spatial discretization. Hence, there is often an inevitable trade-off between accuracy and computational efficiency during the model reduction step to ensure real-time applicability. In this contribution, we propose a state–space model formulation, using so-called physics-informed neural networks. This modeling approach enables a highly efficient inclusion of complex physical system descriptions within the design of control or state estimation setups. The resulting state–space model does not require any numerical solution techniques during the state propagation, as each time step is based on the evaluation of a reasonably sized neural net that approximates the solution of the PDE. Thus, this approach is suitable for real-time applications of various complex dynamic systems that can be described by one or a set of PDEs. Besides the modeling approach itself, the contribution also provides an illustrative example of the state–space modeling method in the context of model predictive control, as well as state estimation with an extended Kalman filter. These methods will be applied to a system based on a numerical solution of the Burgers equation.},
language = {en},
urldate = {2021-03-18},
journal = {Engineering Applications of Artificial Intelligence},
author = {Arnold, Florian and King, Rudibert},
month = may,
year = {2021},
keywords = {Machine learning, Model predictive control, Neural networks, State estimation, State–space},
pages = {104195},
}
@incollection{Doan2020,
address = {Cham},
series = {Lecture Notes in Computer Science},
title = {Learning {Hidden} {States} in a {Chaotic} {System}: {A} {Physics}-{Informed} {Echo} {State} {Network} {Approach}},
volume = {12142},
isbn = {978-3-030-50432-8 978-3-030-50433-5},
shorttitle = {Learning {Hidden} {States} in a {Chaotic} {System}},
url = {http://link.springer.com/10.1007/978-3-030-50433-5_9},
language = {en},
urldate = {2020-09-30},
booktitle = {Computational {Science} – {ICCS} 2020},
publisher = {Springer International Publishing},
author = {Doan, Nguyen Anh Khoa and Polifke, Wolfgang and Magri, Luca},
editor = {Krzhizhanovskaya, Valeria V. and Závodszky, Gábor and Lees, Michael H. and Dongarra, Jack J. and Sloot, Peter M. A. and Brissos, Sérgio and Teixeira, João},
year = {2020},
doi = {10.1007/978-3-030-50433-5_9},
pages = {117--123},
}
@article{Praditia2020,
title = {Improving {Thermochemical} {Energy} {Storage} {Dynamics} {Forecast} with {Physics}-{Inspired} {Neural} {Network} {Architecture}},
volume = {13},
issn = {1996-1073},
url = {https://www.mdpi.com/1996-1073/13/15/3873},
doi = {10.3390/en13153873},
abstract = {Thermochemical Energy Storage (TCES), specifically the calcium oxide (CaO)/calcium hydroxide (Ca(OH)2) system is a promising energy storage technology with relatively high energy density and low cost. However, the existing models available to predict the system's internal states are computationally expensive. An accurate and real-time capable model is therefore still required to improve its operational control. In this work, we implement a Physics-Informed Neural Network (PINN) to predict the dynamics of the TCES internal state. Our proposed framework addresses three physical aspects to build the PINN: (1) we choose a Nonlinear Autoregressive Network with Exogeneous Inputs (NARX) with deeper recurrence to address the nonlinear latency; (2) we train the network in closed-loop to capture the long-term dynamics; and (3) we incorporate physical regularisation during its training, calculated based on discretized mole and energy balance equations. To train the network, we perform numerical simulations on an ensemble of system parameters to obtain synthetic data. Even though the suggested approach provides results with the error of $3.96 \times 10^{-4}$ which is in the same range as the result without physical regularisation, it is superior compared to conventional Artificial Neural Network (ANN) strategies because it ensures physical plausibility of the predictions, even in a highly dynamic and nonlinear problem. Consequently, the suggested PINN can be further developed for more complicated analysis of the TCES system.},
number = {15},
journal = {Energies},
author = {Praditia, Timothy and Walser, Thilo and Oladyshkin, Sergey and Nowak, Wolfgang},
year = {2020},
}
@article{Wang2020a,
title = {Understanding and mitigating gradient pathologies in physics-informed neural networks},
url = {http://arxiv.org/abs/2001.04536},
abstract = {The widespread use of neural networks across different scientific domains often involves constraining them to satisfy certain symmetries, conservation laws, or other domain knowledge. Such constraints are often imposed as soft penalties during model training and effectively act as domain-specific regularizers of the empirical risk loss. Physics-informed neural networks is an example of this philosophy in which the outputs of deep neural networks are constrained to approximately satisfy a given set of partial differential equations. In this work we review recent advances in scientific machine learning with a specific focus on the effectiveness of physics-informed neural networks in predicting outcomes of physical systems and discovering hidden physics from noisy data. We will also identify and analyze a fundamental mode of failure of such approaches that is related to numerical stiffness leading to unbalanced back-propagated gradients during model training. To address this limitation we present a learning rate annealing algorithm that utilizes gradient statistics during model training to balance the interplay between different terms in composite loss functions. We also propose a novel neural network architecture that is more resilient to such gradient pathologies. Taken together, our developments provide new insights into the training of constrained neural networks and consistently improve the predictive accuracy of physics-informed neural networks by a factor of 50-100x across a range of problems in computational physics. All code and data accompanying this manuscript are publicly available at {\textbackslash}url\{https://github.com/PredictiveIntelligenceLab/GradientPathologiesPINNs\}.},
urldate = {2021-03-17},
author = {Wang, Sifan and Teng, Yujun and Perdikaris, Paris},
month = jan,
year = {2020},
eprint = {2001.04536},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Mathematics - Numerical Analysis, Statistics - Machine Learning},
}
@article{Wang2020,
title = {When and why {PINNs} fail to train: {A} neural tangent kernel perspective},
shorttitle = {When and why {PINNs} fail to train},
url = {http://arxiv.org/abs/2007.14527},
abstract = {Physics-informed neural networks (PINNs) have lately received great attention thanks to their flexibility in tackling a wide range of forward and inverse problems involving partial differential equations. However, despite their noticeable empirical success, little is known about how such constrained neural networks behave during their training via gradient descent. More importantly, even less is known about why such models sometimes fail to train at all. In this work, we aim to investigate these questions through the lens of the Neural Tangent Kernel (NTK); a kernel that captures the behavior of fully-connected neural networks in the infinite width limit during training via gradient descent. Specifically, we derive the NTK of PINNs and prove that, under appropriate conditions, it converges to a deterministic kernel that stays constant during training in the infinite-width limit. This allows us to analyze the training dynamics of PINNs through the lens of their limiting NTK and find a remarkable discrepancy in the convergence rate of the different loss components contributing to the total training error. To address this fundamental pathology, we propose a novel gradient descent algorithm that utilizes the eigenvalues of the NTK to adaptively calibrate the convergence rate of the total training error. Finally, we perform a series of numerical experiments to verify the correctness of our theory and the practical effectiveness of the proposed algorithms. The data and code accompanying this manuscript are publicly available at {\textbackslash}url\{https://github.com/PredictiveIntelligenceLab/PINNsNTK\}.},
urldate = {2021-03-17},
author = {Wang, Sifan and Yu, Xinling and Perdikaris, Paris},
month = jul,
year = {2020},
eprint = {2007.14527},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Mathematics - Numerical Analysis, Statistics - Machine Learning},
}
@article{rahaman_spectral_2019,
title = {On the {Spectral} {Bias} of {Neural} {Networks}},
url = {http://arxiv.org/abs/1806.08734},
abstract = {Neural networks are known to be a class of highly expressive functions able to fit even random input-output mappings with \$100{\textbackslash}\%\$ accuracy. In this work, we present properties of neural networks that complement this aspect of expressivity. By using tools from Fourier analysis, we show that deep ReLU networks are biased towards low frequency functions, meaning that they cannot have local fluctuations without affecting their global behavior. Intuitively, this property is in line with the observation that over-parameterized networks find simple patterns that generalize across data samples. We also investigate how the shape of the data manifold affects expressivity by showing evidence that learning high frequencies gets {\textbackslash}emph\{easier\} with increasing manifold complexity, and present a theoretical understanding of this behavior. Finally, we study the robustness of the frequency components with respect to parameter perturbation, to develop the intuition that the parameters must be finely tuned to express high frequency functions.},
urldate = {2021-03-17},
author = {Rahaman, Nasim and Baratin, Aristide and Arpit, Devansh and Draxler, Felix and Lin, Min and Hamprecht, Fred A. and Bengio, Yoshua and Courville, Aaron},
month = may,
year = {2019},
eprint = {1806.08734},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
}
@misc{noauthor_191000359_nodate,
title = {Truth or {Backpropaganda}? {An} {Empirical} {Investigation} of {Deep} {Learning} {Theory}},
url = {https://arxiv.org/abs/1910.00359},
urldate = {2021-03-16},
eprint = {1910.00359},
archiveprefix = {arXiv},
internal-note = {TODO: author metadata missing from auto-import; add authors from the arXiv page},
}
@article{Li2018,
title = {Visualizing the {Loss} {Landscape} of {Neural} {Nets}},
url = {http://arxiv.org/abs/1712.09913},
abstract = {Neural network training relies on our ability to find "good" minimizers of highly non-convex loss functions. It is well-known that certain network architecture designs (e.g., skip connections) produce loss functions that train easier, and well-chosen training parameters (batch size, learning rate, optimizer) produce minimizers that generalize better. However, the reasons for these differences, and their effects on the underlying loss landscape, are not well understood. In this paper, we explore the structure of neural loss functions, and the effect of loss landscapes on generalization, using a range of visualization methods. First, we introduce a simple "filter normalization" method that helps us visualize loss function curvature and make meaningful side-by-side comparisons between loss functions. Then, using a variety of visualizations, we explore how network architecture affects the loss landscape, and how training parameters affect the shape of minimizers.},
urldate = {2021-03-15},
author = {Li, Hao and Xu, Zheng and Taylor, Gavin and Studer, Christoph and Goldstein, Tom},
month = nov,
year = {2018},
eprint = {1712.09913},
archiveprefix = {arXiv},
keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning, Statistics - Machine Learning},
}
@article{green_persistence_1986,
title = {Persistence of excitation in linear systems},
volume = {7},
issn = {0167-6911},
url = {https://linkinghub.elsevier.com/retrieve/pii/0167691186900526},
doi = {10.1016/0167-6911(86)90052-6},
language = {en},
number = {5},
urldate = {2021-02-24},
journal = {Systems \& Control Letters},
author = {Green, Michael and Moore, John B.},
month = sep,
year = {1986},
pages = {351--360},
}
@incollection{Paszke2019,
title = {{PyTorch}: {An} imperative style, high-performance deep learning library},
url = {http://papers.nips.cc/paper/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf},
booktitle = {Advances in neural information processing systems 32},
publisher = {Curran Associates, Inc.},
author = {Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and Desmaison, Alban and Kopf, Andreas and Yang, Edward and DeVito, Zachary and Raison, Martin and Tejani, Alykhan and Chilamkurthy, Sasank and Steiner, Benoit and Fang, Lu and Bai, Junjie and Chintala, Soumith},
editor = {Wallach, H. and Larochelle, H. and Beygelzimer, A. and d'Alché-Buc, F. and Fox, E. and Garnett, R.},
year = {2019},
pages = {8026--8037},
}
@article{Talathi2016,
title = {Improving performance of recurrent neural network with {ReLU} nonlinearity},
url = {http://arxiv.org/abs/1511.03771},
abstract = {In recent years significant progress has been made in successfully training recurrent neural networks (RNNs) on sequence learning problems involving long range temporal dependencies. The progress has been made on three fronts: (a) Algorithmic improvements involving sophisticated optimization techniques, (b) network design involving complex hidden layer nodes and specialized recurrent layer connections and (c) weight initialization methods. In this paper, we focus on recently proposed weight initialization with identity matrix for the recurrent weights in a RNN. This initialization is specifically proposed for hidden nodes with Rectified Linear Unit (ReLU) non linearity. We offer a simple dynamical systems perspective on weight initialization process, which allows us to propose a modified weight initialization strategy. We show that this initialization technique leads to successfully training RNNs composed of ReLUs. We demonstrate that our proposal produces comparable or better solution for three toy problems involving long range temporal structure: the addition problem, the multiplication problem and the MNIST classification problem using sequence of pixels. In addition, we present results for a benchmark action recognition problem.},
urldate = {2021-03-10},
author = {Talathi, Sachin S. and Vartak, Aniket},
month = jun,
year = {2016},
eprint = {1511.03771},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing},
}
@article{drgona_spectral_2020,
title = {Spectral {Analysis} and {Stability} of {Deep} {Neural} {Dynamics}},
url = {http://arxiv.org/abs/2011.13492},
abstract = {Our modern history of deep learning follows the arc of famous emergent disciplines in engineering (e.g. aero- and fluid dynamics) when theory lagged behind successful practical applications. Viewing neural networks from a dynamical systems perspective, in this work, we propose a novel characterization of deep neural networks as pointwise affine maps, making them accessible to a broader range of analysis methods to help close the gap between theory and practice. We begin by showing the equivalence of neural networks with parameter-varying affine maps parameterized by the state (feature) vector. As the paper's main results, we provide necessary and sufficient conditions for the global stability of generic deep feedforward neural networks. Further, we identify links between the spectral properties of layer-wise weight parametrizations, different activation functions, and their effect on the overall network's eigenvalue spectra. We analyze a range of neural networks with varying weight initializations, activation functions, bias terms, and depths. Our view of neural networks as affine parameter varying maps allows us to "crack open the black box" of global neural network dynamical behavior through visualization of stationary points, regions of attraction, state-space partitioning, eigenvalue spectra, and stability properties. Our analysis covers neural networks both as an end-to-end function and component-wise without simplifying assumptions or approximations. The methods we develop here provide tools to establish relationships between global neural dynamical properties and their constituent components which can aid in the principled design of neural networks for dynamics modeling and optimal control.},
urldate = {2021-03-10},
author = {Drgona, Jan and Skomski, Elliott and Vasisht, Soumya and Tuor, Aaron and Vrabie, Draguna},
month = nov,
year = {2020},
eprint = {2011.13492},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing},
}
@inproceedings{Ioffe2015,
title = {Batch {Normalization}: {Accelerating} {Deep} {Network} {Training} by {Reducing} {Internal} {Covariate} {Shift}},
shorttitle = {Batch {Normalization}},
url = {http://proceedings.mlr.press/v37/ioffe15.html},
abstract = {Training Deep Neural Networks is complicated by the fact that the distribution of each layer’s inputs changes during training, as the parameters of the previous layers change. This slows down the t...},
language = {en},
urldate = {2021-03-10},
booktitle = {International {Conference} on {Machine} {Learning}},
publisher = {PMLR},
author = {Ioffe, Sergey and Szegedy, Christian},
month = jun,
year = {2015},
issn = {1938-7228},
pages = {448--456},
}
@article{Santurkar2019,
title = {How {Does} {Batch} {Normalization} {Help} {Optimization}?},
url = {http://arxiv.org/abs/1805.11604},
abstract = {Batch Normalization (BatchNorm) is a widely adopted technique that enables faster and more stable training of deep neural networks (DNNs). Despite its pervasiveness, the exact reasons for BatchNorm's effectiveness are still poorly understood. The popular belief is that this effectiveness stems from controlling the change of the layers' input distributions during training to reduce the so-called "internal covariate shift". In this work, we demonstrate that such distributional stability of layer inputs has little to do with the success of BatchNorm. Instead, we uncover a more fundamental impact of BatchNorm on the training process: it makes the optimization landscape significantly smoother. This smoothness induces a more predictive and stable behavior of the gradients, allowing for faster training.},
urldate = {2021-03-10},
author = {Santurkar, Shibani and Tsipras, Dimitris and Ilyas, Andrew and Madry, Aleksander},
month = apr,
year = {2019},
eprint = {1805.11604},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing, Statistics - Machine Learning},
}
@article{Kingma2017,
title = {Adam: {A} {Method} for {Stochastic} {Optimization}},
shorttitle = {Adam},
url = {http://arxiv.org/abs/1412.6980},
abstract = {We introduce Adam, an algorithm for first-order gradient-based optimization of stochastic objective functions, based on adaptive estimates of lower-order moments. The method is straightforward to implement, is computationally efficient, has little memory requirements, is invariant to diagonal rescaling of the gradients, and is well suited for problems that are large in terms of data and/or parameters. The method is also appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. The hyper-parameters have intuitive interpretations and typically require little tuning. Some connections to related algorithms, on which Adam was inspired, are discussed. We also analyze the theoretical convergence properties of the algorithm and provide a regret bound on the convergence rate that is comparable to the best known results under the online convex optimization framework. Empirical results demonstrate that Adam works well in practice and compares favorably to other stochastic optimization methods. Finally, we discuss AdaMax, a variant of Adam based on the infinity norm.},
urldate = {2021-03-10},
author = {Kingma, Diederik P. and Ba, Jimmy},
month = jan,
year = {2017},
eprint = {1412.6980},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning},
}
@inproceedings{glorot_understanding_2010,
title = {Understanding the difficulty of training deep feedforward neural networks},
url = {http://proceedings.mlr.press/v9/glorot10a.html},
abstract = {Whereas before 2006 it appears that deep multi-layer neural networks were not successfully trained, since then several algorithms have been shown to successfully train them, with experimental resul...},
language = {en},
urldate = {2021-03-10},
booktitle = {Proceedings of the {Thirteenth} {International} {Conference} on {Artificial} {Intelligence} and {Statistics}},
publisher = {JMLR Workshop and Conference Proceedings},
author = {Glorot, Xavier and Bengio, Yoshua},
month = mar,
year = {2010},
issn = {1938-7228},
pages = {249--256},
}
@article{esteve_large-time_2020,
title = {Large-time asymptotics in deep learning},
url = {http://arxiv.org/abs/2008.02491},
abstract = {It is by now well-known that practical deep supervised learning may roughly be cast as an optimal control problem for a specific discrete-time, nonlinear dynamical system called an artificial neural network. In this work, we consider the continuous-time formulation of the deep supervised learning problem, and study the latter's behavior when the final time horizon increases, a fact that can be interpreted as increasing the number of layers in the neural network setting.When considering the classical regularized empirical risk minimization problem, we show that, in long time, the optimal states converge to zero training error, namely approach the zero training error regime, whilst the optimal control parameters approach, on an appropriate scale, minimal norm parameters with corresponding states precisely in the zero training error regime. This result provides an alternative theoretical underpinning to the notion that neural networks learn best in the overparametrized regime, when seen from the large layer perspective. We also propose a learning problem consisting of minimizing a cost with a state tracking term, and establish the well-known turnpike property, which indicates that the solutions of the learning problem in long time intervals consist of three pieces, the first and the last of which being transient short-time arcs, and the middle piece being a long-time arc staying exponentially close to the optimal solution of an associated static learning problem. This property in fact stipulates a quantitative estimate for the number of layers required to reach the zero training error regime. Both of the aforementioned asymptotic regimes are addressed in the context of continuous-time and continuous space-time neural networks, the latter taking the form of nonlinear, integro-differential equations, hence covering residual neural networks with both fixed and possibly variable depths.},
urldate = {2021-03-09},
author = {Esteve, Carlos and Geshkovski, Borjan and Pighin, Dario and Zuazua, Enrique},
month = aug,
year = {2020},
eprint = {2008.02491},
archiveprefix = {arXiv},
keywords = {68T07, 34H05, 34H15, 93D23, 93D20, Computer Science - Machine Learning, Mathematics - Optimization and Control},
}
@article{He2015,
title = {Deep {Residual} {Learning} for {Image} {Recognition}},
url = {http://arxiv.org/abs/1512.03385},
abstract = {Deeper neural networks are more difficult to train. We present a residual learning framework to ease the training of networks that are substantially deeper than those used previously. We explicitly reformulate the layers as learning residual functions with reference to the layer inputs, instead of learning unreferenced functions. We provide comprehensive empirical evidence showing that these residual networks are easier to optimize, and can gain accuracy from considerably increased depth. On the ImageNet dataset we evaluate residual nets with a depth of up to 152 layers---8x deeper than VGG nets but still having lower complexity. An ensemble of these residual nets achieves 3.57\% error on the ImageNet test set. This result won the 1st place on the ILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100 and 1000 layers. The depth of representations is of central importance for many visual recognition tasks. Solely due to our extremely deep representations, we obtain a 28\% relative improvement on the COCO object detection dataset. Deep residual nets are foundations of our submissions to ILSVRC \& COCO 2015 competitions, where we also won the 1st places on the tasks of ImageNet detection, ImageNet localization, COCO detection, and COCO segmentation.},
urldate = {2021-03-02},
author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
month = dec,
year = {2015},
eprint = {1512.03385},
archiveprefix = {arXiv},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
}
@article{liu_hierarchical_2020,
title = {Hierarchical {Deep} {Learning} of {Multiscale} {Differential} {Equation} {Time}-{Steppers}},
url = {http://arxiv.org/abs/2008.09768},
abstract = {Nonlinear differential equations rarely admit closed-form solutions, thus requiring numerical time-stepping algorithms to approximate solutions. Further, many systems characterized by multiscale physics exhibit dynamics over a vast range of timescales, making numerical integration computationally expensive due to numerical stiffness. In this work, we develop a hierarchy of deep neural network time-steppers to approximate the flow map of the dynamical system over a disparate range of time-scales. The resulting model is purely data-driven and leverages features of the multiscale dynamics, enabling numerical integration and forecasting that is both accurate and highly efficient. Moreover, similar ideas can be used to couple neural network-based models with classical numerical time-steppers. Our multiscale hierarchical time-stepping scheme provides important advantages over current time-stepping algorithms, including (i) circumventing numerical stiffness due to disparate time-scales, (ii) improved accuracy in comparison with leading neural-network architectures, (iii) efficiency in long-time simulation/forecasting due to explicit training of slow time-scale dynamics, and (iv) a flexible framework that is parallelizable and may be integrated with standard numerical time-stepping algorithms. The method is demonstrated on a wide range of nonlinear dynamical systems, including the Van der Pol oscillator, the Lorenz system, the Kuramoto-Sivashinsky equation, and fluid flow pass a cylinder; audio and video signals are also explored. On the sequence generation examples, we benchmark our algorithm against state-of-the-art methods, such as LSTM, reservoir computing, and clockwork RNN. Despite the structural simplicity of our method, it outperforms competing methods on numerical integration.},
urldate = {2021-03-03},
author = {Liu, Yuying and Kutz, J. Nathan and Brunton, Steven L.},
month = aug,
year = {2020},
eprint = {2008.09768},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning, Mathematics - Numerical Analysis, Physics - Computational Physics},
}
@article{Gholami2019,
title = {{ANODE}: {Unconditionally} {Accurate} {Memory}-{Efficient} {Gradients} for {Neural} {ODEs}},
shorttitle = {{ANODE}},
url = {http://arxiv.org/abs/1902.10298},
abstract = {Residual neural networks can be viewed as the forward Euler discretization of an Ordinary Differential Equation (ODE) with a unit time step. This has recently motivated researchers to explore other discretization approaches and train ODE based networks. However, an important challenge of neural ODEs is their prohibitive memory cost during gradient backpropogation. Recently a method proposed in [8], claimed that this memory overhead can be reduced from O(LN\_t), where N\_t is the number of time steps, down to O(L) by solving forward ODE backwards in time, where L is the depth of the network. However, we will show that this approach may lead to several problems: (i) it may be numerically unstable for ReLU/non-ReLU activations and general convolution operators, and (ii) the proposed optimize-then-discretize approach may lead to divergent training due to inconsistent gradients for small time step sizes. We discuss the underlying problems, and to address them we propose ANODE, an Adjoint based Neural ODE framework which avoids the numerical instability related problems noted above, and provides unconditionally accurate gradients. ANODE has a memory footprint of O(L) + O(N\_t), with the same computational cost as reversing ODE solve. We furthermore, discuss a memory efficient algorithm which can further reduce this footprint with a trade-off of additional computational cost. We show results on Cifar-10/100 datasets using ResNet and SqueezeNext neural networks.},
urldate = {2021-03-02},
eprint = {1902.10298},
eprinttype = {arXiv},
author = {Gholami, Amir and Keutzer, Kurt and Biros, George},
month = jul,
year = {2019},
note = {arXiv: 1902.10298},
keywords = {Computer Science - Machine Learning},
}
@inproceedings{simpson_design_2008,
address = {Victoria, British Columbia, Canada},
title = {Design and {Analysis} of {Computer} {Experiments} in {Multidisciplinary} {Design} {Optimization}: {A} {Review} of {How} {Far} {We} {Have} {Come} - {Or} {Not}},
isbn = {978-1-60086-982-2},
shorttitle = {Design and {Analysis} of {Computer} {Experiments} in {Multidisciplinary} {Design} {Optimization}},
url = {http://arc.aiaa.org/doi/10.2514/6.2008-5802},
doi = {10.2514/6.2008-5802},
language = {en},
urldate = {2021-02-26},
booktitle = {12th {AIAA}/{ISSMO} {Multidisciplinary} {Analysis} and {Optimization} {Conference}},
publisher = {American Institute of Aeronautics and Astronautics},
author = {Simpson, Timothy and Toropov, Vasilli and Balabanov, Vladimir and Viana, Felipe},
month = sep,
year = {2008},
note = {00324},
}
@book{american_institute_of_aeronautics_and_astronautics_12th_2008,
address = {Reston, VA},
title = {12th {AIAA}/{ISSMO} {Multidisciplinary} {Analysis} and {Optimization} {Conference}, {September} 10--12, 2008},
isbn = {978-1-56347-947-2},
language = {English},
publisher = {American Institute of Aeronautics and Astronautics},
author = {{American Institute of Aeronautics and Astronautics}},
year = {2008},
note = {OCLC: 949741311},
}
@article{swischuk_projection-based_2019,
title = {Projection-based model reduction: {Formulations} for physics-based machine learning},
volume = {179},
issn = {00457930},
shorttitle = {Projection-based model reduction},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0045793018304250},
doi = {10.1016/j.compfluid.2018.07.021},
language = {en},
urldate = {2021-02-18},
journal = {Computers \& Fluids},
author = {Swischuk, Renee and Mainini, Laura and Peherstorfer, Benjamin and Willcox, Karen},
month = jan,
year = {2019},
note = {00104},
pages = {704--717},
}
@incollection{ilchmann_model_2017,
address = {Cham},
title = {Model {Order} {Reduction} for {Differential}-{Algebraic} {Equations}: {A} {Survey}},
isbn = {978-3-319-46617-0 978-3-319-46618-7},
shorttitle = {Model {Order} {Reduction} for {Differential}-{Algebraic} {Equations}},
url = {http://link.springer.com/10.1007/978-3-319-46618-7_3},
urldate = {2021-02-18},
booktitle = {Surveys in {Differential}-{Algebraic} {Equations} {IV}},
publisher = {Springer International Publishing},
author = {Benner, Peter and Stykel, Tatjana},
editor = {Ilchmann, Achim and Reis, Timo},
year = {2017},
doi = {10.1007/978-3-319-46618-7_3},
series = {Differential-Algebraic Equations Forum},
pages = {107--160},
}
@techreport{antoulas_survey_2000,
type = {{CAAM} {Technical} {Report}},
institution = {Rice University},
title = {A survey of model reduction methods for large-scale systems},
url = {https://hdl.handle.net/1911/101963},
number = {TR00-38},
author = {Antoulas, Athanasios C. and Sorensen, Danny C. and Gugercin, Serkan},
year = {2000},
note = {00787},
}
@article{benner_survey_2015,
title = {A {Survey} of {Projection}-{Based} {Model} {Reduction} {Methods} for {Parametric} {Dynamical} {Systems}},
volume = {57},
issn = {0036-1445, 1095-7200},
url = {http://epubs.siam.org/doi/10.1137/130932715},
doi = {10.1137/130932715},
language = {en},
number = {4},
urldate = {2021-02-18},
journal = {SIAM Review},
author = {Benner, Peter and Gugercin, Serkan and Willcox, Karen},
month = jan,
year = {2015},
note = {00884},
pages = {483--531},
}
@book{hairer_geometric_2006,
title = {Geometric numerical integration: structure-preserving algorithms for ordinary differential equations},
volume = {31},
publisher = {Springer Science \& Business Media},
author = {Hairer, Ernst and Lubich, Christian and Wanner, Gerhard},
year = {2006},
note = {05268},
}
@article{zhong_symplectic_2020,
title = {Symplectic {ODE}-{Net}: {Learning} {Hamiltonian} {Dynamics} with {Control}},
shorttitle = {Symplectic {ODE}-{Net}},
url = {http://arxiv.org/abs/1909.12077},
abstract = {In this paper, we introduce Symplectic ODE-Net (SymODEN), a deep learning framework which can infer the dynamics of a physical system, given by an ordinary differential equation (ODE), from observed state trajectories. To achieve better generalization with fewer training samples, SymODEN incorporates appropriate inductive bias by designing the associated computation graph in a physics-informed manner. In particular, we enforce Hamiltonian dynamics with control to learn the underlying dynamics in a transparent way, which can then be leveraged to draw insight about relevant physical aspects of the system, such as mass and potential energy. In addition, we propose a parametrization which can enforce this Hamiltonian formalism even when the generalized coordinate data is embedded in a high-dimensional space or we can only access velocity data instead of generalized momentum. This framework, by offering interpretable, physically-consistent models for physical systems, opens up new possibilities for synthesizing model-based control strategies.},
urldate = {2021-01-29},
eprint = {1909.12077},
eprinttype = {arXiv},
author = {Zhong, Yaofeng Desmond and Dey, Biswadip and Chakraborty, Amit},
month = apr,
year = {2020},
note = {arXiv: 1909.12077},
keywords = {Computer Science - Machine Learning, Electrical Engineering and Systems Science - Systems and Control, Physics - Computational Physics, Statistics - Machine Learning},
}
@article{Wu2019,
title = {Numerical {Aspects} for {Approximating} {Governing} {Equations} {Using} {Data}},
volume = {384},
issn = {00219991},
url = {http://arxiv.org/abs/1809.09170},
doi = {10.1016/j.jcp.2019.01.030},
abstract = {We present eﬀective numerical algorithms for locally recovering unknown governing diﬀerential equations from measurement data. We employ a set of standard basis functions, e.g., polynomials, to approximate the governing equation with high accuracy. Upon recasting the problem into a function approximation problem, we discuss several important aspects for accurate approximation. Most notably, we discuss the importance of using a large number of short bursts of trajectory data, rather than using data from a single long trajectory. Several options for the numerical algorithms to perform accurate approximation are then presented, along with an error estimate of the ﬁnal equation approximation. We then present an extensive set of numerical examples of both linear and nonlinear systems to demonstrate the properties and eﬀectiveness of our equation recovery algorithms.},
language = {en},
urldate = {2021-01-26},
journal = {Journal of Computational Physics},
author = {Wu, Kailiang and Xiu, Dongbin},
month = may,
year = {2019},
note = {arXiv: 1809.09170},
keywords = {Computer Science - Machine Learning, Mathematics - Dynamical Systems, Mathematics - Numerical Analysis, Statistics - Machine Learning},
pages = {200--221},
}
@article{Shin2020,
title = {On the convergence of physics informed neural networks for linear second-order elliptic and parabolic type {PDEs}},
volume = {28},
issn = {1815-2406, 1991-7120},
url = {http://arxiv.org/abs/2004.01806},
doi = {10.4208/cicp.OA-2020-0193},
abstract = {Physics informed neural networks (PINNs) are deep learning based techniques for solving partial differential equations (PDEs) encounted in computational science and engineering. Guided by data and physical laws, PINNs find a neural network that approximates the solution to a system of PDEs. Such a neural network is obtained by minimizing a loss function in which any prior knowledge of PDEs and data are encoded. Despite its remarkable empirical success in one, two or three dimensional problems, there is little theoretical justification for PINNs. As the number of data grows, PINNs generate a sequence of minimizers which correspond to a sequence of neural networks. We want to answer the question: Does the sequence of minimizers converge to the solution to the PDE? We consider two classes of PDEs: linear second-order elliptic and parabolic. By adapting the Schauder approach and the maximum principle, we show that the sequence of minimizers strongly converges to the PDE solution in \$C{\textasciicircum}0\$. Furthermore, we show that if each minimizer satisfies the initial/boundary conditions, the convergence mode becomes \$H{\textasciicircum}1\$. Computational examples are provided to illustrate our theoretical findings. To the best of our knowledge, this is the first theoretical work that shows the consistency of PINNs.},
number = {5},
urldate = {2021-01-26},
journal = {Communications in Computational Physics},
author = {Shin, Yeonjong and Darbon, Jerome and Karniadakis, George Em},
month = jun,
year = {2020},
note = {arXiv: 2004.01806},
keywords = {Computer Science - Machine Learning, Mathematics - Numerical Analysis},
pages = {2042--2074},
}
@inproceedings{thule_towards_2018,
address = {Toulouse, France},
title = {Towards {Verification} of {Hybrid} {Co}-simulation {Algorithms}},
isbn = {978-3-030-04771-9},
doi = {10.1007/978-3-030-04771-9_1},
booktitle = {Workshop on {Formal} {Co}-{Simulation} of {Cyber}-{Physical} {Systems}},
publisher = {Springer, Cham},
author = {Thule, Casper and Gomes, Cláudio and Deantoni, Julien and Larsen, Peter Gorm and Brauer, Jörg and Vangheluwe, Hans},
year = {2018},
note = {00014},
}
@article{kofman_quantized-state_2001,
title = {Quantized-state systems: a {DEVS} approach for continuous system simulation},
volume = {18},
issn = {0740-6797},
number = {3},
journal = {Transactions of The Society for Modeling and Simulation International},
author = {Kofman, Ernesto and Junco, Sergio},
year = {2001},
note = {Publisher: [La Jolla, CA]: The Society, c1984--c2001},
pages = {123--132},
}
@article{marsden_discrete_2001,
title = {Discrete mechanics and variational integrators},
volume = {10},
issn = {0962-4929, 1474-0508},
url = {https://www.cambridge.org/core/product/identifier/S096249290100006X/type/journal_article},
doi = {10.1017/S096249290100006X},
abstract = {This paper gives a review of integration algorithms for finite dimensional
mechanical systems that are based on discrete variational principles. The
variational technique gives a unified treatment of many symplectic schemes,
including those of higher order, as well as a natural treatment of the discrete
Noether theorem. The approach also allows us to include forces, dissipation
and constraints in a natural way. Amongst the many specific schemes treated
as examples, the Verlet, SHAKE, RATTLE, Newmark, and the symplectic
partitioned Runge–Kutta schemes are presented.},
language = {en},
urldate = {2020-02-20},
journal = {Acta Numerica},
author = {Marsden, J. E. and West, M.},
month = may,
year = {2001},
note = {01566},
pages = {357--514},
}
@book{mohamed_machine_2018,
address = {Cham},
title = {Machine {Learning} for {Model} {Order} {Reduction}},
isbn = {978-3-319-75713-1 978-3-319-75714-8},
url = {http://link.springer.com/10.1007/978-3-319-75714-8},
language = {en},
urldate = {2020-12-10},
publisher = {Springer International Publishing},
author = {Mohamed, Khaled Salah},
year = {2018},
doi = {10.1007/978-3-319-75714-8},
note = {00013 },
}
@misc{rackauckas_modern_nodate,
title = {Modern {Differential} {Equations} {Solver} {Software}: {Where} {We} {Are} and {Where} {We}'re {Headed}},
language = {en},
author = {Rackauckas, Chris},
note = {00000},
pages = {44},
}
@article{Plebe2019,
title = {The {Unbearable} {Shallow} {Understanding} of {Deep} {Learning}},
volume = {29},
issn = {1572-8641},
url = {https://doi.org/10.1007/s11023-019-09512-8},
doi = {10.1007/s11023-019-09512-8},
abstract = {This paper analyzes the rapid and unexpected rise of deep learning within Artificial Intelligence and its applications. It tackles the possible reasons for this remarkable success, providing candidate paths towards a satisfactory explanation of why it works so well, at least in some domains. A historical account is given for the ups and downs, which have characterized neural networks research and its evolution from “shallow” to “deep” learning architectures. A precise account of “success” is given, in order to sieve out aspects pertaining to marketing or sociology of research, and the remaining aspects seem to certify a genuine value of deep learning, calling for explanation. The alleged two main propelling factors for deep learning, namely computing hardware performance and neuroscience findings, are scrutinized, and evaluated as relevant but insufficient for a comprehensive explanation. We review various attempts that have been made to provide mathematical foundations able to justify the efficiency of deep learning, and we deem this is the most promising road to follow, even if the current achievements are too scattered and relevant for very limited classes of deep neural models. The authors’ take is that most of what can explain the very nature of why deep learning works at all and even very well across so many domains of application is still to be understood and further research, which addresses the theoretical foundation of artificial learning, is still very much needed.},
language = {en},
number = {4},
urldate = {2020-12-07},
journal = {Minds and Machines},
author = {Plebe, Alessio and Grasso, Giorgio},
month = dec,
year = {2019},
note = {00003},
pages = {515--553},
}
@article{wang_review_2007,
title = {Review of {Metamodeling} {Techniques} in {Support} of {Engineering} {Design} {Optimization}},
volume = {129},
issn = {1050-0472, 1528-9001},
url = {https://asmedigitalcollection.asme.org/mechanicaldesign/article/129/4/370/466824/Review-of-Metamodeling-Techniques-in-Support-of},
doi = {10.1115/1.2429697},
abstract = {Computation-intensive design problems are becoming increasingly common in manufacturing industries. The computation burden is often caused by expensive analysis and simulation processes in order to reach a comparable level of accuracy as physical testing data. To address such a challenge, approximation or metamodeling techniques are often used. Metamodeling techniques have been developed from many different disciplines including statistics, mathematics, computer science, and various engineering disciplines. These metamodels are initially developed as “surrogates” of the expensive simulation process in order to improve the overall computation efficiency. They are then found to be a valuable tool to support a wide scope of activities in modern engineering design, especially design optimization. This work reviews the state-of-the-art metamodel-based techniques from a practitioner’s perspective according to the role of metamodeling in supporting design optimization, including model approximation, design space exploration, problem formulation, and solving various types of optimization problems. Challenges and future development of metamodeling in support of engineering design is also analyzed and discussed.},
language = {en},
number = {4},
urldate = {2020-11-30},
journal = {Journal of Mechanical Design},
author = {Wang, G. Gary and Shan, S.},
month = apr,
year = {2007},
note = {01486},
pages = {370--380},
}
@book{forrester_engineering_2008,
address = {Chichester, West Sussex, England ; Hoboken, NJ},
title = {Engineering design via surrogate modelling: a practical guide},
isbn = {978-0-470-06068-1},
shorttitle = {Engineering design via surrogate modelling},
publisher = {J. Wiley},
author = {Forrester, Alexander I. J. and Sóbester, András and Keane, A. J.},
year = {2008},
note = {02311},
keywords = {Engineering design, Mathematical models, Statistical methods},
}
@article{deshmukh_design_2017,
title = {Design of {Dynamic} {Systems} {Using} {Surrogate} {Models} of {Derivative} {Functions}},
volume = {139},
issn = {1050-0472, 1528-9001},
url = {https://asmedigitalcollection.asme.org/mechanicaldesign/article/doi/10.1115/1.4037407/367011/Design-of-Dynamic-Systems-Using-Surrogate-Models},
doi = {10.1115/1.4037407},
abstract = {Optimization of dynamic systems often requires system simulation. Several important classes of dynamic system models have computationally expensive time derivative functions, resulting in simulations that are significantly slower than real time. This makes design optimization based on these models impractical. An efficient two-loop method, based on surrogate modeling, is presented here for solving dynamic system design problems with computationally expensive derivative functions. A surrogate model is constructed for only the derivative function instead of the simulation response. Simulation is performed based on the computationally inexpensive surrogate derivative function; this strategy preserves the nature of the dynamic system, and improves computational efficiency and accuracy compared to conventional surrogate modeling. The inner-loop optimization problem is solved for a given derivative function surrogate model (DFSM), and the outer loop updates the surrogate model based on optimization results. One unique challenge of this strategy is to ensure surrogate model accuracy in two regions: near the optimal point in the design space, and near the state trajectory in the state space corresponding to the optimal design. The initial evidence of method effectiveness is demonstrated first using two simple design examples, followed by a more detailed wind turbine codesign problem that accounts for aeroelastic effects and simultaneously optimizes physical and control system design. In the last example, a linear state-dependent model is used that requires computationally expensive matrix updates when either state or design variables change. Results indicate an order-of-magnitude reduction in function evaluations when compared to conventional surrogate modeling. 
The DFSM method is expected to be beneficial only for problems where derivative function evaluation expense, and not large problem dimension, is the primary contributor to solution expense (a restricted but important problem class). The initial studies presented here revealed opportunities for potential further method improvement and deeper investigation.},
language = {en},
number = {10},
urldate = {2020-11-30},
journal = {Journal of Mechanical Design},
author = {Deshmukh, Anand P. and Allison, James T.},
month = oct,
year = {2017},
note = {00003},
pages = {101402},
}
@book{astrom_feedback_2010,
title = {Feedback systems: an introduction for scientists and engineers},
isbn = {1-4008-2873-2},
publisher = {Princeton University Press},
author = {Åström, Karl Johan and Murray, Richard M.},
year = {2010},
note = {00000},
}