% Son et al. (2018), "Deep Phenotyping on Electronic Health Records Facilitates
% Genetic Diagnosis by Clinical Exomes", Am. J. Hum. Genet. 103(1):58--73.
% Cleaned from a portal export: author names are in "Last, First" form, the
% funding note appears once (the export had it duplicated), and the p-value in
% the abstract is typeset as math (the exponent had been flattened to "10-16").
% `day` and `language` are non-standard for classic BibTeX styles and are kept
% as exported; styles that do not know them ignore them.
@article{6fbef79689814e1d8e60150caa8ecb45,
  author    = {Son, Jung Hoon and Xie, Gangcai and Yuan, Chi and Ena, Lyudmila and Li, Ziran and Goldstein, Andrew and Huang, Lulin and Wang, Liwei and Shen, Feichen and Liu, Hongfang and Mehl, Karla and Groopman, Emily E. and Marasa, Maddalena and Kiryluk, Krzysztof and Gharavi, Ali G. and Chung, Wendy K. and Hripcsak, George and Friedman, Carol and Weng, Chunhua and Wang, Kai},
  title     = {Deep Phenotyping on Electronic Health Records Facilitates Genetic Diagnosis by Clinical Exomes},
  journal   = {American Journal of Human Genetics},
  year      = {2018},
  month     = jul,
  day       = {5},
  volume    = {103},
  number    = {1},
  pages     = {58--73},
  publisher = {Cell Press},
  issn      = {0002-9297},
  doi       = {10.1016/j.ajhg.2018.05.010},
  language  = {English (US)},
  keywords  = {biomedical informatics, diagnosis, electronic health records, exome, genome, knowledge engineering, natural language processing, next-generation sequencing, phenotyping, precision medicine},
  abstract  = {Integration of detailed phenotype information with genetic data is well established to facilitate accurate diagnosis of hereditary disorders. As a rich source of phenotype information, electronic health records (EHRs) promise to empower diagnostic variant interpretation. However, how to accurately and efficiently extract phenotypes from heterogeneous EHR narratives remains a challenge. Here, we present EHR-Phenolyzer, a high-throughput EHR framework for extracting and analyzing phenotypes. EHR-Phenolyzer extracts and normalizes Human Phenotype Ontology (HPO) concepts from EHR narratives and then prioritizes genes with causal variants on the basis of the HPO-coded phenotype manifestations. We assessed EHR-Phenolyzer on 28 pediatric individuals with confirmed diagnoses of monogenic diseases and found that the genes with causal variants were ranked among the top 100 genes selected by EHR-Phenolyzer for 16/28 individuals ($p < 2.2 \times 10^{-16}$), supporting the value of phenotype-driven gene prioritization in diagnostic sequence interpretation. To assess the generalizability, we replicated this finding on an independent EHR dataset of ten individuals with a positive diagnosis from a different institution. We then assessed the broader utility by examining two additional EHR datasets, including 31 individuals who were suspected of having a Mendelian disease and underwent different types of genetic testing and 20 individuals with positive diagnoses of specific Mendelian etiologies of chronic kidney disease from exome sequencing. Finally, through several retrospective case studies, we demonstrated how combined analyses of genotype data and deep phenotype data from EHRs can expedite genetic diagnoses. In summary, EHR-Phenolyzer leverages EHR narratives to automate phenotype-driven analysis of clinical exomes or genomes, facilitating the broader implementation of genomic medicine.},
  note      = {Funding Information: We thank the individuals and their family members who participated in this study to improve the diagnostic rates in clinical exome testing. We also thank the genetic counselors and clinical geneticists who provided the clinical notes that our algorithms used to infer automated phenotype terms. We thank colleagues at the Institute of Genomic Medicine, especially Dr. David Goldstein, Nick Stong, and Louise Bier, for suggesting phenotype-driven analysis and helpful discussion and comments on the study. We thank the developers of the Human Phenotype Ontology for continuous development of this ontology over the past few years, which greatly facilitated and standardized clinical diagnosis of individuals with suspected genetic disorders. This study was supported by grants from the JPB Foundation (to W.K.C), National Human Genome Research Institute (HG006465 to K.W. and HG008680 to A.G., G.H., and C.W.), National Institute of Mental Health (MH108728 to K.W.), National Library of Medicine (R01LM009886 to C.W.), National Institute of Diabetes and Digestive and Kidney Diseases (R01DK105124, UG3DK114926, and RC2DK116690 to K.K.), and National Center for Advancing Translational Sciences (U01TR02062 to H.L., F.S., and L.W.). Publisher Copyright: {\textcopyright} 2018 American Society of Human Genetics},
}