@Article{info:doi/10.2196/80539, author="Fu, Qiang and Ji, Wei and Fan, Yu-Ping and Yao, Jian and Song, Ming-Xia and Yan, Qiao-Jing", title="Systematic Mining of Bioactive Compounds for Wound Healing From Cayratia Japonica Exosome-Like Nanovesicles: A Workflow Combining LC-MS and DeepSeek Models", journal="JMIR Bioinform Biotech", year="2026", month="Jan", day="8", volume="7", pages="e80539", keywords="DeepSeek", keywords="liquid chromatography-mass spectrometry", keywords="LC-MS", keywords="Cayratia japonica exosome-like nanovesicles", keywords="CJ-ELNs", keywords="artificial intelligence", keywords="AI-powered multimodal framework", keywords="wound healing and tissue regeneration", abstract="Background: Plant-derived exosome-like nanovesicles (P-ELNs) effectively deliver bioactive compounds due to their high biocompatibility and low immunogenicity. While liquid chromatography-mass spectrometry (LC-MS) profiles compounds in complex samples, its analysis of large datasets remains limited by traditional methods. Recent advances in large language models (LLMs) and domain-specific systems have enhanced Chinese biomedical data processing and cross-modal pharmaceutical research. Objective: This study aimed to create a multimodal framework of LC-MS combined with DeepSeek models for data mining of compounds with wound-healing properties from exosome-like nanovesicles derived from Cayratia japonica (CJ-ELNs). Methods: LC-MS identified compounds enriched in CJ (n=3) and CJ-ELNs (n=3), and then compounds specifically enriched in CJ-ELNs were filtered via a four-step filtering workflow. The CJ-ELNs-specific compounds were processed by DeepSeek models for screening naturally active compounds with targeted functions of antioxidation, anti-inflammation, anticellular damage, antiapoptosis, wound healing and tissue regeneration, and cell proliferation. Results: A multimodal framework of LC-MS combined with the DeepSeek-DF model was created. 
With the assistance of artificial intelligence (AI), a total of 46 naturally active compounds derived from CJ-ELNs with targeted functions were identified. Conclusions: A self-designed multimodal framework of LC-MS, combined with DeepSeek models, rapidly and accurately identifies naturally active compounds from CJ-ELNs. This AI-powered system innovatively integrates the traditional analytical technique with modern LLMs, thus greatly favoring data mining of active ingredients in traditional Chinese medicine herbs. ", doi="10.2196/80539", url="https://bioinform.jmir.org/2026/1/e80539" } @Article{info:doi/10.2196/69104, author="Boehm, Dominik and Strantz, Cosima and Ustjanzew, Arsenij and Manuilova, Iryna and Scheiter, Alexander and Pauli, Thomas and Hechtel, Nicole and Reimer, Niklas and Christoph, Jan and Busch, Hauke and Ganslandt, Thomas and Unberath, Philipp", title="Data Visualization Support for Interdisciplinary Team Treatment Planning in Clinical Oncology: Scoping Review", journal="J Med Internet Res", year="2025", month="Dec", day="9", volume="27", pages="e69104", keywords="clinical oncology", keywords="tumor board", keywords="cancer conference", keywords="multidisciplinary", keywords="visualization", keywords="software", keywords="scoping review", keywords="tumor", abstract="Background: Complex and expanding datasets in clinical oncology applications require flexible and interactive visualization of patient data to provide physicians and other medical professionals with maximum amount of information. In particular, interdisciplinary tumor conferences profit from customized tools to integrate, link, and visualize relevant data from all professions involved. Objective: Our objective was to identify and present currently available data visualization tools for tumor boards and related areas. 
We wanted to provide an overview of not only the digital tools currently used in tumor board settings but also of the data they include, their respective visualization solutions, and their integration into hospital processes. Methods: This scoping review was based on the scoping study framework by Arksey and O'Malley and attempted to answer the following research question: ``What are the key features of data visualization solutions used in molecular and organ tumor boards, and how are these elements integrated and used within the clinical setting?'' The following electronic databases were searched for articles: PubMed, Web of Science, and Scopus. Articles were deemed eligible if published in English in the last 10 years. Eligible articles were first deduplicated, followed by screening of titles and abstracts. Full-text screening was then conducted to decide on article selection. All included articles were analyzed using a data extraction template. The template included a variety of meta-information, as well as specific fields aiming to answer the research question. Results: The review process started with 2049 articles, of which 1014 (49.49\%) were included in the title and abstract screening. A total of 5.47\% (112/2049) of the publications were eligible for full-text screening, leading to 2.93\% (60/2049) of the publications being eligible for final inclusion. They covered 49 distinct visualization tools and applications. We discovered a variety of innovative visualization solutions, most often driven by the complexity of omics data, represented in 96\% (47/49) of the tools. Tables remained the most used tool for the visualization of data types described in the articles. Approximately one-third of the identified tools (16/49, 33\%) were systematically evaluated in some form. For most discovered tools (37/49, 76\%), there was no documentation of implementation into the clinical routine. 
A significant number of applications (21/49, 43\%) were available through open-source access. Conclusions: There is a wide range of projects providing visualization solutions for tumor boards and clinical oncology applications. Among the few tools that have made their way into clinical routine settings, there are both commercial and academic solutions. While tables for a variety of data types remain the dominant visualization strategy, the complexity of omics data appears to be the driving force behind many visualization innovations in the domain of tumor boards. International Registered Report Identifier (IRRID): RR2-10.2196/53627 ", doi="10.2196/69104", url="https://www.jmir.org/2025/1/e69104" } @Article{info:doi/10.2196/83872, author="Pach{\'o}n-Su{\'a}rez, Isabel Diana and Mej{\'i}a-Salgado, Germ{\'a}n and Correa, Oscar and S{\'a}nchez, Andr{\'e}s and Munera, Marlon and de-la-Torre, Alejandra", title="Immunogenicity of Adalimumab in Bacterial Molecular Mimicry: In Silico Analysis", journal="JMIR Bioinform Biotech", year="2025", month="Dec", day="8", volume="6", pages="e83872", keywords="adalimumab", keywords="antidrug antibody", keywords="immunogenicity", keywords="in silico analysis", keywords="molecular mimicry", abstract="Background: Adalimumab, a monoclonal antibody targeting tumor necrosis factor $\alpha$, treats autoimmune diseases but induces antidrug antibodies in 30\% to 60\% of patients, reducing its efficacy. Objective: This study aims to investigate molecular mimicry as a mechanism behind this immunogenicity, where bacterial immunoglobulin domains structurally resemble adalimumab's light chain, triggering immune responses. Methods: Using PSI-BLASTp (National Center for Biotechnology Information) and PRALINE (Center for Integrative Bioinformatics), there are 40 bacterial antigens homologous to adalimumab, with 8 clinically relevant strains. 
Results: Structural analysis revealed 94\% amino acid identity between the immunoglobulin domain of Escherichia coli strain B1 and adalimumab's light chain, and 89.67\% similarity with Corynebacterium pyruviciproducens. Root mean square deviation values confirmed strong structural homology. Additionally, 5 cross-reactive B-cell epitopes were predicted, suggesting overlapping surfaces that may promote immune cross-reactivity and antidrug antibody development. Conclusions: This study represents a first step toward identifying a potential microbial factor driving antiadalimumab antibody formation. The predicted cross-reactive regions provide specific candidates for further in vitro validation to confirm molecular mimicry and refine epitope mapping. Understanding these mechanisms may ultimately inform the design of less immunogenic biologics and guide clinical strategies to predict and prevent antidrug antibody formation. ", doi="10.2196/83872", url="https://bioinform.jmir.org/2025/1/e83872" } @Article{info:doi/10.2196/76736, author="Chou, Charissa and Baykara, Yi{\u{g}}it and Hacking, Sean and Amin, Ali and Cheng, Liang and Uzun, Alper and Gamsiz Uzun, Dilber Ece", title="Protein-Protein Interactions in Papillary and Nonpapillary Urothelial Carcinoma Architectures: Comparative Study", journal="JMIR Bioinform Biotech", year="2025", month="Nov", day="27", volume="6", pages="e76736", keywords="urothelial carcinoma", keywords="comprehensive genomic profiling", keywords="protein-protein interactions", keywords="network biology", keywords="drug repurposing", abstract="Background: Bladder cancer is a disease characterized by complex perturbations in gene networks and is heterogeneous in terms of histology, mutations, and prognosis. Advances in high-throughput sequencing technologies, genome-wide association studies, and bioinformatics methods have revealed greater insights into the pathogenesis of complex diseases. 
Network biology--based approaches have been used to identify complex protein-protein interactions (PPIs) that can lead to potential drug targets. There is a need to better understand PPIs specific to urothelial carcinoma. Objective: This study aimed to elucidate PPIs specific to papillary and nonpapillary urothelial carcinoma and identify the most connected or ``hub'' proteins, as these are potential drug targets. Methods: A novel PPI analysis tool, Proteinarium, was used to analyze RNA sequencing data from 132 patients with papillary and 270 patients with nonpapillary urothelial carcinoma from the TCGA Cell 2017 dataset and 39 patients with papillary and 88 patients with nonpapillary urothelial carcinoma from the TCGA Nature 2014 dataset. Hub proteins were identified in distinct PPI networks specific to papillary and nonpapillary urothelial carcinoma. Statistical significance of clusters was assessed using the Fisher exact test ($P<.001$), and network separation was quantified using the interactome-based separation score. Results: RPS27A, UBA52, and VAMP8 were the most connected or ``hub'' proteins identified in the network specific to the papillary urothelial carcinoma. In the network specific to the nonpapillary carcinoma, GNB1, RHOA, UBC, and FPR2 were found to be the hub proteins. Notably, GNB1 and FPR2 were among the proteins that have existing drugs targeting them. Conclusions: We identified distinct PPI networks and the hub proteins specific to papillary and nonpapillary urothelial carcinomas. However, these findings are limited by the use of transcriptomic data and require experimental validation to confirm the functional relevance of the identified targets. 
", doi="10.2196/76736", url="https://bioinform.jmir.org/2025/1/e76736" } @Article{info:doi/10.2196/72133, author="Mechnine, Abdelilah and Saih, Asmae and Wakrim, Lahcen and Aarab, Ahmed", title="In Silico Analysis and Validation of A Disintegrin and Metalloprotease (ADAM) 17 Gene Missense Variants: Structural Bioinformatics Study", journal="JMIR Bioinform Biotech", year="2025", month="Aug", day="25", volume="6", pages="e72133", keywords="bioinformatics", keywords="in silico", keywords="COVID-19", keywords="SARS-CoV-2", keywords="molecular modeling", abstract="Background: The protein A disintegrin and metalloprotease (ADAM) domain containing 17, also called tumor necrosis factor alpha--converting enzyme, is mainly responsible for cleaving a specific sequence Pro-Leu-Ala-Gln-Ala-/-Val-Arg-Ser-Ser-Ser in the membrane-bound precursor of tumor necrosis factor alpha. This cleavage process has significant implications for inflammatory and immune responses, and recent research indicates that genetic variants of ADAM17 may influence susceptibility to and severity of SARS-CoV-2 infection. Objective: The aim of the study is to identify the most deleterious missense variants of ADAM17 that impact protein stability, structure, and function and to assess specific variants potentially involved in SARS-CoV-2 infection. 
Methods: A bioinformatics approach was used on 12,042 single-nucleotide polymorphisms using tools including SIFT (Sorting Intolerant From Tolerant), PolyPhen2.0, PROVEAN (Protein Variation Effect Analyzer), PANTHER (Protein Analysis Through Evolutionary Relationships), SNP\&GO (Single Nucleotide Polymorphisms and Gene Ontology), PhD-SNP (Predictor of Human Deleterious Single Nucleotide Polymorphisms), Mutation Assessor, SNAP2 (Screening for Non-Acceptable Polymorphisms 2), MUpro, I-Mutant, iStable, InterPro, Sparks-x, PROCHECK (Programs to Check the Stereochemical Quality of Protein Structures), PyMol, Project HOPE (Have (y)Our Protein Explained), ConSurf, and SWISS-MODEL. Missense variants of ADAM17 were collected from the Ensembl database for analysis. Results: In total, 7 nonsynonymous single-nucleotide polymorphisms (P556L, G550D, V483A, G479E, G349E, T339P, and D232E) were identified as high-risk pathogenic by all prediction tools, and these variants were found to potentially have deleterious effects on the stability, structure, and function of the ADAM17 protein, potentially destroying the entire cleavage process. Additionally, 4 missense variants (Q658H, D657G, D654N, and F652L) in positions related to SARS-CoV-2 infection exhibited high conservation scores and were predicted to be deleterious, suggesting that they play an important role in SARS-CoV-2 infection. Conclusions: Specific missense variants of ADAM17 are predicted to be highly pathogenic, potentially affecting protein stability and function and contributing to SARS-CoV-2 pathogenesis. These findings provide a basis for understanding their clinical relevance, aiding in early diagnosis, risk assessment, and therapeutic development. 
", doi="10.2196/72133", url="https://bioinform.jmir.org/2025/1/e72133" } @Article{info:doi/10.2196/60207, author="Lin, Lin and Wang, Guoyong and Hao, Lianzheng and Yan, Tingbin", title="Genotype Distribution and Migration Patterns of Hepatitis C Virus in Shandong Province, China: Molecular Epidemiology and Phylogenetic Study", journal="JMIR Med Inform", year="2025", month="Aug", day="18", volume="13", pages="e60207", keywords="hepatitis C virus", keywords="genotypic diversity", keywords="phylogenetic analysis", keywords="migration patterns", keywords="Shandong Province", keywords="Bayesian skyline plot", keywords="migration", keywords="genotype", keywords="prevention", keywords="disease control", keywords="phylogenetic", keywords="HCV", keywords="migration pattern", keywords="epidemiological", keywords="China", abstract="Background: Hepatitis C virus (HCV) remains a significant public health concern in China, particularly in Shandong Province, where detailed molecular epidemiological data are limited. HCV exhibits substantial genetic diversity, and understanding its genotype distribution and transmission dynamics is critical for developing effective control strategies. Objective: This study aimed to investigate the genetic diversity, geographic dissemination, and evolutionary history of HCV genotypes in Shandong Province, China, using molecular techniques and phylogenetic methods. Methods: A total of 320 HCV-positive serum samples were collected from multiple hospitals across Shandong Province between 2013 and 2021. HCV RNA was extracted and amplified targeting the 5{\textasciiacutex} untranslated region (UTR), Core, and NS5B regions. Sequencing was conducted, and genotypes were determined using the National Center for Biotechnology Information's Basic Local Alignment Search Tool (NCBI BLAST). Phylogenetic trees were constructed using maximum likelihood methods with the general time reversible with Gamma-distributed rate variation among sites [(GTR)+Gamma model]. 
The temporal and geographic evolution of the major subtypes (1b and 2a) was analyzed using Bayesian Markov chain Monte Carlo (MCMC) methods implemented in Bayesian Evolutionary Analysis Sampling Trees (BEAST). The Bayesian skyline plot (BSP) was used to infer population dynamics and estimate the time to the most recent common ancestor (tMRCA). Results: Genotypes 1b (n=165) and 2a (n=131) were identified as the predominant subtypes, with a small number of genotypes 3b, 6a, 6k, and potential recombinant strains also detected. Phylogenetic analysis revealed distinct evolutionary clustering of 1b and 2a strains, suggesting multiple diffusion events within the province. The tMRCA of subtypes 1b and 2a were estimated to be 1957 and 1979, respectively. Bayesian skyline analysis showed that both subtypes experienced long-term population stability, followed by a rapid expansion period between 2014 and 2019 (1b) and 2014 to 2016 (2a), respectively. The analysis also identified key transmission hubs such as Jinan, Liaocheng, Tai'an, and Dezhou, indicating city-level variations in HCV spread. Conclusions: This study provides data-supported insights into the genotypic landscape and evolutionary patterns of HCV in Shandong Province. The identification of dominant subtypes, potential recombinant strains, and regional transmission pathways enhances our understanding of local HCV epidemiology. These findings have implications for public health policy, resource allocation, and targeted treatment strategies. The integration of molecular epidemiology and phylogenetics offers a valuable model for infectious disease surveillance and control in similar settings. 
", doi="10.2196/60207", url="https://medinform.jmir.org/2025/1/e60207" } @Article{info:doi/10.2196/69454, author="Khan, Shumaila and Alam, Mahmood and Qasim, Iqbal and Khan, Shahnaz and Khan, Wahab and Mamyrbayev, Orken and Akhmediyarova, Ainur and Mukazhanov, Nurzhan and Alibiyeva,, Zhibek", title="Genetic Diversity and Mutation Frequency Databases in Ethnic Populations: Systematic Review", journal="JMIR Bioinform Biotech", year="2025", month="Aug", day="11", volume="6", pages="e69454", keywords="ethnic-specific mutation frequency databases", keywords="genetic diversity", keywords="mutation disorder", keywords="inherited disease", abstract="Background: National and ethnic mutation frequency databases (NEMDBs) play a crucial role in documenting gene variations across populations, offering invaluable insights for gene mutation research and the advancement of precision medicine. These databases provide an essential resource for understanding genetic diversity and its implications for health and disease across different ethnic groups. Objective: The aim of this study is to systematically evaluate 42 NEMDBs to (1) quantify gaps in standardization (70\% nonstandard formats, 50\% outdated data), (2) propose artificial intelligence/linked open data solutions for interoperability, and (3) highlight clinical implications for precision medicine across NEMDBs. Methods: A systematic approach was used to assess the databases based on several criteria, including data collection methods, system design, and querying mechanisms. We analyzed the accessibility and user-centric features of each database, noting their ability to integrate with other systems and their role in advancing genetic disorder research. The review also addressed standardization and data quality challenges prevalent in current NEMDBs. 
Results: The analysis of 42 NEMDBs revealed significant issues, with 70\% (29/42) lacking standardized data formats and 60\% (25/42) having notable gaps in the cross-comparison of genetic variations, and 50\% (21/42) of the databases contained incomplete or outdated data, limiting their clinical utility. However, databases developed on open-source platforms, such as LOVD, showed a 40\% increase in usability for researchers, highlighting the benefits of using flexible, open-access systems. Conclusions: We propose cloud-based platforms and linked open data frameworks to address critical gaps in standardization (70\% of databases) and outdated data (50\%) alongside artificial intelligence--driven models for improved interoperability. These solutions prioritize user-centric design to effectively serve clinicians, researchers, and public stakeholders. ", doi="10.2196/69454", url="https://bioinform.jmir.org/2025/1/e69454" } @Article{info:doi/10.2196/70621, author="Almadhoun, B. Mahmoud and Burhanuddin, MA", title="Optimizing Feature Selection and Machine Learning Algorithms for Early Detection of Prediabetes Risk: Comparative Study", journal="JMIR Bioinform Biotech", year="2025", month="Jul", day="31", volume="6", pages="e70621", keywords="prediabetes", keywords="machine learning", keywords="feature selection", keywords="prediction", keywords="extreme gradient boosting", keywords="support vector machine", keywords="k-nearest neighbors", abstract="Background: Prediabetes is an intermediate stage between normal glucose metabolism and diabetes and is associated with increased risk of complications like cardiovascular disease and kidney failure. Objective: It is crucial to recognize individuals with prediabetes early in order to apply timely intervention strategies to decelerate or prohibit diabetes development. This study aims to compare the effectiveness of machine learning (ML) algorithms in predicting prediabetes and identifying its key clinical predictors. 
Methods: Multiple ML models are evaluated in this study, including random forest, extreme gradient boosting (XGBoost), support vector machine (SVM), and k-nearest neighbors (KNNs), on a dataset of 4743 individuals. For improved performance and interpretability, key clinical features were selected using LASSO (Least Absolute Shrinkage and Selection Operator) regression and principal component analysis (PCA). To optimize model accuracy and reduce overfitting, we used hyperparameter tuning with RandomizedSearchCV for XGBoost and random forest, and GridSearchCV for SVM and KNN. SHAP (Shapley Additive Explanations) was used to assess model-agnostic feature importance. To resolve data imbalance, SMOTE (Synthetic Minority Oversampling Technique) was applied to ensure reliable classifications. Results: A cross-validated ROC-AUC (receiver operating characteristic area under the curve) score of 0.9117 highlighted the robustness of random forest in generalizing across datasets among the models tested. XGBoost followed closely, providing balanced accuracy in distinguishing between normal and prediabetic cases. While SVMs and KNNs performed adequately as baseline models, they exhibited limitations in sensitivity. The SHAP analysis indicated that BMI, age, high-density lipoprotein cholesterol, and low-density lipoprotein cholesterol emerged as the key predictors across models. The performance was significantly enhanced through hyperparameter tuning; for example, the ROC-AUC for SVM increased from 0.813 (default) to 0.863 (tuned). PCA kept 12 components while maintaining 95\% of the variance in the dataset. Conclusions: It is demonstrated in this research that optimized ML models, especially random forest and XGBoost, are effective tools for assessing early prediabetes risk. Combining SHAP analysis with LASSO and PCA enhances transparency, supporting their integration in real-time clinical decision support systems. 
Future directions include validating these models in diverse clinical settings and integrating additional biomarkers to improve prediction accuracy, offering a promising avenue for early intervention and personalized treatment strategies in preventive health care. ", doi="10.2196/70621", url="https://bioinform.jmir.org/2025/1/e70621" } @Article{info:doi/10.2196/69800, author="Braga, Melvin David and Rawal, Bharat", title="Harnessing AI and Quantum Computing for Revolutionizing Drug Discovery and Approval Processes: Case Example for Collagen Toxicity", journal="JMIR Bioinform Biotech", year="2025", month="Jul", day="22", volume="6", pages="e69800", keywords="generative AI", keywords="quantum computing", keywords="computational data", keywords="new drug discovery", keywords="computer-aided drug discovery", keywords="artificial intelligence", doi="10.2196/69800", url="https://bioinform.jmir.org/2025/1/e69800" } @Article{info:doi/10.2196/67862, author="Nagino, Ken and Akasaki, Yasutsugu and Fuse, Nobuo and Ogishima, Soichi and Shimizu, Atsushi and Uruno, Akira and Sutoh, Yoichi and Otsuka-Yamasaki, Yayoi and Nagami, Fuji and Seita, Jun and Nakamura, Tomohiro and Nagaie, Satoshi and Taira, Makiko and Kobayashi, Tomoko and Shimizu, Ritsuko and Hozawa, Atsushi and Kuriyama, Shinichi and Eguchi, Atsuko and Midorikawa-Inomata, Akie and Nakamura, Masahiro and Murakami, Akira and Nakao, Shintaro and Inomata, Takenori", title="Integration of Digital Phenotyping and Genomics for Dry Eye Disease: Protocol for a Prospective Cohort Study", journal="JMIR Res Protoc", year="2025", month="May", day="12", volume="14", pages="e67862", keywords="dry eye syndrome", keywords="dry eye disease", keywords="mobile health", keywords="smartphone", keywords="biobank", keywords="ocular surface", keywords="digital health", keywords="genome-wide association study", abstract="Background: Dry eye disease (DED) is a common ocular condition with diverse and heterogeneous symptoms. 
Current treatment standards of DED include the post facto management of associated symptoms through topical eye drops. However, there is a need for predictive, preventive, personalized, and participatory medicine. The DryEyeRhythm mobile health app enables real-time data collection on environmental, lifestyle, host, and digital factors in a patient's daily environment. Combining these data with genetic information from biobanks could enhance our understanding of individual variations and facilitate the development of personalized treatment strategies for DED. Objective: This study aims to integrate digital data from the DryEyeRhythm smartphone app with the Tohoku Medical Megabank database to create a comprehensive database that elucidates the interplay between multifactorial factors and the onset and progression of DED. Methods: This prospective observational cohort study will include 1200 participants for the discovery stage and 1000 participants for the replication stage, all of whom have data available in the Tohoku Medical Megabank database. Participants will be recruited from the Community Support Center of Sendai, Miyagi Prefecture, Japan. Participant enrollment for the discovery stage was conducted from August 1, 2021, to June 30, 2022, and the replication stage will be conducted from August 31, 2024, to March 31, 2026. Participants will provide demographic data, medical history, lifestyle information, DED symptoms, and maximum blink interval measurements at baseline and after 30 days using the DryEyeRhythm smartphone app. Upon scanning a registration code, each participant's cohort ID from the Tohoku Medical Megabank database will be linked to their smartphone app, enabling data integration between the Tohoku Medical Megabank and DryEyeRhythm database. The primary outcome will assess the association between genetic polymorphisms and DED using a genome-wide association study. 
Secondary outcomes will explore associations between DED and various factors, including sociodemographic characteristics, lifestyle habits, medical history, biospecimen analyses (eg, blood and urine), and physiological measurements (eg, height, weight, and eye examination results). Associations will be evaluated using logistic regression analysis, adjusting for potential confounding factors. Results: The discovery stage of participant enrollment was conducted from August 1, 2021, to June 30, 2022. The replication stage will take place from August 31, 2024, to March 31, 2026. Data analysis is expected to be completed by September 2026, with results reported by March 2027. Conclusions: This study highlights the potential of smartphone apps in advancing biobank research and deepening the understanding of multifactorial DED, paving the way for personalized treatment strategies in the future. International Registered Report Identifier (IRRID): DERR1-10.2196/67862 ", doi="10.2196/67862", url="https://www.researchprotocols.org/2025/1/e67862" } @Article{info:doi/10.2196/70463, author="Sanchez, William and Dewan, Ananya and Budd, Eve and Eifler, M. and Miller, C. Robert and Kahn, Jeffery and Macis, Mario and Gross, Marielle", title="Decentralized Biobanking Apps for Patient Tracking of Biospecimen Research: Real-World Usability and Feasibility Study", journal="JMIR Bioinform Biotech", year="2025", month="Apr", day="10", volume="6", pages="e70463", keywords="patient empowerment", keywords="biobanking", keywords="biospecimens", keywords="transparency", keywords="community engagement", keywords="nonfungible tokens", keywords="NFTs", keywords="blockchain technology", keywords="decentralized biobanking", keywords="pilot studies", keywords="technical feasibility", keywords="biowallet", abstract="Background: Biobank privacy policies strip patient identifiers from donated specimens, undermining transparency, utility, and value for patients, scientists, and society. 
We are advancing decentralized biobanking apps that reconnect patients with biospecimens and facilitate engagement through a privacy-preserving nonfungible token (NFT) digital twin framework. The decentralized biobanking platform was first piloted for breast cancer biobank members. Objective: This study aimed to demonstrate the technical feasibility of (1) patient-friendly biobanking apps, (2) integration with institutional biobanks, and (3) establishing the foundation of an NFT digital twin framework for decentralized biobanking. Methods: We designed, developed, and deployed a decentralized biobanking mobile app for a feasibility pilot from 2021 to 2023 in the setting of a breast cancer biobank at a National Cancer Institute comprehensive cancer center. The Flutter app was integrated with the biobank's laboratory information management systems via an institutional review board--approved mechanism leveraging authorized, secure devices and anonymous ID codes and complemented with a nontransferable ERC-721 NFT representing the soul-bound connection between an individual and their specimens. Biowallet NFTs were held within a custodial wallet, whereas the user experiences simulated token-gated access to personalized feedback about collection and use of individual and collective deidentified specimens. Quantified app user journeys and NFT deployment data demonstrate technical feasibility complemented with design workshop feedback. Results: The decentralized biobanking app incorporated key features: ``biobank'' (learn about biobanking), ``biowallet'' (track personal biospecimens), ``labs'' (follow research), and ``profile'' (share data and preferences). In total, 405 pilot participants downloaded the app, including 361 (89.1\%) biobank members. A total of 4 central user journeys were captured. 
First, all app users were oriented to the $\sim$60,000-biospecimen collection, and 37.8\% (153/405) completed research profiles, collectively enhancing annotations for 760 unused specimens. NFTs were minted for 94.6\% (140/148) of app users with specimens at an average cost of US \$4.51 (SD US \$2.54; range US \$1.84-\$11.23) per token, projected to US \$17,769.40 (SD US \$159.52; range US \$7265.62-\$44,229.27) for the biobank population. In total, 89.3\% (125/140) of the users successfully claimed NFTs during the pilot, thereby tracking 1812 personal specimens, including 202 (11.2\%) distributed under 42 unique research protocols. Participants embraced the opportunity for direct feedback, community engagement, and potential health benefits, although user onboarding requires further refinement. Conclusions: Decentralized biobanking apps demonstrate technical feasibility for empowering patients to track donated biospecimens via integration with institutional biobank infrastructure. Our pilot reveals potential to accelerate biomedical research through patient engagement; however, further development is needed to optimize the accessibility, efficiency, and scalability of platform design and blockchain elements, as well as a robust incentive and governance structure for decentralized biobanking. 
", doi="10.2196/70463", url="https://bioinform.jmir.org/2025/1/e70463", url="http://www.ncbi.nlm.nih.gov/pubmed/40208659" } @Article{info:doi/10.2196/65645, author="Goes Job, Eduarda Maria and Fukumasu, Heidge and Malta, Maistro Tathiane and Porfirio Xavier, Luiz Pedro", title="Investigating Associations Between Prognostic Factors in Gliomas: Unsupervised Multiple Correspondence Analysis", journal="JMIR Bioinform Biotech", year="2025", month="Mar", day="12", volume="6", pages="e65645", keywords="brain tumors", keywords="bioinformatics", keywords="stemness", keywords="multiple correspondence analysis", abstract="Background: Multiple correspondence analysis (MCA) is an unsupervised data science methodology that aims to identify and represent associations between categorical variables. Gliomas are an aggressive type of cancer characterized by diverse molecular and clinical features that serve as key prognostic factors. Thus, advanced computational approaches are essential to enhance the analysis and interpretation of the associations between clinical and molecular features in gliomas. Objective: This study aims to apply MCA to identify associations between glioma prognostic factors and also explore their associations with stemness phenotype. Methods: Clinical and molecular data from 448 patients with brain tumors were obtained from the Cancer Genome Atlas. The DNA methylation stemness index, derived from DNA methylation patterns, was built using a one-class logistic regression. Associations between variables were evaluated using the $\chi${\texttwosuperior} test with k degrees of freedom, followed by analysis of the adjusted standardized residuals (ASRs >1.96 indicate a significant association between variables). MCA was used to uncover associations between glioma prognostic factors and stemness. Results: Our analysis revealed significant associations among molecular and clinical characteristics in gliomas. 
Additionally, we demonstrated the capability of MCA to identify associations between stemness and these prognostic factors. Our results exhibited a strong association between higher DNA methylation stemness index and features related to poorer prognosis such as glioblastoma cancer type (ASR: 8.507), grade 4 (ASR: 8.507), isocitrate dehydrogenase wild type (ASR: 15.904), unmethylated MGMT (methylguanine methyltransferase) promoter (ASR: 9.983), and telomerase reverse transcriptase expression (ASR: 3.351), demonstrating the utility of MCA as an analytical tool for elucidating potential prognostic factors. Conclusions: MCA is a valuable tool for understanding the complex interdependence of prognostic markers in gliomas. MCA facilitates the exploration of large-scale datasets and enhances the identification of significant associations. ", doi="10.2196/65645", url="https://bioinform.jmir.org/2025/1/e65645" } @Article{info:doi/10.2196/70282, author="Liu, Xu and Guo, Linghong and Jiang, Xian", title="Use of Clinical Public Databases in Hidradenitis Suppurativa Research", journal="Interact J Med Res", year="2025", month="Feb", day="18", volume="14", pages="e70282", keywords="hidradenitis suppurativa", keywords="clinical public databases", keywords="disease progression", keywords="patient data", keywords="HS", doi="10.2196/70282", url="https://www.i-jmr.org/2025/1/e70282" } @Article{info:doi/10.2196/50712, author="Friedenson, Bernard", title="Identifying Safeguards Disabled by Epstein-Barr Virus Infections in Genomes From Patients With Breast Cancer: Chromosomal Bioinformatics Analysis", journal="JMIRx Med", year="2025", month="Jan", day="29", volume="6", pages="e50712", keywords="breast cancer", keywords="cancer", keywords="oncology", keywords="ovarian", keywords="virus", keywords="viral", keywords="Epstein-Barr", keywords="herpes", keywords="bioinformatics", keywords="chromosome", keywords="gene", keywords="genetic", keywords="chromosomal", keywords="DNA",
keywords="genomic", keywords="BRCA", keywords="metastasis", keywords="biology", abstract="Background: The causes of breast cancer are poorly understood. A potential risk factor is Epstein-Barr virus (EBV), a lifelong infection nearly everyone acquires. EBV-transformed human mammary cells accelerate breast cancer when transplanted into immunosuppressed mice, but the virus can disappear as malignant cells reproduce. If this model applies to human breast cancers, then they should have genome damage characteristic of EBV infection. Objective: This study tests the hypothesis that EBV infection predisposes one to breast cancer by causing permanent genome damage that compromises cancer safeguards. Methods: Publicly available genome data from approximately 2100 breast cancers and 25 ovarian cancers were compared to cancers with proven associations to EBV, including 70 nasopharyngeal cancers, 90 Burkitt lymphomas, 88 diffuse large B-cell lymphomas, and 34 gastric cancers. Calculation algorithms to make these comparisons were developed. Results: Chromosome breakpoints in breast and ovarian cancer clustered around breakpoints in EBV-associated cancers. Breakpoint distributions in breast and EBV-associated cancers on some chromosomes were not confidently distinguished (P>.05), but differed from controls unrelated to EBV infection. Viral breakpoint clusters occurred in high-risk, sporadic, and other breast cancer subgroups. Breakpoint clusters disrupted gene functions essential for cancer protection, which remain compromised even if EBV infection disappears. As CRISPR (clustered regularly interspaced short palindromic repeats)--like reminders of past infection during evolution, EBV genome fragments were found regularly interspaced between Piwi-interacting RNA (piRNA) genes on chromosome 6. Both breast and EBV-associated cancers had inactivated genes that guard piRNA defenses and the major histocompatibility complex (MHC) locus. 
Breast and EBV-associated cancer breakpoints and other variations converged around the highly polymorphic MHC. Not everyone develops cancer because MHC differences produce differing responses to EBV infection. Chromosome shattering and mutation hot spots in breast cancers preferentially occurred at incorporated viral sequences. On chromosome 17, breast cancer breakpoints that clustered around those in EBV-mediated cancers were linked to estrogen effects. Other breast cancer breaks affected sites where EBV inhibits JAK-STAT and SWI-SNF signaling pathways. A characteristic EBV-cancer gene deletion that shifts metabolism to favor tumors was also found in breast cancers. These changes push breast cancer into metastasis and then favor survival of metastatic cells. Conclusions: EBV infection predisposes one to breast cancer and metastasis, even if the virus disappears. Identifying this pathogenic viral damage may improve screening, treatment, and prevention. Immunizing children against EBV may protect against breast, ovarian, other cancers, and potentially even chronic unexplained diseases. ", doi="10.2196/50712", url="https://xmed.jmir.org/2025/1/e50712" } @Article{info:doi/10.2196/64539, author="S{\'a}nchez-Gonz{\'a}lez, Luis Juan and S{\'a}nchez-Rodr{\'i}guez, Luis Juan and Gonz{\'a}lez-Sarmiento, Rogelio and Navarro-L{\'o}pez, V{\'i}ctor and Ju{\'a}rez-Vela, Ra{\'u}l and P{\'e}rez, Jes{\'u}s and Mart{\'i}n-Vallejo, Javier", title="Effect of Physical Exercise on Telomere Length: Umbrella Review and Meta-Analysis", journal="JMIR Aging", year="2025", month="Jan", day="10", volume="8", pages="e64539", keywords="aging", keywords="chromosome", keywords="exercise", keywords="meta-analysis", keywords="telomere", keywords="telomerase", keywords="genes", keywords="genome", keywords="DNA", abstract="Background: Telomere length (TL) is a marker of cellular health and aging. Physical exercise has been associated with longer telomeres and, therefore, healthier aging. 
However, results supporting such effects vary across studies. Our aim was to synthesize existing evidence on the effect of different modalities and durations of physical exercise on TL. Objective: The aim of this study was to explore the needs and expectations of individuals with physical disabilities and their interventionists for the use of a virtual reality physical activity platform in a community organization. Methods: We performed an umbrella review and meta-analysis. Data sources included PubMed, Embase, Web of Science, Cochrane Library, and Scopus. We selected systematic reviews and meta-analyses of randomized and nonrandomized controlled clinical trials evaluating the effect of physical exercise on TL. Results: Our literature search retrieved 12 eligible systematic reviews, 5 of which included meta-analyses. We identified 22 distinct primary studies to estimate the overall effect size of physical exercise on TL. The overall effect size was 0.28 (95\% CI 0.118-0.439), with a heterogeneity test value Q of 43.08 (P=.003) and $I^2$ coefficient of 51\%. The number of weeks of intervention explained part of this heterogeneity ($Q_B$=8.25; P=.004), with higher effect sizes found in studies with an intervention of less than 30 weeks. Exercise modality explained additional heterogeneity within this subgroup ($Q_B$=10.28, P=.02). The effect sizes were small for aerobic exercise and endurance training, and moderate for high-intensity interval training. Conclusions: Our umbrella review and meta-analysis detected a small-moderate positive effect of physical exercise on TL, which seems to be influenced by the duration and type of physical exercise. High-quality studies looking into the impact of standardized, evidence-based physical exercise programs on TL are still warranted.
Trial Registration: PROSPERO CRD42024500736; https://www.crd.york.ac.uk/PROSPERO/display\_record.php?RecordID=500736 ", doi="10.2196/64539", url="https://aging.jmir.org/2025/1/e64539" } @Article{info:doi/10.2196/65506, author="Thimmapuram, Jayaram and Patel, D. Kamlesh and Bhatt, Deepti and Chauhan, Ajay and Madhusudhan, Divya and Bhatt, K. Kashyap and Deshpande, Snehal and Budhbhatti, Urvi and Joshi, Chaitanya", title="Effect of a Web-Based Heartfulness Program on the Mental Well-Being, Biomarkers, and Gene Expression Profile of Health Care Students: Randomized Controlled Trial", journal="JMIR Bioinform Biotech", year="2024", month="Dec", day="16", volume="5", pages="e65506", keywords="heartfulness", keywords="meditation", keywords="stress", keywords="anxiety", keywords="depression", keywords="interleukins", keywords="gene expression", keywords="dehydroepiandrosterone", keywords="DHEA", keywords="gene", keywords="mental health", keywords="randomized study", keywords="web-based program", keywords="mental well-being", keywords="well-being", keywords="mental", keywords="health care students", keywords="student", keywords="mRNA", keywords="messenger ribonucleic acid", keywords="youth", keywords="young adults", keywords="web-based", keywords="biomarker", keywords="RNA", keywords="bioinformatics", keywords="randomized", keywords="statistical analysis", keywords="nursing", keywords="physiotherapy", keywords="pharmacy", abstract="Background: Health care students often experience high levels of stress, anxiety, and mental health issues, making it crucial to address these challenges. Variations in stress levels may be associated with changes in dehydroepiandrosterone sulfate (DHEA-S) and interleukin-6 (IL-6) levels and gene expression. Meditative practices have demonstrated effectiveness in reducing stress and improving mental well-being. 
Objective: This study aims to assess the effects of Heartfulness meditation on mental well-being, DHEA-S, IL-6, and gene expression profile. Methods: The 78 enrolled participants were randomly assigned to the Heartfulness meditation (n=42, 54\%) and control (n=36, 46\%) groups. The participants completed the Perceived Stress Scale (PSS) and Depression Anxiety Stress Scale (DASS-21) at baseline and after week 12. Gene expression with messenger RNA sequencing and DHEA-S and IL-6 levels were also measured at baseline and the completion of the 12 weeks. Statistical analysis included descriptive statistics, paired t test, and 1-way ANOVA with Bonferroni correction. Results: The Heartfulness group exhibited a significant 17.35\% reduction in PSS score (from mean 19.71, SD 5.09 to mean 16.29, SD 4.83; P<.001) compared to a nonsignificant 6\% reduction in the control group (P=.31). DASS-21 scores decreased significantly by 27.14\% in the Heartfulness group (from mean 21.15, SD 9.56 to mean 15.41, SD 7.87; P<.001) while it increased nonsignificantly by 17\% in the control group (P=.04). For the DASS-21 subcomponents---the Heartfulness group showed a statistically significant 28.53\% reduction in anxiety (P=.006) and 27.38\% reduction in stress (P=.002) versus an insignificant 22\% increase in anxiety (P=.02) and 6\% increase in stress (P=.47) in the control group. Further, DHEA-S levels showed a significant 20.27\% increase in the Heartfulness group (from mean 251.71, SD 80.98 to mean 302.74, SD 123.56; P=.002) compared to an insignificant 9\% increase in the control group (from mean 285.33, SD 112.14 to mean 309.90, SD 136.90; P=.10). IL-6 levels showed a statistically significant difference in both the groups (from mean 4.93, SD 1.35 to mean 3.67, SD 1.0; 28.6\%; P<.001 [Heartfulness group] and from mean 4.52, SD 1.40 to mean 2.72, SD 1.74; 40\%; P<.001 [control group]). 
Notably, group comparison at 12 weeks revealed a significant difference in perceived stress, DASS-21 and its subcomponents, and IL-6 (all P<.05/4). The gene expression profile with messenger RNA sequencing identified 875 upregulated genes and 1539 downregulated genes in the Heartfulness group compared to baseline, and there were 292 upregulated genes and 1180 downregulated genes in the Heartfulness group compared to the control group after the intervention. Conclusions: Heartfulness practice was associated with decreased depression, anxiety, and stress scores and improved health measures in DHEA-S and IL-6 levels. The gene expression data point toward possible mechanisms of alleviation of symptoms of stress, anxiety and depression. Trial Registration: ISRCTN Registry ISRCTN82860715; https://doi.org/10.1186/ISRCTN82860715 ", doi="10.2196/65506", url="https://bioinform.jmir.org/2024/1/e65506", url="http://www.ncbi.nlm.nih.gov/pubmed/39680432" } @Article{info:doi/10.2196/50235, author="Jefferson, Emily and Milligan, Gordon and Johnston, Jenny and Mumtaz, Shahzad and Cole, Christian and Best, Joseph and Giles, Charles Thomas and Cox, Samuel and Masood, Erum and Horban, Scott and Urwin, Esmond and Beggs, Jillian and Chuter, Antony and Reilly, Gerry and Morris, Andrew and Seymour, David and Hopkins, Susan and Sheikh, Aziz and Quinlan, Philip", title="The Challenges and Lessons Learned Building a New UK Infrastructure for Finding and Accessing Population-Wide COVID-19 Data for Research and Public Health Analysis: The CO-CONNECT Project", journal="J Med Internet Res", year="2024", month="Nov", day="20", volume="26", pages="e50235", keywords="COVID-19", keywords="infrastructure", keywords="trusted research environments", keywords="safe havens", keywords="feasibility analysis", keywords="cohort discovery", keywords="federated analytics", keywords="federated discovery", keywords="lessons learned", keywords="population wide", keywords="data", keywords="public health", 
keywords="analysis", keywords="CO-CONNECT", keywords="challenges", keywords="data transformation", doi="10.2196/50235", url="https://www.jmir.org/2024/1/e50235" } @Article{info:doi/10.2196/62752, author="Hudon, Alexandre and Beaudoin, M{\'e}lissa and Phraxayavong, Kingsada and Potvin, St{\'e}phane and Dumais, Alexandre", title="Exploring the Intersection of Schizophrenia, Machine Learning, and Genomics: Scoping Review", journal="JMIR Bioinform Biotech", year="2024", month="Nov", day="15", volume="5", pages="e62752", keywords="schizophrenia", keywords="genomic data", keywords="machine learning", keywords="artificial intelligence", keywords="classification techniques", keywords="psychiatry", keywords="mental health", keywords="genomics", keywords="predictions", keywords="ML", keywords="psychiatric", keywords="synthesis", keywords="review methods", keywords="searches", keywords="scoping review", keywords="prediction models", abstract="Background: An increasing body of literature highlights the integration of machine learning with genomic data in psychiatry, particularly for complex mental health disorders such as schizophrenia. These advanced techniques offer promising potential for uncovering various facets of these disorders. A comprehensive review of the current applications of machine learning in conjunction with genomic data within this context can significantly enhance our understanding of the current state of research and its future directions. Objective: This study aims to conduct a systematic scoping review of the use of machine learning algorithms with genomic data in the field of schizophrenia. Methods: To conduct a systematic scoping review, a search was performed in the electronic databases MEDLINE, Web of Science, PsycNet (PsycINFO), and Google Scholar from 2013 to 2024. Studies at the intersection of schizophrenia, genomic data, and machine learning were evaluated. 
Results: The literature search identified 2437 eligible articles after removing duplicates. Following abstract screening, 143 full-text articles were assessed, and 121 were subsequently excluded. Therefore, 21 studies were thoroughly assessed. Various machine learning algorithms were used in the identified studies, with support vector machines being the most common. The studies notably used genomic data to predict schizophrenia, identify schizophrenia features, discover drugs, classify schizophrenia amongst other mental health disorders, and predict the quality of life of patients. Conclusions: Several high-quality studies were identified. Yet, the application of machine learning with genomic data in the context of schizophrenia remains limited. Future research is essential to further evaluate the portability of these models and to explore their potential clinical applications. ", doi="10.2196/62752", url="https://bioinform.jmir.org/2024/1/e62752", url="http://www.ncbi.nlm.nih.gov/pubmed/39546776" } @Article{info:doi/10.2196/63562, author="Fisher, J. Joshua and Grace, Tegan and Castles, A. Nathan and Jones, A. Elizabeth and Delforce, J. Sarah and Peters, E. Alexandra and Crombie, K. Gabrielle and Hoedt, C. Emily and Warren, E. Kirby and Kahl, GS Richard and Hirst, J. Jonathan and Pringle, G. Kirsty and Pennell, E. 
Craig", title="Methodology for Biological Sample Collection, Processing, and Storage in the Newcastle 1000 Pregnancy Cohort: Protocol for a Longitudinal, Prospective Population-Based Study in Australia", journal="JMIR Res Protoc", year="2024", month="Nov", day="15", volume="13", pages="e63562", keywords="pregnancy cohort study", keywords="biobanking protocol", keywords="toenails", keywords="blood", keywords="microbiome", keywords="urine", keywords="hair", keywords="pregnancy", keywords="cohort study", abstract="Background: Research in the developmental origins of health and disease provides compelling evidence that adverse events during the first 1000 days of life from conception can impact life course health. Despite many decades of research, we still lack a complete understanding of the mechanisms underlying some of these associations. The Newcastle 1000 Study (NEW1000) is a comprehensive, prospective population-based pregnancy cohort study based in Newcastle, New South Wales, Australia, that will recruit pregnant women and their partners at 11-14 weeks' gestation, with assessments at 20, 28, and 36 weeks; birth; 6 weeks; and 6 months, in order to provide detailed data about the first 1000 days of life to investigate the developmental origins of noncommunicable diseases. Objective: The study aims to provide a longitudinal multisystem approach to phenotyping, supported by robust clinical data and collection of biological samples in NEW1000. Methods: This manuscript describes in detail the large variety of samples collected in the study and the method of collection, storage, and utility of the samples in the biobank, with a particular focus on incorporation of the samples into emerging and novel large-scale ``-omics'' platforms, including the genome, microbiome, epigenome, transcriptome, fragmentome, metabolome, proteome, exposome, and cell-free DNA and RNA. 
Specifically, this manuscript details the methods used to collect, process, and store biological samples, including maternal, paternal, and fetal blood, microbiome (stool, skin, vaginal, oral), urine, saliva, hair, toenail, placenta, colostrum, and breastmilk. Results: Recruitment for the study began in March 2021. As of July 2024, 1040 women and 684 partners were enrolled, with 922 infants born. The NEW1000 biobank contains 24,357 plasma aliquots from ethylenediaminetetraacetic acid (EDTA) tubes, 5284 buffy coat aliquots, 4000 plasma aliquots from lithium heparin tubes, 15,884 blood serum aliquots, 2977 PAX RNA tubes, 26,595 urine sample aliquots, 2280 fecal swabs, 17,687 microbiome swabs, 2356 saliva sample aliquots, 1195 breastmilk sample aliquots, 4007 placental tissue aliquots, 2680 hair samples, and 2193 nail samples. Conclusions: NEW1000 will generate a multigenerational, deeply phenotyped cohort with a comprehensive biobank of samples relevant to a large variety of analyses, including multiple -omics platforms. International Registered Report Identifier (IRRID): DERR1-10.2196/63562 ", doi="10.2196/63562", url="https://www.researchprotocols.org/2024/1/e63562" } @Article{info:doi/10.2196/59556, author="Gutman, Barak and Shmilovitch, Amit-Haim and Aran, Dvir and Shelly, Shahar", title="Twenty-Five Years of AI in Neurology: The Journey of Predictive Medicine and Biological Breakthroughs", journal="JMIR Neurotech", year="2024", month="Nov", day="8", volume="3", pages="e59556", keywords="neurology", keywords="artificial intelligence", keywords="telemedicine", keywords="clinical advancements", keywords="mobile phone", doi="10.2196/59556", url="https://neuro.jmir.org/2024/1/e59556" } @Article{info:doi/10.2196/55632, author="Robertson, J. Alan and Mallett, J. 
Andrew and Stark, Zornitza and Sullivan, Clair", title="It Is in Our DNA: Bringing Electronic Health Records and Genomic Data Together for Precision Medicine", journal="JMIR Bioinform Biotech", year="2024", month="Jun", day="13", volume="5", pages="e55632", keywords="genomics", keywords="digital health", keywords="genetics", keywords="precision medicine", keywords="genomic", keywords="genomic data", keywords="electronic health records", keywords="DNA", keywords="supports", keywords="decision-making", keywords="timeliness", keywords="diagnosis", keywords="risk reduction", keywords="electronic medical records", doi="10.2196/55632", url="https://bioinform.jmir.org/2024/1/e55632", url="http://www.ncbi.nlm.nih.gov/pubmed/38935958" } @Article{info:doi/10.2196/54332, author="Thomas, Mara and Mackes, Nuria and Preuss-Dodhy, Asad and Wieland, Thomas and Bundschus, Markus", title="Assessing Privacy Vulnerabilities in Genetic Data Sets: Scoping Review", journal="JMIR Bioinform Biotech", year="2024", month="May", day="27", volume="5", pages="e54332", keywords="genetic privacy", keywords="privacy", keywords="data anonymization", keywords="reidentification", abstract="Background: Genetic data are widely considered inherently identifiable. However, genetic data sets come in many shapes and sizes, and the feasibility of privacy attacks depends on their specific content. Assessing the reidentification risk of genetic data is complex, yet there is a lack of guidelines or recommendations that support data processors in performing such an evaluation. Objective: This study aims to gain a comprehensive understanding of the privacy vulnerabilities of genetic data and create a summary that can guide data processors in assessing the privacy risk of genetic data sets. 
Methods: We conducted a 2-step search, in which we first identified 21 reviews published between 2017 and 2023 on the topic of genomic privacy and then analyzed all references cited in the reviews (n=1645) to identify 42 unique original research studies that demonstrate a privacy attack on genetic data. We then evaluated the type and components of genetic data exploited for these attacks as well as the effort and resources needed for their implementation and their probability of success. Results: From our literature review, we derived 9 nonmutually exclusive features of genetic data that are both inherent to any genetic data set and informative about privacy risk: biological modality, experimental assay, data format or level of processing, germline versus somatic variation content, content of single nucleotide polymorphisms, short tandem repeats, aggregated sample measures, structural variants, and rare single nucleotide variants. Conclusions: On the basis of our literature review, the evaluation of these 9 features covers the great majority of privacy-critical aspects of genetic data and thus provides a foundation and guidance for assessing genetic data risk. ", doi="10.2196/54332", url="https://bioinform.jmir.org/2024/1/e54332", url="http://www.ncbi.nlm.nih.gov/pubmed/38935957" } @Article{info:doi/10.2196/56884, author="Bui, Thu Huong Thi and Nguyen Thi
Phuong, Quynh and Cam Tu, Ho and Nguyen Phuong, Sinh and Pham, Thi Thuy and Vu, Thu and Nguyen Thi Thu, Huyen and Khanh Ho, Lam and Nguyen Tien, Dung", title="The Roles of NOTCH3 p.R544C and Thrombophilia Genes in Vietnamese Patients With Ischemic Stroke: Study Involving a Hierarchical Cluster Analysis", journal="JMIR Bioinform Biotech", year="2024", month="May", day="7", volume="5", pages="e56884", keywords="Glasgow Coma Scale", keywords="ischemic stroke", keywords="hierarchical cluster analysis", keywords="clustering", keywords="machine learning", keywords="MTHFR", keywords="NOTCH3", keywords="modified Rankin scale", keywords="National Institutes of Health Stroke Scale", keywords="prothrombin", keywords="thrombophilia", keywords="mutations", keywords="genetics", keywords="genomics", keywords="ischemia", keywords="risk", keywords="risk analysis", abstract="Background: The etiology of ischemic stroke is multifactorial. Several gene mutations have been identified as leading causes of cerebral autosomal dominant arteriopathy with subcortical infarcts and leukoencephalopathy (CADASIL), a hereditary disease that causes stroke and other neurological symptoms. Objective: We aimed to identify the variants of NOTCH3 and thrombophilia genes, and their complex interactions with other factors. Methods: We conducted a hierarchical cluster analysis (HCA) on the data of 100 patients diagnosed with ischemic stroke. The variants of NOTCH3 and thrombophilia genes were identified by polymerase chain reaction with confronting 2-pair primers and real-time polymerase chain reaction. The overall preclinical characteristics, cumulative cutpoint values, and factors associated with these somatic mutations were analyzed in unidimensional and multidimensional scaling models.
Results: We identified the following optimal cutpoints: creatinine, 83.67 (SD 9.19) {\textmu}mol/L; age, 54 (SD 5) years; prothrombin (PT) time, 13.25 (SD 0.17) seconds; and international normalized ratio (INR), 1.02 (SD 0.03). Using the Nagelkerke method, cutpoint 50\% values of the Glasgow Coma Scale score; modified Rankin scale score; and National Institutes of Health Stroke Scale scores at admission, after 24 hours, and at discharge were 12.77, 2.86 (SD 1.21), 9.83 (SD 2.85), 7.29 (SD 2.04), and 6.85 (SD 2.90), respectively. Conclusions: The variants of MTHFR (C677T and A1298C) and NOTCH3 p.R544C may influence the stroke severity under specific conditions of PT, creatinine, INR, and BMI, with risk ratios of 4.8 (95\% CI 1.53-15.04) and 3.13 (95\% CI 1.60-6.11), respectively (Pfisher<.05). It is interesting that although there are many genes linked to increased atrial fibrillation risk, not all of them are associated with ischemic stroke risk. With the detection of stroke risk loci, more information can be gained on their impacts and interconnections, especially in young patients. ", doi="10.2196/56884", url="https://bioinform.jmir.org/2024/1/e56884", url="http://www.ncbi.nlm.nih.gov/pubmed/38935968" } @Article{info:doi/10.2196/52059, author="Ahmadzia, Khorrami Homa and Dzienny, C. Alexa and Bopf, Mike and Phillips, M. 
Jaclyn and Federspiel, Jeffrey Jerome and Amdur, Richard and Rice, Murguia Madeline and Rodriguez, Laritza", title="Machine Learning Models for Prediction of Maternal Hemorrhage and Transfusion: Model Development Study", journal="JMIR Bioinform Biotech", year="2024", month="Feb", day="5", volume="5", pages="e52059", keywords="postpartum hemorrhage", keywords="machine learning", keywords="prediction", keywords="maternal", keywords="predict", keywords="predictive", keywords="bleeding", keywords="hemorrhage", keywords="hemorrhaging", keywords="birth", keywords="postnatal", keywords="blood", keywords="transfusion", keywords="antepartum", keywords="obstetric", keywords="obstetrics", keywords="women's health", keywords="gynecology", keywords="gynecological", abstract="Background: Current postpartum hemorrhage (PPH) risk stratification is based on traditional statistical models or expert opinion. Machine learning could optimize PPH prediction by allowing for more complex modeling. Objective: We sought to improve PPH prediction and compare machine learning and traditional statistical methods. Methods: We developed models using the Consortium for Safe Labor data set (2002-2008) from 12 US hospitals. The primary outcome was a transfusion of blood products or PPH (estimated blood loss of $\geq$1000 mL). The secondary outcome was a transfusion of any blood product. Fifty antepartum and intrapartum characteristics and hospital characteristics were included. Logistic regression, support vector machines, multilayer perceptron, random forest, and gradient boosting (GB) were used to generate prediction models. The area under the receiver operating characteristic curve (ROC-AUC) and area under the precision/recall curve (PR-AUC) were used to compare performance. Results: Among 228,438 births, 5760 (3.1\%) women had a postpartum hemorrhage, 5170 (2.8\%) had a transfusion, and 10,344 (5.6\%) met the criteria for the transfusion-PPH composite.
Models predicting the transfusion-PPH composite using antepartum and intrapartum features had the best positive predictive values, with the GB machine learning model performing best overall (ROC-AUC=0.833, 95\% CI 0.828-0.838; PR-AUC=0.210, 95\% CI 0.201-0.220). The most predictive features in the GB model predicting the transfusion-PPH composite were the mode of delivery, oxytocin incremental dose for labor (mU/minute), intrapartum tocolytic use, presence of anesthesia nurse, and hospital type. Conclusions: Machine learning offers higher discriminability than logistic regression in predicting PPH. The Consortium for Safe Labor data set may not be optimal for analyzing risk due to strong subgroup effects, which decreases accuracy and limits generalizability. ", doi="10.2196/52059", url="https://bioinform.jmir.org/2024/1/e52059", url="http://www.ncbi.nlm.nih.gov/pubmed/38935950" } @Article{info:doi/10.2196/37951, author="Kurasawa, Hisashi and Waki, Kayo and Chiba, Akihiro and Seki, Tomohisa and Hayashi, Katsuyoshi and Fujino, Akinori and Haga, Tsuneyuki and Noguchi, Takashi and Ohe, Kazuhiko", title="Treatment Discontinuation Prediction in Patients With Diabetes Using a Ranking Model: Machine Learning Model Development", journal="JMIR Bioinform Biotech", year="2022", month="Sep", day="23", volume="3", number="1", pages="e37951", keywords="machine learning", keywords="machine-learned ranking model", keywords="treatment discontinuation", keywords="diabetes", keywords="prediction", keywords="electronic health record", keywords="EHR", keywords="big data", keywords="ranking", keywords="algorithm", abstract="Background: Treatment discontinuation (TD) is one of the major prognostic issues in diabetes care, and several models have been proposed to predict a missed appointment that may lead to TD in patients with diabetes by using binary classification models for the early detection of TD and for providing intervention support for patients. 
However, as binary classification models output the probability of a missed appointment occurring within a predetermined period, they are limited in their ability to estimate the magnitude of TD risk in patients with inconsistent intervals between appointments, making it difficult to prioritize patients for whom intervention support should be provided. Objective: This study aimed to develop a machine-learned prediction model that can output a TD risk score defined by the length of time until TD and prioritize patients for intervention according to their TD risk. Methods: This model included patients with diagnostic codes indicative of diabetes at the University of Tokyo Hospital between September 3, 2012, and May 17, 2014. The model was internally validated with patients from the same hospital from May 18, 2014, to January 29, 2016. The data used in this study included 7551 patients who visited the hospital after January 1, 2004, and had diagnostic codes indicative of diabetes. In particular, data that were recorded in the electronic medical records between September 3, 2012, and January 29, 2016, were used. The main outcome was the TD of a patient, which was defined as missing a scheduled clinical appointment and having no hospital visits within 3 times the average number of days between the visits of the patient and within 60 days. The TD risk score was calculated by using the parameters derived from the machine-learned ranking model. The prediction capacity was evaluated by using test data with the C-index for the performance of ranking patients, area under the receiver operating characteristic curve, and area under the precision-recall curve for discrimination, in addition to a calibration plot. Results: The means (95\% confidence limits) of the C-index, area under the receiver operating characteristic curve, and area under the precision-recall curve for the TD risk score were 0.749 (0.655, 0.823), 0.758 (0.649, 0.857), and 0.713 (0.554, 0.841), respectively. 
The observed and predicted probabilities were correlated with the calibration plots. Conclusions: A TD risk score was developed for patients with diabetes by combining a machine-learned method with electronic medical records. The score calculation can be integrated into medical records to identify patients at high risk of TD, which would be useful in supporting diabetes care and preventing TD. ", doi="10.2196/37951", url="https://bioinform.jmir.org/2022/1/e37951" } @Article{info:doi/10.2196/38845, author="Monahan, Corneille Ann and Feldman, S. Sue and Fitzgerald, P. Tony", title="Reducing Crowding in Emergency Departments With Early Prediction of Hospital Admission of Adult Patients Using Biomarkers Collected at Triage: Retrospective Cohort Study", journal="JMIR Bioinform Biotech", year="2022", month="Sep", day="13", volume="3", number="1", pages="e38845", keywords="emergency care", keywords="prehospital", keywords="emergency", keywords="information system", keywords="crowding", keywords="boarding", keywords="exit block", keywords="medical informatics", keywords="application", keywords="health service research", keywords="personalized medicine", keywords="predictive medicine", keywords="model", keywords="probabilistic", keywords="polynomial model", keywords="decision support technique", keywords="decision support", keywords="evidence-based health care", keywords="management information systems", keywords="algorithm", keywords="machine learning", keywords="predict", keywords="risk", abstract="Background: Emergency department crowding continues to threaten patient safety and cause poor patient outcomes. Prior models designed to predict hospital admission have had biases. 
Predictive models that successfully estimate the probability of patient hospital admission would be useful in reducing or preventing emergency department ``boarding'' and hospital ``exit block'' and would reduce emergency department crowding by initiating earlier hospital admission and avoiding protracted bed procurement processes. Objective: To develop a model to predict imminent adult patient hospital admission from the emergency department early in the patient visit by utilizing existing clinical descriptors (ie, patient biomarkers) that are routinely collected at triage and captured in the hospital's electronic medical records. Biomarkers are advantageous for modeling due to their early and routine collection at triage; instantaneous availability; standardized definition, measurement, and interpretation; and their freedom from the confines of patient histories (ie, they are not affected by inaccurate patient reports on medical history, unavailable reports, or delayed report retrieval). Methods: This retrospective cohort study evaluated 1 year of consecutive data events among adult patients admitted to the emergency department and developed an algorithm that predicted which patients would require imminent hospital admission. Eight predictor variables were evaluated for their roles in the outcome of the patient emergency department visit. Logistic regression was used to model the study data. Results: The 8-predictor model included the following biomarkers: age, systolic blood pressure, diastolic blood pressure, heart rate, respiration rate, temperature, gender, and acuity level. The model used these biomarkers to identify emergency department patients who required hospital admission. Our model performed well, with good agreement between observed and predicted admissions, indicating a well-fitting and well-calibrated model that showed good ability to discriminate between patients who would and would not be admitted. 
Conclusions: This prediction model based on primary data identified emergency department patients with an increased risk of hospital admission. This actionable information can be used to improve patient care and hospital operations, especially by reducing emergency department crowding by looking ahead to predict which patients are likely to be admitted following triage, thereby providing needed information to initiate the complex admission and bed assignment processes much earlier in the care continuum. ", doi="10.2196/38845", url="https://bioinform.jmir.org/2022/1/e38845" } @Article{info:doi/10.2196/38226, author="Xu, Lingxiao and Liu, Jun and Han, Chunxia and Ai, Zisheng", title="The Application of Machine Learning in Predicting Mortality Risk in Patients With Severe Femoral Neck Fractures: Prediction Model Development Study", journal="JMIR Bioinform Biotech", year="2022", month="Aug", day="19", volume="3", number="1", pages="e38226", keywords="machine learning", keywords="femoral neck fracture", keywords="hospital mortality", keywords="hip", keywords="fracture", keywords="mortality", keywords="prediction", keywords="intensive care unit", keywords="ICU", keywords="decision-making", keywords="risk", keywords="assessment", keywords="prognosis", abstract="Background: Femoral neck fracture (FNF) accounts for approximately 3.58\% of all fractures in the entire body, exhibiting an increasing trend each year. According to a survey, in 1990, the total number of hip fractures in men and women worldwide was approximately 338,000 and 917,000, respectively. In China, FNFs account for 48.22\% of hip fractures. Currently, many studies have been conducted on postdischarge mortality and mortality risk in patients with FNF. However, there have been no definitive studies on in-hospital mortality or its influencing factors in patients with severe FNF admitted to the intensive care unit. 
Objective: In this paper, 3 machine learning methods were used to construct a nosocomial death prediction model for patients admitted to intensive care units to assist clinicians in early clinical decision-making. Methods: A retrospective analysis was conducted using information of a patient with FNF from the Medical Information Mart for Intensive Care III. After balancing the data set using the Synthetic Minority Oversampling Technique algorithm, patients were randomly separated into a 70\% training set and a 30\% testing set for the development and validation, respectively, of the prediction model. Random forest, extreme gradient boosting, and backpropagation neural network prediction models were constructed with nosocomial death as the outcome. Model performance was assessed using the area under the receiver operating characteristic curve, accuracy, precision, sensitivity, and specificity. The predictive value of the models was verified in comparison to the traditional logistic model. Results: A total of 366 patients with FNFs were selected, including 48 cases (13.1\%) of in-hospital death. Data from 636 patients were obtained by balancing the data set with the in-hospital death group to survival group as 1:1. The 3 machine learning models exhibited high predictive accuracy, and the area under the receiver operating characteristic curve of the random forest, extreme gradient boosting, and backpropagation neural network were 0.98, 0.97, and 0.95, respectively, all with higher predictive performance than the traditional logistic regression model. Ranking the importance of the feature variables, the top 10 feature variables that were meaningful for predicting the risk of in-hospital death of patients were the Simplified Acute Physiology Score II, lactate, creatinine, gender, vitamin D, calcium, creatine kinase, creatine kinase isoenzyme, white blood cell, and age. 
Conclusions: Death risk assessment models constructed using machine learning have positive significance for predicting the in-hospital mortality of patients with severe disease and provide a valid basis for reducing in-hospital mortality and improving patient prognosis. ", doi="10.2196/38226", url="https://bioinform.jmir.org/2022/1/e38226" } @Article{info:doi/10.2196/38512, author="Skovbjerg, Frederik and Honor{\'e}, Helene and Mechlenburg, Inger and Lipperts, Matthijs and Gade, Rikke and N{\ae}ss-Schmidt, Trillingsgaard Erhard", title="Monitoring Physical Behavior in Rehabilitation Using a Machine Learning--Based Algorithm for Thigh-Mounted Accelerometers: Development and Validation Study", journal="JMIR Bioinform Biotech", year="2022", month="Jul", day="26", volume="3", number="1", pages="e38512", keywords="activity recognition", keywords="random forest", keywords="acquired brain injury", keywords="biometric monitoring", keywords="machine learning", keywords="physical activity", abstract="Background: Physical activity is emerging as an outcome measure. Accelerometers have become an important tool in monitoring physical behavior, and newer analytical approaches of recognition methods increase the degree of details. Many studies have achieved high performance in the classification of physical behaviors through the use of multiple wearable sensors; however, multiple wearables can be impractical and lower compliance. Objective: The aim of this study was to develop and validate an algorithm for classifying several daily physical behaviors using a single thigh-mounted accelerometer and a supervised machine-learning scheme. Methods: We collected training data by adding the behavior classes---running, cycling, stair climbing, wheelchair ambulation, and vehicle driving---to an existing algorithm with the classes of sitting, lying, standing, walking, and transitioning. After combining the training data, we used a random forest learning scheme for model development. 
We validated the algorithm through a simulated free-living procedure using chest-mounted cameras for establishing the ground truth. Furthermore, we adjusted our algorithm and compared the performance with an existing algorithm based on vector thresholds. Results: We developed an algorithm to classify 11 physical behaviors relevant for rehabilitation. In the simulated free-living validation, the performance of the algorithm decreased to 57\% as an average for the 11 classes (F-measure). After merging classes into sedentary behavior, standing, walking, running, and cycling, the result revealed high performance in comparison to both the ground truth and the existing algorithm. Conclusions: Using a single thigh-mounted accelerometer, we obtained high classification levels within specific behaviors. The behaviors classified with high levels of performance mostly occur in populations with higher levels of functioning. Further development should aim at describing behaviors within populations with lower levels of functioning. ", doi="10.2196/38512", url="https://bioinform.jmir.org/2022/1/e38512" } @Article{info:doi/10.2196/33186, author="Singhal, Richa and Lukose, Rachel and Carr, Gwenyth and Moktar, Afsoon and Gonzales-Urday, Lucia Ana and Rouchka, C. Eric and Vajravelu, N. 
Bathri", title="Differential Expression of Long Noncoding RNAs in Murine Myoblasts After Short Hairpin RNA-Mediated Dysferlin Silencing In Vitro: Microarray Profiling", journal="JMIR Bioinform Biotech", year="2022", month="Jun", day="17", volume="3", number="1", pages="e33186", keywords="dysferlinopathy", keywords="long noncoding RNAs", keywords="lncRNA", keywords="abnormal expression", keywords="muscular dystrophy", keywords="limb-girdle muscular dystrophy 2B", keywords="LGMD-2B", keywords="messenger RNA", keywords="mRNA", keywords="quantitative real-time polymerase chain reaction", keywords="qRT-PCR", keywords="gene ontology", keywords="bioinformatics", keywords="transcription", keywords="noncoding RNA", keywords="protein expression", abstract="Background: Long noncoding RNAs (lncRNAs) are noncoding RNA transcripts greater than 200 nucleotides in length and are known to play a role in regulating the transcription of genes involved in vital cellular functions. We hypothesized the disease process in dysferlinopathy is linked to an aberrant expression of lncRNAs and messenger RNAs (mRNAs). Objective: In this study, we compared the lncRNA and mRNA expression profiles between wild-type and dysferlin-deficient murine myoblasts (C2C12 cells). Methods: LncRNA and mRNA expression profiling were performed using a microarray. Several lncRNAs with differential expression were validated using quantitative real-time polymerase chain reaction. Gene Ontology (GO) analysis was performed to understand the functional role of the differentially expressed mRNAs. Further bioinformatics analysis was used to explore the potential function, lncRNA-mRNA correlation, and potential targets of the differentially expressed lncRNAs. Results: We found 3195 lncRNAs and 1966 mRNAs that were differentially expressed. 
The chromosomal distribution of the differentially expressed lncRNAs and mRNAs was unequal, with chromosome 2 having the highest number of lncRNAs and chromosome 7 having the highest number of mRNAs that were differentially expressed. Pathway analysis of the differentially expressed genes indicated the involvement of several signaling pathways including PI3K-Akt, Hippo, and pathways regulating the pluripotency of stem cells. The differentially expressed genes were also enriched for the GO terms, developmental process and muscle system process. Network analysis identified 8 statistically significant (P<.05) network objects from the upregulated lncRNAs and 3 statistically significant network objects from the downregulated lncRNAs. Conclusions: Our results thus far imply that dysferlinopathy is associated with an aberrant expression of multiple lncRNAs, many of which may have a specific function in the disease process. GO terms and network analysis suggest a muscle-specific role for these lncRNAs. To elucidate the specific roles of these abnormally expressed noncoding RNAs, further studies engineering their expression are required. ", doi="10.2196/33186", url="https://bioinform.jmir.org/2022/1/e33186" } @Article{info:doi/10.2196/30890, author="Choudhury, Joydhriti and Ashraf, Bin Faisal", title="An Analysis of Different Distance-Linkage Methods for Clustering Gene Expression Data and Observing Pleiotropy: Empirical Study", journal="JMIR Bioinform Biotech", year="2022", month="Jun", day="17", volume="3", number="1", pages="e30890", keywords="gene clustering", keywords="gene expression", keywords="distance metric", keywords="linkage method", keywords="hierarchical clustering", keywords="pleiotropy", abstract="Background: Large amounts of biological data have been generated over the last few decades, encouraging scientists to look for connections between genes that cause various diseases. Clustering illustrates such a relationship between numerous species and genes. 
Finding an appropriate distance-linkage metric to construct clusters from diverse biological data sets has thus become critical. Pleiotropy is also important for a gene's expression to vary and create varied consequences in living things. Finding the pleiotropy of genes responsible for various diseases has become a major research challenge. Objective: Our goal was to establish the optimal distance-linkage strategy for creating reliable clusters from diverse data sets and identifying the common genes that cause various tumors to observe genes with pleiotropic effect. Methods: We considered 4 linking methods---single, complete, average, and ward---and 3 distance metrics---Euclidean, maximum, and Manhattan distance. For assessing the quality of different sets of clusters, we used a fitness function that combines silhouette width and within-cluster distance. Results: According to our findings, the maximum distance measure produces the highest-quality clusters. Moreover, for medium data set, the average linkage method, and for large data set, the ward linkage method works best. The outcome is not improved by using ensemble clustering. We also discovered genes that cause 3 different cancers and used gene enrichment to confirm our findings. Conclusions: Accuracy is crucial in clustering, and we investigated the accuracy of numerous clustering techniques in our research. Other studies may aid related works if the data set is similar to ours. ", doi="10.2196/30890", url="https://bioinform.jmir.org/2022/1/e30890" } @Article{info:doi/10.2196/37391, author="Math, K. 
Renukaradhya and Mudennavar, Nayana and Javaregowda, Kanive Palaksha and Savanur, Ambuja", title="In Silico Comparative Analysis of the Functional, Structural, and Evolutionary Properties of SARS-CoV-2 Variant Spike Proteins", journal="JMIR Bioinform Biotech", year="2022", month="May", day="30", volume="3", number="1", pages="e37391", keywords="spike protein variants", keywords="NCBI", keywords="bioinformatics tools", keywords="pI", keywords="isoelectric point", keywords="2D map", keywords="phylogenetic tree", keywords="COVID-19", keywords="COVID therapy", keywords="SARS-CoV-2 treatment", keywords="therapeutic", keywords="spike protein", keywords="protein", keywords="prophylactic", keywords="sequence analysis", keywords="genomic", keywords="bioinformatics", keywords="viral protein", abstract="Background: A recent global outbreak of COVID-19 caused by the severe acute respiratory syndrome coronavirus-2 (SARS-CoV-2) created a pandemic and emerged as a potential threat to humanity. The analysis of virus genetic composition has revealed that the spike protein, one of the major structural proteins, facilitates the entry of the virus to host cells. Objective: The spike protein has become the main target for prophylactics and therapeutics studies. Here, we compared the spike proteins of SARS-CoV-2 variants using bioinformatics tools. Methods: The spike protein sequences of wild-type SARS-CoV-2 and its 6 variants---D614G, alpha (B.1.1.7), beta (B.1.351), delta (B.1.617.2), gamma (P.1), and omicron (B.1.1.529)---were retrieved from the NCBI database. The ClustalX program was used to sequence multiple alignment and perform mutational analysis. Several online bioinformatics tools were used to predict the physiological, immunological, and structural features of the spike proteins of SARS-CoV-2 variants. A phylogenetic tree was constructed using CLC software. Statistical analysis of the data was done using jamovi 2 software. 
Results: Multiple sequence analysis revealed that the P681R mutation in the delta variant, which changed an amino acid from histidine (H) to arginine (R), made the protein more alkaline due to arginine's high pKa value (12.5) compared to histidine's (6.0). Physicochemical properties revealed the relatively higher isoelectric point (7.34) and aliphatic index (84.65) of the delta variant compared to other variants. Statistical analysis of the isoelectric point, antigenicity, and immunogenicity of all the variants revealed significant correlation, with P values ranging from <.007 to .04. The generation of a 2D gel map showed the separation of the delta spike protein from a grouping of the other variants. The phylogenetic tree of the spike proteins showed that the delta variant was close to and a mix of the Rousettus bat coronavirus and MERS-CoV. Conclusions: The comparative analysis of SARS-CoV-2 variants revealed that the delta variant is more aliphatic in nature, which provides more stability to it and subsequently influences virus behavior. ", doi="10.2196/37391", url="https://bioinform.jmir.org/2022/1/e37391", url="http://www.ncbi.nlm.nih.gov/pubmed/35669291" } @Article{info:doi/10.2196/32437, author="Khokhar, Manoj and Roy, Dipayan and Tomo, Sojit and Gadwal, Ashita and Sharma, Praveen and Purohit, Purvi", title="Novel Molecular Networks and Regulatory MicroRNAs in Type 2 Diabetes Mellitus: Multiomics Integration and Interactomics Study", journal="JMIR Bioinform Biotech", year="2022", month="Feb", day="23", volume="3", number="1", pages="e32437", keywords="type 2 diabetes mellitus", keywords="interactomics", keywords="integrative genomics", keywords="protein-protein interaction", keywords="microRNAs", keywords="miRNA", keywords="bioinformatics", keywords="multiomics", keywords="genomics", keywords="gene expression", abstract="Background: Type 2 diabetes mellitus (T2DM) is a metabolic disorder with severe comorbidities. 
A multiomics approach can facilitate the identification of novel therapeutic targets and biomarkers with proper validation of potential microRNA (miRNA) interactions. Objective: The aim of this study was to identify significant differentially expressed common target genes in various tissues and their regulating miRNAs from publicly available Gene Expression Omnibus (GEO) data sets of patients with T2DM using in silico analysis. Methods: Using differentially expressed genes (DEGs) identified from 5 publicly available T2DM data sets, we performed functional enrichment, coexpression, and network analyses to identify pathways, protein-protein interactions, and miRNA-mRNA interactions involved in T2DM. Results: We extracted 2852, 8631, 5501, 3662, and 3753 DEGs from the expression profiles of GEO data sets GSE38642, GSE25724, GSE20966, GSE26887, and GSE23343, respectively. DEG analysis showed that 16 common genes were enriched in insulin secretion, endocrine resistance, and other T2DM-related pathways. Four DEGs, MAML3, EEF1D, NRG1, and CDK5RAP2, were important in the cluster network regulated by commonly targeted miRNAs (hsa-let-7b-5p, hsa-mir-155-5p, hsa-mir-124-3p, hsa-mir-1-3p), which are involved in the advanced glycation end products (AGE)-receptor for advanced glycation end products (RAGE) signaling pathway, culminating in diabetic complications and endocrine resistance. Conclusions: This study identified tissue-specific DEGs in T2DM, especially pertaining to the heart, liver, and pancreas. We identified a total of 16 common DEGs and the top four common targeting miRNAs (hsa-let-7b-5p, hsa-miR-124-3p, hsa-miR-1-3p, and hsa-miR-155-5p). The miRNAs identified are involved in regulating various pathways, including the phosphatidylinositol-3-kinase-protein kinase B, endocrine resistance, and AGE-RAGE signaling pathways. 
", doi="10.2196/32437", url="https://bioinform.jmir.org/2022/1/e32437" } @Article{info:doi/10.2196/jopm.8958, author="Katsanis, Huston Sara and Minear, A. Mollie and Sadeghpour, Azita and Cope, Heidi and Perilla, Yezmin and Cook-Deegan, Robert and Katsanis, Nicholas and Davis, E. Erica and Angrist, Misha", title="Participant-Partners in Genetic Research: An Exome Study with Families of Children with Unexplained Medical Conditions", journal="J Participat Med", year="2018", month="Jan", day="30", volume="10", number="1", pages="e2", keywords="partnership", keywords="exome sequencing", keywords="genome sequencing", keywords="return of results", keywords="participant engagement", abstract="Background: Unlike aggregate research on groups of participants with a particular disorder, genomic research on discrete families' rare conditions could result in data of use to families, their healthcare, as well as generating knowledge on the human genome. Objective: In a study of families seeking to rule in/out genetic causes for their children's medical conditions via exome sequencing, we solicited their views on the importance of genomic information. Our aim was to learn the interests of parents in seeking genomic research data and to gauge their responsiveness and engagement with the research team. Methods: At enrollment, we offered participants options in the consent form for receiving potentially clinically relevant research results. We also offered an option of being a ``partner'' versus a ``traditional'' participant; partners could be re-contacted for research and study activities. We invited adult partners to complete a pre-exome survey, attend annual family forums, and participate in other inter-family interaction opportunities. Results: Of the 385 adults enrolled, 79\% opted for ``partnership'' with the research team. Nearly all (99.2\%) participants opted to receive research results pertaining to their children's primary conditions. 
A majority indicated the desire to receive additional clinically relevant results outside the scope of their children's conditions (92.7\%) and an interest in non-clinically relevant genetic information (82.7\%). Conclusions: Most participants chose partnership, including its rights and potential burdens; however, active engagement in study activities remained the exception. Not surprisingly, the overwhelming majority of participants---both partners and traditional---expected to receive all genetic information resulting from the research study. ", doi="10.2196/jopm.8958", url="http://jopm.jmir.org/2018/1/e2/" }