Skip to content

Commit 8a5f95e

Browse files
authored
Add MySQL schema for Biological Database integration project
Adds BD_assignment.sql — the SQL schema defining database structure for the Biological Database Integration Project. This script creates the `bioinfo_db` database and associated tables for: - gene_annotations: Ensembl-based gene details and identifiers - kegg_data: KEGG pathway enrichment outputs - mirna_data: miRNA mapping information - summary: combined relational view joining gene, pathway, and miRNA data The schema supports data integration from Ensembl, UniProt, KEGG, and miRBase, enabling structured storage and querying of biological datasets retrieved via R scripts.
1 parent 3bc7085 commit 8a5f95e

File tree

1 file changed

+63
-0
lines changed

1 file changed

+63
-0
lines changed

BD_assignment.sql

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
-- #########################################################
2+
-- Project: Biological Databases – MySQL Schema
3+
-- Student ID: B269797
4+
-- Description:
5+
-- Define schema and relationships for integrated gene,
6+
-- protein, KEGG pathway, and miRNA datasets.
7+
-- #########################################################
8+
9+
CREATE DATABASE IF NOT EXISTS bioinfo_db;
10+
USE bioinfo_db;
11+
12+
-- ---- Gene Annotation Table ----
13+
CREATE TABLE IF NOT EXISTS gene_annotations (
14+
ensembl_gene_id VARCHAR(30) PRIMARY KEY,
15+
external_gene_name VARCHAR(50),
16+
description TEXT,
17+
gene_biotype VARCHAR(50),
18+
chromosome_name VARCHAR(10),
19+
start_position INT,
20+
end_position INT,
21+
strand TINYINT,
22+
uniprotswissprot VARCHAR(20),
23+
entrezgene_id VARCHAR(20)
24+
);
25+
26+
-- ---- KEGG Pathway Table ----
27+
CREATE TABLE IF NOT EXISTS kegg_data (
28+
id INT AUTO_INCREMENT PRIMARY KEY,
29+
pathway_id VARCHAR(20),
30+
Description VARCHAR(255),
31+
pvalue FLOAT,
32+
qvalue FLOAT,
33+
geneID VARCHAR(255)
34+
);
35+
36+
-- ---- miRNA Table ----
37+
CREATE TABLE IF NOT EXISTS mirna_data (
38+
id INT AUTO_INCREMENT PRIMARY KEY,
39+
ensembl_gene_id VARCHAR(30),
40+
external_gene_name VARCHAR(50),
41+
mirbase_id VARCHAR(30),
42+
mirbase_accession VARCHAR(30)
43+
);
44+
45+
-- ---- Summary Integration Table ----
46+
CREATE TABLE IF NOT EXISTS summary AS
47+
SELECT
48+
g.ensembl_gene_id,
49+
g.external_gene_name,
50+
g.description,
51+
g.gene_biotype,
52+
k.Description AS pathway,
53+
k.pvalue,
54+
m.mirbase_id,
55+
m.mirbase_accession
56+
FROM gene_annotations g
57+
LEFT JOIN kegg_data k ON g.entrezgene_id = k.geneID
58+
LEFT JOIN mirna_data m ON g.ensembl_gene_id = m.ensembl_gene_id;
59+
60+
-- #########################################################
61+
-- End of SQL Schema
62+
-- #########################################################
63+

0 commit comments

Comments
 (0)