forked from foliveirafilho/tpch-pgsql
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathload.py
More file actions
130 lines (114 loc) · 4.36 KB
/
load.py
File metadata and controls
130 lines (114 loc) · 4.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import os
from tpch4pgsql import postgresqldb as pgdb
def clean_database(query_root, host, port, db_name, user, password, tables):
"""Drops the tables if they exist
Args:
query_root (str): Directory in which generated queries directory exists
host (str): IP/hostname of the PG instance
port (int): port for the PG instance
db_name (str): name of the tpch database
user (str): user for the PG instance
password (str): password for the PG instance
tables (str): list of tables
Return:
0 if successful
non zero otherwise
"""
try:
conn = pgdb.PGDB(host, port, db_name, user, password)
try:
for table in tables:
conn.executeQuery("DROP TABLE IF EXISTS %s " % table)
except Exception as e:
print("unable to remove existing tables. %s" % e)
return 1
print("dropped existing tables")
conn.commit()
conn.close()
return 0
except Exception as e:
print("unable to connect to the database. %s" % e)
return 1
def create_schema(query_root, host, port, db_name, user, password, prep_query_dir):
"""Creates the schema for the tests. Drops the tables if they exist
Args:
query_root (str): Directory in which generated queries directory exists
host (str): IP/hostname of the PG instance
port (int): port for the PG instance
db_name (str): name of the tpch database
user (str): user for the PG instance
password (str): password for the PG instance
prep_query_dir (str): directory with queries for schema creation
Return:
0 if successful
non zero otherwise
"""
try:
conn = pgdb.PGDB(host, port, db_name, user, password)
try:
conn.executeQueryFromFile(os.path.join(query_root, prep_query_dir, "create_tbl.sql"))
except Exception as e:
print("unable to run create tables. %s" % e)
return 1
conn.commit()
conn.close()
except Exception as e:
print("unable to connect to the database. %s" % e)
return 1
def load_tables(data_dir, host, port, db_name, user, password, tables, load_dir):
"""Loads data into tables. Expects that tables are already empty.
Args:
data_dir (str): Directory in which load data exists
host (str): IP/hostname of the PG instance
port (int): port for the PG instance
db_name (str): name of the tpch database
user (str): user for the PG instance
password (str): password for the PG instance
tables (str): list of tables
load_dir (str): directory with data files to be loaded
Return:
0 if successful
non zero otherwise
"""
try:
conn = pgdb.PGDB(host, port, db_name, user, password)
try:
for table in tables:
filepath = os.path.join(data_dir, load_dir, table.lower() + ".tbl.csv")
conn.copyFrom(filepath, separator="|", table=table.lower())
conn.commit()
except Exception as e:
print("unable to run load tables. %s" %e)
return 1
conn.close()
return 0
except Exception as e:
print("unable to connect to the database. %s" % e)
return 1
def index_tables(query_root, host, port, db_name, user, password, prep_query_dir):
"""Creates indexes and foreign keys for loaded tables.
Args:
query_root (str): Directory in which preparation queries directory exists
host (str): IP/hostname of the PG instance
port (int): port for the PG instance
db_name (str): name of the tpch database
user (str): user for the PG instance
password (str): password for the PG instance
prep_query_dir (str): directory with create index script
Return:
0 if successful
non zero otherwise
"""
try:
conn = pgdb.PGDB(host, port, db_name, user, password)
try:
conn.executeQueryFromFile(os.path.join(query_root, prep_query_dir, "create_idx.sql"))
conn.commit()
except Exception as e:
print("unable to run index tables. %s" % e)
return 1
conn.close()
return 0
except Exception as e:
print("unable to connect to the database. %s" % e)
return 1