-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Expand file tree
/
Copy pathbootstrap.py
More file actions
73 lines (58 loc) · 2.49 KB
/
bootstrap.py
File metadata and controls
73 lines (58 loc) · 2.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import click
from feast.file_utils import replace_str_in_file
from feast.infra.utils import aws_utils
def _build_example_driver_df():
    """Generate the example driver stats data for the last 15 days.

    The time window ends at the current local hour (minutes, seconds and
    microseconds are zeroed) and covers the entities 1001-1005.
    """
    # Imported lazily: these are only needed when example data is uploaded.
    from datetime import datetime, timedelta

    from feast.driver_test_data import create_driver_hourly_stats_df

    end_date = datetime.now().replace(microsecond=0, second=0, minute=0)
    start_date = end_date - timedelta(days=15)
    driver_entities = [1001, 1002, 1003, 1004, 1005]
    return create_driver_hourly_stats_df(driver_entities, start_date, end_date)


def _upload_example_data(
    aws_region: str,
    cluster_id: str,
    database: str,
    user: str,
    s3_staging_location: str,
    iam_role: str,
) -> None:
    """Drop and re-create the 'feast_driver_hourly_stats' Redshift table.

    The example data is staged as a parquet file under
    ``<s3_staging_location>/data/`` and uploaded through ``aws_utils``.
    """
    # Build the data before touching Redshift, so a failure while generating
    # it cannot leave the existing table dropped with nothing to replace it.
    driver_df = _build_example_driver_df()

    client = aws_utils.get_redshift_data_client(aws_region)
    s3 = aws_utils.get_s3_resource(aws_region)

    # NOTE(review): the third positional argument is passed as None, exactly
    # as before - presumably an optional workgroup; confirm against aws_utils.
    aws_utils.execute_redshift_statement(
        client,
        cluster_id,
        None,
        database,
        user,
        "DROP TABLE IF EXISTS feast_driver_hourly_stats",
    )

    # Strip trailing slashes so a location entered as "s3://bucket/prefix/"
    # does not produce a key containing "//".
    staging_root = s3_staging_location.rstrip("/")
    aws_utils.upload_df_to_redshift(
        client,
        cluster_id,
        None,
        database,
        user,
        s3,
        f"{staging_root}/data/feast_driver_hourly_stats.parquet",
        iam_role,
        "feast_driver_hourly_stats",
        driver_df,
    )


def bootstrap() -> None:
    """Interactively configure the Redshift template repository.

    Called automatically from ``init_repo()`` during ``feast init``. Prompts
    for the AWS/Redshift settings, optionally uploads example data to
    Redshift, and then substitutes the answers for the ``%...%`` placeholders
    in ``feature_repo/feature_definitions.py`` and
    ``feature_repo/feature_store.yaml`` located next to this file.
    """
    import pathlib

    aws_region = click.prompt("AWS Region (e.g. us-west-2)")
    cluster_id = click.prompt("Redshift Cluster ID")
    database = click.prompt("Redshift Database Name")
    user = click.prompt("Redshift User Name")
    s3_staging_location = click.prompt("Redshift S3 Staging Location (s3://*)")
    iam_role = click.prompt("Redshift IAM Role for S3 (arn:aws:iam::*:role/*)")

    # The example data is only generated when the user opts in to the upload.
    if click.confirm(
        "Should I upload example data to Redshift (overwriting 'feast_driver_hourly_stats' table)?",
        default=True,
    ):
        _upload_example_data(
            aws_region, cluster_id, database, user, s3_staging_location, iam_role
        )

    # The template files are filled in regardless of whether data was uploaded.
    repo_path = pathlib.Path(__file__).parent.absolute() / "feature_repo"

    example_py_file = repo_path / "feature_definitions.py"
    replace_str_in_file(example_py_file, "%REDSHIFT_DATABASE%", database)

    config_file = repo_path / "feature_store.yaml"
    # Insertion order is preserved, so substitutions run in the listed order.
    config_values = {
        "%AWS_REGION%": aws_region,
        "%REDSHIFT_CLUSTER_ID%": cluster_id,
        "%REDSHIFT_DATABASE%": database,
        "%REDSHIFT_USER%": user,
        "%REDSHIFT_S3_STAGING_LOCATION%": s3_staging_location,
        "%REDSHIFT_IAM_ROLE%": iam_role,
    }
    for placeholder, value in config_values.items():
        replace_str_in_file(config_file, placeholder, value)
if __name__ == "__main__":
    # Run the interactive bootstrap when this file is executed directly.
    bootstrap()