|
| 1 | +import logging |
| 2 | +from pathlib import Path |
| 3 | +from typing import Dict, List |
| 4 | + |
| 5 | +from bowler import Query |
| 6 | +from fissix.pgen2 import token |
| 7 | +from fissix.pygram import python_symbols |
| 8 | +from fissix.pytree import Node |
| 9 | + |
| 10 | +from feast.repo_operations import get_repo_files |
| 11 | + |
# Class names of the data sources whose constructor calls get rewritten;
# remove_date_partition_column runs one bowler query per name in this set.
SOURCES = {
    "BigQuerySource",
    "FileSource",
    "KafkaSource",
    "KinesisSource",
    "RedshiftSource",
    "SnowflakeSource",
}
| 20 | + |
| 21 | + |
class RepoUpgrader:
    """Rewrites the Python files of a feature repo to drop deprecated arguments.

    The only upgrade step currently implemented removes the
    ``date_partition_column`` keyword argument from calls to the classes
    listed in ``SOURCES``.
    """

    def __init__(self, repo_path: str, write: bool):
        """
        Collects the files to process.

        Args:
            repo_path: Path of the repo; handed to ``get_repo_files``.
            write: Forwarded to bowler's ``execute(write=...)`` for every query.
        """
        self.repo_path = repo_path
        self.write = write
        self.repo_files: List[str] = [
            str(p) for p in get_repo_files(Path(self.repo_path))
        ]
        # Only let warnings and above through on the "RefactoringTool" logger
        # (presumably to keep the refactoring library's chatter out of the output).
        logging.getLogger("RefactoringTool").setLevel(logging.WARNING)

    def upgrade(self) -> None:
        """Runs every upgrade step over the collected repo files."""
        self.remove_date_partition_column()

    def remove_date_partition_column(self) -> None:
        """Removes ``date_partition_column`` from calls to each class in SOURCES."""

        def _remove_date_partition_column(
            node: Node, capture: Dict[str, Node], filename: str
        ) -> None:
            self.remove_argument_transform(node, "date_partition_column")

        # One independent query per source class name.
        for s in SOURCES:
            Query(self.repo_files).select_class(s).is_call().modify(
                _remove_date_partition_column
            ).execute(write=self.write, interactive=False)

    @staticmethod
    def _is_keyword_argument(candidate, keyword: str) -> bool:
        """Returns True if candidate is an ``argument`` node whose first child is ``keyword``."""
        if candidate.type != python_symbols.argument or not candidate.children:
            return False
        # The first child may be a Node rather than a Leaf (no ``value``
        # attribute); such an argument is never the keyword we look for.
        return getattr(candidate.children[0], "value", None) == keyword

    @staticmethod
    def remove_argument_transform(node: Node, argument: str):
        """
        Removes the specified argument.
        For example, if the argument is "join_key", this method transforms
            driver = Entity(
                name="driver_id",
                join_key="driver_id",
            )
        into
            driver = Entity(
                name="driver_id",
            )
        A call whose only argument is the specified one is handled too:
        Entity(join_key="driver_id") becomes Entity(). Calls that do not pass
        the argument are left untouched.

        Args:
            node: The class call; its second child must be the call trailer.
            argument: Name of the keyword argument to remove.

        Raises:
            ValueError: If node does not have the shape of a class call.
        """
        if len(node.children) < 2 or len(node.children[1].children) < 2:
            raise ValueError(f"Expected a class call with an arglist but got {node}.")
        call_args = node.children[1].children[1]

        # With a single argument the parser stores the argument node directly
        # inside the trailer instead of wrapping it in an arglist.
        if RepoUpgrader._is_keyword_argument(call_args, argument):
            call_args.remove()
            return

        siblings = call_args.children
        for position, candidate in enumerate(siblings):
            if not RepoUpgrader._is_keyword_argument(candidate, argument):
                continue
            # remove() detaches the node from its parent in place, so the
            # following sibling slides into `position`.
            candidate.remove()
            # Also drop the separator(s) that trailed the removed argument.
            for separator in (token.COMMA, token.NEWLINE):
                if position < len(siblings) and siblings[position].type == separator:
                    siblings[position].remove()
            # A keyword can appear only once per call; stopping here also
            # avoids iterating over the list that was just mutated.
            return
0 commit comments