-
Notifications
You must be signed in to change notification settings - Fork 700
Expand file tree
/
Copy pathdetect_duplicate_features.py
More file actions
100 lines (76 loc) · 3.2 KB
/
detect_duplicate_features.py
File metadata and controls
100 lines (76 loc) · 3.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import logging
import argparse
from pathlib import Path
import capa.rules
from capa.features.common import Feature
logger = logging.getLogger("detect_duplicate_features")
def get_features(rule_path: str) -> set[Feature]:
    """
    Collect every feature referenced by a single rule file.

    The file is read as UTF-8 text, parsed with ``capa.rules.Rule.from_yaml``
    and the result of the rule's ``extract_all_features()`` is returned.

    Args:
        rule_path (str): The path to the rule file to extract features from.

    Returns:
        set: The features returned by ``extract_all_features()`` for the rule.

    Note:
        Any exception raised while reading, parsing, or extracting features
        is logged and terminates the process via ``sys.exit(-1)``. Failures
        while opening the file are not caught here and propagate to the caller.
    """
    rule_file = Path(rule_path)
    with rule_file.open("r", encoding="utf-8") as handle:
        try:
            parsed_rule = capa.rules.Rule.from_yaml(handle.read())
            features = parsed_rule.extract_all_features()
        except Exception as err:
            logger.error("Error: New rule %s %s %s", rule_path, str(type(err)), str(err))
            sys.exit(-1)
        else:
            return features
def find_overlapping_rules(new_rule_path, rules_path):
    """
    Find existing rules that share at least one feature with a new rule.

    Args:
        new_rule_path: Path of the new rule file, as a string; it must end
            with ".yml", otherwise an error is logged and the process exits
            with status -1.
        rules_path: Location(s) of the existing rules, handed unchanged to
            ``capa.rules.get_rules``.

    Returns:
        dict: ``"overlapping_rules"`` maps to the list of names of rules that
        have a feature in common with the new rule, and ``"count"`` maps to
        the number of existing rules that had at least one feature and were
        therefore compared.
    """
    if not new_rule_path.endswith(".yml"):
        logger.error("FileNotFoundError ! New rule file name doesn't end with .yml")
        sys.exit(-1)

    # Features of the rule under review; extracted before the rule set is loaded.
    candidate_features = get_features(new_rule_path)

    # Load every rule found under the given paths.
    existing_rules = capa.rules.get_rules(rules_path)

    checked = 0
    matches = []
    for name, existing in existing_rules.rules.items():
        existing_features = existing.extract_all_features()

        # Rules without any features are skipped and not counted.
        if len(existing_features) == 0:
            continue
        checked += 1

        # One shared feature is enough to flag the existing rule.
        for feature in candidate_features:
            if feature in existing_features:
                matches.append(name)
                break

    return {"overlapping_rules": matches, "count": checked}
def main():
    """
    Command-line entry point: report rules that overlap with a new rule.

    Parses the positional ``rules`` and ``new_rule`` arguments, runs
    ``find_overlapping_rules`` and prints a short report to stdout.

    Returns:
        int: The number of overlapping rules found (0 when there are none).
    """
    parser = argparse.ArgumentParser(description="Find overlapping features in Capa rules.")
    parser.add_argument("rules", type=str, action="append", help="Path to rules")
    parser.add_argument("new_rule", type=str, help="Path to new rule")
    args = parser.parse_args()

    new_rule_path = args.new_rule
    rules_path = [Path(rule) for rule in args.rules]

    result = find_overlapping_rules(new_rule_path, rules_path)
    overlapping = result["overlapping_rules"]
    checked = result["count"]

    print(f"\nNew rule path : {new_rule_path}")
    print(f"Number of rules checked : {checked} ")
    if not overlapping:
        print("Paths to overlapping rules : None")
    else:
        print("Paths to overlapping rules : ")
        for rule_name in overlapping:
            print(f"- {rule_name}")
    print(f"Number of rules containing same features : {len(overlapping)}")
    print("\n")

    return len(overlapping)
# When executed as a script, the value returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())