1717import hashlib
1818import os .path
1919from collections .abc import Iterable
20- from typing import Callable
20+ from typing import Callable , Literal
2121
2222from coverage .exceptions import CoverageException , NoDataError
2323from coverage .files import PathAliases
@@ -95,6 +95,30 @@ def combinable_files(data_file: str, data_paths: Iterable[str] | None = None) ->
9595 return sorted (files_to_combine )
9696
9797
def hash_for_data_file(f: str) -> bytes:
    """Return the SHA3-256 digest of the raw bytes of data file `f`."""
    # usedforsecurity=False: the digest only detects duplicate data files,
    # so it remains usable in FIPS-restricted environments.
    with open(f, "rb") as fobj:
        return hashlib.new("sha3_256", fobj.read(), usedforsecurity=False).digest()
104+
105+
class DataFileClassifier:
    """Remember data-file content hashes so duplicate files can be skipped."""

    def __init__(self) -> None:
        # Digests of every file already approved for combining.
        self.file_hashes: set[bytes] = set()

    def classify(self, f: str) -> Literal["combine", "skip"]:
        """Return "combine" the first time a file's content is seen, "skip" thereafter."""
        digest = hash_for_data_file(f)
        if digest not in self.file_hashes:
            self.file_hashes.add(digest)
            return "combine"
        return "skip"
120+
121+
98122def combine_parallel_data (
99123 data : CoverageData ,
100124 aliases : PathAliases | None = None ,
@@ -140,7 +164,7 @@ def combine_parallel_data(
140164 else :
141165 map_path = functools .cache (aliases .map )
142166
143- file_hashes = set ()
167+ classifier = DataFileClassifier ()
144168 combined_any = False
145169
146170 for f in files_to_combine :
@@ -156,20 +180,15 @@ def combine_parallel_data(
156180 except ValueError :
157181 # ValueError can be raised under Windows when os.getcwd() returns a
158182 # folder from a different drive than the drive of f, in which case
159- # we print the original value of f instead of its relative path
183+ # we print the original value of f instead of its relative path.
160184 rel_file_name = f
161185
162- with open (f , "rb" ) as fobj :
163- hasher = hashlib .new ("sha3_256" , usedforsecurity = False )
164- hasher .update (fobj .read ())
165- sha = hasher .digest ()
166- combine_this_one = sha not in file_hashes
186+ file_action = classifier .classify (f )
167187
168188 delete_this_one = not keep
169- if combine_this_one :
189+ if file_action == "combine" :
170190 if data ._debug .should ("dataio" ):
171191 data ._debug .write (f"Combining data file { f !r} " )
172- file_hashes .add (sha )
173192 try :
174193 new_data = CoverageData (f , debug = data ._debug )
175194 new_data .read ()
0 commit comments