55
66from __future__ import annotations
77
8+ import base64
89import collections
910import datetime
1011import functools
1112import glob
1213import itertools
1314import os
1415import random
16+ import re
1517import socket
1618import sqlite3
1719import string
2527
2628from coverage .debug import NoDebugging , auto_repr , file_summary
2729from coverage .exceptions import CoverageException , DataError
28- from coverage .misc import file_be_gone , isolate_module
30+ from coverage .misc import Hasher , file_be_gone , isolate_module
2931from coverage .numbits import numbits_to_nums , numbits_union , nums_to_numbits
3032from coverage .sqlitedb import SqliteDb
3133from coverage .types import AnyCallable , FilePath , TArc , TDebugCtl , TLineNo , TWarnFn
6365 -- 'sys_argv' text -- The coverage command line that recorded the data.
6466 -- 'version' text -- The version of coverage.py that made the file.
6567 -- 'when' text -- Datetime when the file was created.
68+ -- 'hash' text -- Hash of the data.
6669);
6770
6871CREATE TABLE file (
@@ -250,6 +253,7 @@ def __init__(
250253 self ._no_disk = no_disk
251254 self ._basename = os .path .abspath (basename or ".coverage" )
252255 self ._suffix = suffix
256+ self ._our_suffix = suffix is True
253257 self ._warn = warn
254258 self ._debug = debug or NoDebugging ()
255259
@@ -262,6 +266,9 @@ def __init__(
262266 # Synchronize the operations used during collection.
263267 self ._lock = threading .RLock ()
264268
269+ self ._wrote_hash = False
270+ self ._hasher = Hasher ()
271+
265272 # Are we in sync with the data file?
266273 self ._have_used = False
267274
@@ -355,10 +362,13 @@ def _init_db(self, db: SqliteDb) -> None:
355362
356363 # When writing metadata, avoid information that will needlessly change
357364 # the hash of the data file, unless we're debugging processes.
365+ # If we control the suffix, then the hash is in the file name, and we
366+ # can write any metadata without affecting the hash determination
367+ # later.
358368 meta_data = [
359369 ("version" , __version__ ),
360370 ]
361- if self ._debug .should ("process" ):
371+ if self ._our_suffix or self . _debug .should ("process" ):
362372 meta_data .extend (
363373 [
364374 ("sys_argv" , str (getattr (sys , "argv" , None ))),
@@ -472,6 +482,7 @@ def set_context(self, context: str | None) -> None:
472482 self ._debug .write (f"Setting coverage context: { context !r} " )
473483 self ._current_context = context
474484 self ._current_context_id = None
485+ self ._hasher .update (context )
475486
476487 def _set_context_id (self ) -> None :
477488 """Use the _current_context to set _current_context_id."""
@@ -529,7 +540,9 @@ def add_lines(self, line_data: Mapping[str, Collection[TLineNo]]) -> None:
529540 with self ._connect () as con :
530541 self ._set_context_id ()
531542 for filename , linenos in line_data .items ():
543+ self ._hasher .update (filename )
532544 line_bits = nums_to_numbits (linenos )
545+ self ._hasher .update (line_bits )
533546 file_id = self ._file_id (filename , add = True )
534547 query = "SELECT numbits FROM line_bits WHERE file_id = ? AND context_id = ?"
535548 with con .execute (query , (file_id , self ._current_context_id )) as cur :
@@ -573,6 +586,8 @@ def add_arcs(self, arc_data: Mapping[str, Collection[TArc]]) -> None:
573586 with self ._connect () as con :
574587 self ._set_context_id ()
575588 for filename , arcs in arc_data .items ():
589+ self ._hasher .update (filename )
590+ self ._hasher .update (arcs )
576591 if not arcs :
577592 continue
578593 file_id = self ._file_id (filename , add = True )
@@ -620,6 +635,8 @@ def add_file_tracers(self, file_tracers: Mapping[str, str]) -> None:
620635 self ._start_using ()
621636 with self ._connect () as con :
622637 for filename , plugin_name in file_tracers .items ():
638+ self ._hasher .update (filename )
639+ self ._hasher .update (plugin_name )
623640 file_id = self ._file_id (filename , add = True )
624641 existing_plugin = self .file_tracer (filename )
625642 if existing_plugin :
@@ -897,7 +914,22 @@ def read(self) -> None:
897914
def write(self) -> None:
    """Ensure the data is written to the data file.

    When this object chose its own parallel suffix (``suffix=True``), the
    first call finishes the file: the hex digest of the accumulated hasher
    is stored under the ``'hash'`` key of the ``meta`` table, the database
    is closed, and the file is renamed so that its name ends with
    ``.H<hash>h``.  Every other call only emits a debug message.

    Raises:
        OSError: if the data file cannot be renamed.  In that case
            ``self._filename`` still names the un-renamed file, so a later
            call can retry.
    """
    if not self._our_suffix or self._wrote_hash:
        self._debug_dataio("Writing (no-op) data file", self._filename)
        return

    self._debug_dataio("Finishing data file", self._filename)
    with self._connect() as con:
        # OR IGNORE: presumably `key` is unique in `meta`, so an existing
        # 'hash' row is kept rather than raising -- verify against the schema.
        con.execute_void(
            "INSERT OR IGNORE INTO meta (key, value) VALUES ('hash', ?)",
            (self._hasher.hexdigest(),),
        )
    # The database is closed before the file is moved.
    self.close()

    # altchars swaps base64's "+" and "/" for "0" and "1", keeping the
    # hash purely alphanumeric for use in a file name.
    encoded = base64.b64encode(self._hasher.digest(), altchars=b"01").decode()
    data_hash = encoded[:NHASH]
    hashed_filename = f"{self._filename}.H{data_hash}h"
    self._debug_dataio("Renaming data file to", hashed_filename)
    os.rename(self._filename, hashed_filename)

    # Adopt the new name only after the rename succeeded, so a failed rename
    # never leaves us pointing at a file that doesn't exist.
    self._filename = hashed_filename
    self._wrote_hash = True
901933
902934 def _start_using (self ) -> None :
903935 """Call this before using the database at all."""
@@ -1129,6 +1161,11 @@ def sys_info(cls) -> list[tuple[str, Any]]:
11291161 ]
11301162
11311163
# Alphabet (letters and digits) used for the random part of a parallel suffix.
ASCII = "".join([string.ascii_letters, string.digits])
# Number of random characters between "X" and "x" in a parallel suffix.
NRAND = 6
# Number of hash characters between "H" and "h" in a finished data file name.
NHASH = 10
1167+
1168+
11321169def filename_suffix (suffix : str | bool | None ) -> str | None :
11331170 """Compute a filename suffix for a data file.
11341171
@@ -1145,9 +1182,31 @@ def filename_suffix(suffix: str | bool | None) -> str | None:
11451182 # `save()` at the last minute so that the pid will be correct even
11461183 # if the process forks.
11471184 die = random .Random (os .urandom (8 ))
1148- letters = string . ascii_uppercase + string . ascii_lowercase
1149- rolls = "" . join ( die . choice ( letters ) for _ in range ( 6 ) )
1150- suffix = f"{ socket . gethostname () } . { os .getpid ()} .X{ rolls } x"
1185+ rolls = "" . join ( die . choice ( ASCII ) for _ in range ( NRAND ))
1186+ host = socket . gethostname (). replace ( "." , "_" )
1187+ suffix = f"{ host } .pid { os .getpid ()} .X{ rolls } x"
11511188 elif suffix is False :
11521189 suffix = None
11531190 return suffix
1191+
1192+
# A regex to match parallel file name suffixes, with named groups:
#   host   - the host name part (contains no dots)
#   pid    - the digits after ".pid"
#   random - the NRAND characters between ".X" and "x"
#   hash   - the NHASH characters between ".H" and "h" (the part is optional)
# We combine this with other regexes, so can't use verbose syntax.
SUFFIX_PATTERN = (
    r"\.(?P<host>[^.]+)"
    + r"\.pid(?P<pid>\d+)"
    + rf"\.X(?P<random>\w{{{NRAND}}})x"
    # The closing "h" is a delimiter, like the "x" after the random part, so
    # it stays outside the named group: `hash` is exactly NHASH characters.
    + rf"(\.H(?P<hash>\w{{{NHASH}}})h)?"
)
1201+
1202+
def filename_match(filename: str) -> re.Match[str] | None:
    """Search `filename` for a parallel-data suffix at its very end.

    Returns the match object, whose named groups come from SUFFIX_PATTERN,
    or None if the file name doesn't end with such a suffix.
    """
    anchored_pattern = SUFFIX_PATTERN + "$"
    return re.search(anchored_pattern, filename)
1206+
1207+
def good_filename_match(filename: str) -> re.Match[str]:
    """Like `filename_match`, for file names already known to match.

    The assertion documents that expectation and narrows the return type
    from an optional match to a definite one.
    """
    match = filename_match(filename)
    assert match is not None
    return match
0 commit comments