You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
feat: complete cross-platform support for all the supported browsers + can now explore multiple default profiles for Chrome and Edge (but not Profile 2 and more, only Profile 1) + speed optimizations + more robust pandas dataframe construction and indexing + bump v0.3.4
[project] # beware if using setuptools: setup.py still gets executed, and even if pyproject.toml fields take precedence, if there is any code error in setup.py, building will fail!
9
9
name = "webactogram"# renamed from online-actogram according to PEP 423 https://peps.python.org/pep-0423/#pick-meaningful-names
10
-
version = "0.3.3"# see PEP 440 https://peps.python.org/pep-0440/#pre-releases and https://packaging.python.org/en/latest/guides/single-sourcing-package-version/
10
+
version = "0.3.4"# see PEP 440 https://peps.python.org/pep-0440/#pre-releases and https://packaging.python.org/en/latest/guides/single-sourcing-package-version/
11
11
description = "Actogram from browsers history, may help to screen sleep-wake patterns & disorders!"
12
12
authors = [
13
13
{name = "Barrett F. Davis", email = "barrettfdavis@gmail.com"},
Copy file name to clipboard. Expand all lines: src/webactogram/webactogram.py
+95-64Lines changed: 95 additions & 64 deletions
Original file line number
Diff line number
Diff line change
@@ -40,6 +40,7 @@
40
40
importconfigparser
41
41
importglob
42
42
importos
43
+
importrandom
43
44
importshlex
44
45
importsys
45
46
importsqlite3
@@ -51,6 +52,8 @@
51
52
fromitertoolsimportgroupby
52
53
# Typing
53
54
fromcollections.abcimportSequence
55
+
# Path
56
+
frompathlibimportPath
54
57
55
58
# Scientific stack
56
59
importnumpyasnp
@@ -127,6 +130,7 @@ class ImportData:
127
130
def__init__(self, act):
128
131
super().__init__()
129
132
self.act=act
133
+
self.history_loc_dict_temp= [] # temporary dictionary to store the filepaths of the temporary history files and which browser they relate to (for SQLite queries) -- we copy history files to a temporary folder to avoid modifying the original ones
""" check which OS user is running script from, then
158
162
check typical file paths for popular browser history files """
159
163
160
-
home=os.path.expanduser("~")
164
+
#home = os.path.expanduser("~")
165
+
home=str(Path.home())
166
+
history_filepaths= {} # dictionary to store filepaths for each browser. Structure is: {browser: [filepath1, filepath2, ...]} because there can be multiple filepaths for the browser history, multiple profiles per browser, and sometimes the default profile is not named Default (e.g. for Chrome).
""" function to copy file at given file location to temporary folder"""
203
223
os.makedirs(dst_folder, exist_ok=True)
224
+
fname=Path(src).name# get the filename from the path
204
225
dst=os.path.join(dst_folder, fname)
226
+
# Test if destination file already exists
227
+
ifos.path.exists(dst):
228
+
# If it already exists (eg, multiple profiles for one browser), change destination folder dst to append a random number to the filename to avoid collision
# Since the output can be different from input, we need to create a new dict to map and remember what are the browsers (to know which SQLite commands to send)
'unixepoch','localtime') FROM urls ORDER BY last_visit_time DESC;"
268
+
}
269
+
270
+
df_list= [] # list of dataframes to concatenate outside of the loop (faster than concatenating inside the loop, otherwise memory is reallocated at each iteration so complexity is O(N^2) quadratic)
271
+
272
+
# For each browser's history file, import the data into a pandas dataframe and add it to a list
""" Function to open SQL styled history files and convert to a pandas
261
290
DataFrame type. SQL objects are closed after copying to Pandas DF. """
262
-
cnx=sqlite3.connect(file_name)
263
-
df=pd.read_sql_query(command_str, cnx)
264
-
cnx.commit()
265
-
cnx.close()
291
+
cnx=sqlite3.connect(file_name)# connect to the SQLite database
292
+
df=pd.read_sql_query(command_str, cnx)# read the SQL query into a pandas dataframe
293
+
cnx.commit()# commit changes (this is necessary to close the connection, and is why we copy the history files to a temporary folder beforehand to avoid tampering the originals)
df.rename(inplace=True, columns={df.columns[0]: 'visit_time'})# rename the column to 'visit_time' for consistency
297
+
df['visit_time']=pd.to_datetime(df['visit_time'], errors='coerce').dropna() # drop NaT values (and keep it as a DataFrame, because it will always return a Series since we are manipulating a single column)
first_visit=self.df.ne(0) # creates a boolean mask where each element is True if the corresponding element in self.df is not equal to 0, and False otherwise.
372
+
first_visit=first_visit.idxmax() # returns the index of the first occurrence of the maximum value in the Series. If the Series is all True/False values, then this will be the index of the first True value.
373
+
first_visit=first_visit.iloc[0] # indexing the Series returned by idxmax().
0 commit comments