-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathlazy_file_loader.py
More file actions
165 lines (131 loc) · 5.11 KB
/
lazy_file_loader.py
File metadata and controls
165 lines (131 loc) · 5.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
"""
Lazy file loading utilities for efficient manifest file processing.
"""
import errno
import logging
import os
from io import BytesIO
from typing import List, Tuple, Union, BinaryIO
log = logging.getLogger("socketdev")


class LazyFileLoader:
    """
    A read-only, file-like object that opens the real file only when needed.

    The underlying file is opened on the first read (or end-relative seek)
    and its descriptor is released again as soon as a read reaches end of
    file, so that many loaders can exist at once without keeping many file
    descriptors open. The logical position is tracked separately from the
    OS handle, so releasing the descriptor is invisible to callers: a later
    ``read``/``seek`` simply reopens the file at the remembered offset.

    This class implements the standard file-like interface that the requests
    library expects for multipart uploads, making it a drop-in replacement
    for regular file objects.
    """

    def __init__(self, file_path: str, name: str):
        """
        Args:
            file_path: Path of the file to open (in ``'rb'`` mode) on demand.
            name: Value exposed as the ``name`` attribute of this object.
        """
        self.file_path = file_path
        self.name = name
        self._file = None       # underlying binary handle, or None while not open
        self._closed = False    # True only after close(); not set by an EOF release
        self._position = 0      # logical offset, valid even while _file is None

    def _ensure_open(self):
        """Open the file if necessary and restore the tracked position.

        Raises:
            ValueError: If this loader has been closed.
        """
        if self._closed:
            raise ValueError("I/O operation on closed file.")
        if self._file is None:
            self._file = open(self.file_path, 'rb')
            log.debug(f"Opened file for reading: {self.file_path}")
            # Seek to the current position if we've been reading before
            if self._position > 0:
                self._file.seek(self._position)

    def _release(self):
        """Close the OS handle but keep the loader usable (position is kept)."""
        if self._file is not None:
            self._file.close()
            log.debug(f"Released file descriptor at EOF: {self.file_path}")
            self._file = None

    def read(self, size: int = -1):
        """Read up to ``size`` bytes (everything when negative or None).

        The file is opened if needed. When the read reaches end of file the
        descriptor is released; the loader itself stays open.

        Raises:
            ValueError: If this loader has been closed.
        """
        self._ensure_open()
        data = self._file.read(size)
        self._position = self._file.tell()
        read_to_end = size is None or size < 0
        # A short (or empty) result for a positive request means EOF was hit.
        if read_to_end or (size > 0 and len(data) < size):
            self._release()
        return data

    def readline(self, size: int = -1):
        """Read one line (at most ``size`` bytes when ``size`` is non-negative).

        Raises:
            ValueError: If this loader has been closed.
        """
        self._ensure_open()
        data = self._file.readline(size)
        self._position = self._file.tell()
        # An empty result for a non-zero request can only mean end of file.
        if not data and size != 0:
            self._release()
        return data

    def seek(self, offset: int, whence: int = 0):
        """Move the logical position and return the new absolute offset.

        Absolute (0) and relative (1) seeks do not open the file; an
        end-relative seek (2) opens it to resolve the offset against the
        file's end.

        Raises:
            ValueError: If this loader has been closed or ``whence`` is not
                0, 1 or 2.
            OSError: If the resulting position would be negative.
        """
        if self._closed:
            raise ValueError("I/O operation on closed file.")

        if whence == 2:  # SEEK_END
            # We need to open the file to get its size
            self._ensure_open()
            result = self._file.seek(offset, whence)
            self._position = self._file.tell()
            return result

        if whence == 0:  # SEEK_SET
            target = offset
        elif whence == 1:  # SEEK_CUR
            target = self._position + offset
        else:
            raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)")

        # Validate before touching any state so a rejected seek leaves the
        # tracked position intact (a real file raises EINVAL here too).
        if target < 0:
            raise OSError(errno.EINVAL, "Invalid argument")

        # If file is already open, seek it too
        if self._file is not None:
            result = self._file.seek(target)
            self._position = target
            return result

        self._position = target
        return self._position

    def tell(self):
        """Return the current position.

        Raises:
            ValueError: If this loader has been closed.
        """
        if self._closed:
            raise ValueError("I/O operation on closed file.")
        if self._file is not None:
            self._position = self._file.tell()
        return self._position

    def close(self):
        """Close the file if it was opened and mark this loader as closed."""
        if self._file is not None:
            self._file.close()
            log.debug(f"Closed file: {self.file_path}")
            self._file = None
        self._closed = True

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    @property
    def closed(self):
        """Whether close() has been called on this loader."""
        return self._closed

    @property
    def mode(self):
        """Return the file mode (always ``'rb'``)."""
        return 'rb'

    def readable(self):
        """Return whether the file is readable (True until closed)."""
        return not self._closed

    def writable(self):
        """Return whether the file is writable (never)."""
        return False

    def seekable(self):
        """Return whether the file supports seeking (always True)."""
        return True
def load_files_for_sending_lazy(files: List[str], workspace: str) -> List[Tuple[str, Tuple[str, LazyFileLoader]]]:
    """
    Prepares files for sending to the Socket API using lazy loading.

    This version doesn't open all files immediately, instead it creates
    LazyFileLoader objects that only open files when they're actually read.
    This prevents "Too many open files" errors when dealing with large numbers
    of manifest files.

    Each file's upload key is its path relative to ``workspace`` (when the
    path lies inside it), with leading ``/`` and ``./`` segments removed.
    Leading dots that belong to a name (``.github/...``, ``.npmrc``) are kept.

    Args:
        files: List of file paths from find_files()
        workspace: Base directory path to make paths relative to

    Returns:
        List of tuples formatted for requests multipart upload:
        [(field_name, (filename, lazy_file_object)), ...]
    """
    send_files = []
    workspace = workspace.replace("\\", "/")

    # Match the workspace only on a path-component boundary, so "/repo" is
    # not treated as a prefix of "/repo2/x" and "." not of ".github/x".
    if workspace and not workspace.endswith("/"):
        prefix = workspace + "/"
    else:
        prefix = workspace

    for file_path in files:
        if file_path.startswith(prefix):
            key = file_path[len(prefix):]
        else:
            key = file_path

        key = key.lstrip("/")
        # Drop literal "./" segments only; str.lstrip("./") would strip any
        # run of '.' and '/' characters and mangle dotfile names.
        while key.startswith("./"):
            key = key[2:].lstrip("/")

        # Create lazy file loader instead of opening file immediately
        # Use the relative path (key) as filename instead of truncated basename
        lazy_file = LazyFileLoader(file_path, key)
        send_files.append((key, (key, lazy_file)))

    log.debug(f"Prepared {len(send_files)} files for lazy loading")
    return send_files