This repository was archived by the owner on Mar 23, 2026. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 4.7k
Expand file tree
/
Copy pathhttp.py
More file actions
326 lines (265 loc) · 11.4 KB
/
http.py
File metadata and controls
326 lines (265 loc) · 11.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
import logging
import math
import os
import re
from urllib.parse import parse_qs, parse_qsl, urlencode, urlparse, urlunparse
import requests
from requests.models import CaseInsensitiveDict, Response
from localstack import config
from .strings import to_str
# chunk size for file downloads
DOWNLOAD_CHUNK_SIZE = 1024 * 1024
# lowercase prefix of "Accept-*" header names, used by canonicalize_headers below
ACCEPT = "accept"
# module-level logger
LOG = logging.getLogger(__name__)
def uses_chunked_encoding(response):
    """Return True if the given HTTP response uses chunked transfer encoding."""
    transfer_encoding = response.headers.get("Transfer-Encoding", "")
    return transfer_encoding.lower() == "chunked"
def parse_chunked_data(data):
    """Parse the body of an HTTP message transmitted with chunked transfer encoding.

    :param data: raw message body as a string (format: hex length, CRLF,
        payload, CRLF, ... terminated by a zero-length chunk); may be None
    :return: the concatenated chunk payloads as a single string
    """
    data = (data or "").strip()
    chunks = []
    while data:
        # each chunk starts with its payload length as a hexadecimal number;
        # fix: only accept hex digits here - the previous pattern [0-9a-zA-Z]+
        # also matched letters g-z, making int(..., 16) below raise ValueError
        length = re.match(r"^([0-9a-fA-F]+)\r\n.*", data)
        if not length:
            break
        length = int(length.group(1).lower(), 16)
        # drop the length line, then consume exactly `length` payload chars
        data = data.partition("\r\n")[2]
        chunks.append(data[:length])
        data = data[length:].strip()
    return "".join(chunks)
def create_chunked_data(data, chunk_size: int = 80):
    """Encode the given payload using HTTP chunked transfer encoding.

    Each chunk is emitted as ``<hex-length>CRLF<payload>CRLF`` per RFC 7230
    section 4.1, followed by the terminating zero-length chunk ``0CRLFCRLF``.
    (Fix: full-size chunks were previously terminated with a double CRLF,
    which is not valid chunked encoding and was inconsistent with the
    trailing partial chunk.)

    :param data: the payload string to encode
    :param chunk_size: maximum payload size per chunk (default: 80)
    :return: the chunked-encoded message body
    """
    chunks = []
    for start in range(0, len(data), chunk_size):
        chunk = data[start : start + chunk_size]
        chunks.append(f"{len(chunk):x}\r\n{chunk}\r\n")
    # zero-length chunk marks the end of the message
    chunks.append("0\r\n\r\n")
    return "".join(chunks)
def canonicalize_headers(headers: dict | CaseInsensitiveDict) -> dict:
    """Return a copy of `headers` in which all "Accept*" header names are lowercased.

    All other header names and all values are left untouched; falsy inputs
    (None or empty) are returned as-is.
    """
    if not headers:
        return headers

    def _canonical_name(header_name):
        lowered = header_name.lower()
        return lowered if lowered.startswith(ACCEPT) else header_name

    return {_canonical_name(name): value for name, value in headers.items()}
def add_path_parameters_to_url(uri: str, path_params: list):
    """Append the given path segments to the path portion of `uri`.

    A "/" separator is inserted between the existing path and the new
    segments if required; query string and fragment are preserved.
    """
    parsed = urlparse(uri)
    path = parsed.path
    # only add a separator when there are segments to append and the
    # current path does not already end with one (or is empty)
    if path_params and not path.endswith("/"):
        path += "/"
    path += "/".join(path_params)
    return urlunparse(parsed._replace(path=path))
def add_query_params_to_url(uri: str, query_params: dict) -> str:
    """
    Merge query parameters into the uri.

    Existing parameters with the same name are overwritten; path and other
    URL components are preserved.

    :param uri: the base uri, may already contain path arguments and query parameters
    :param query_params: new query parameters to be added
    :return: the resulting URL
    """
    parsed = urlparse(uri)
    # existing query string (if any) as a dict, then overlay the new params
    merged_params = dict(parse_qsl(parsed.query))
    merged_params.update(query_params)
    # re-encode and splice the merged query back into the URL
    return urlunparse(parsed._replace(query=urlencode(merged_params)))
def make_http_request(
    url: str, data: bytes | str = None, headers: dict[str, str] = None, method: str = "GET"
) -> Response:
    """Perform an HTTP request against `url` and return the raw response.

    SSL certificate verification is disabled, and credential lookups in
    ~/.netrc are bypassed via NetrcBypassAuth.
    """
    request_kwargs = {
        "url": url,
        "method": method,
        "headers": headers,
        "data": data,
        # attach a no-op auth object to prevent netrc-based credentials lookup
        "auth": NetrcBypassAuth(),
        "verify": False,
    }
    return requests.request(**request_kwargs)
class NetrcBypassAuth(requests.auth.AuthBase):
    """No-op auth handler: attaching it stops `requests` from falling back to
    reading credentials from the ~/.netrc file."""

    def __call__(self, request):
        # return the request unmodified - presence of an auth object is enough
        return request
class _RequestsSafe:
    """Wrapper around requests library, which can prevent it from verifying
    SSL certificates or reading credentials from ~/.netrc file"""

    # when set to False, https:// calls are made with verify=False
    verify_ssl = True

    def __getattr__(self, name):
        # proxy attribute access to the corresponding requests function
        target = requests.__dict__.get(name.lower())
        if not target:
            return target

        def _invoke(*args, **kwargs):
            # avoid credentials being read from ~/.netrc
            if "auth" not in kwargs:
                kwargs["auth"] = NetrcBypassAuth()
            # requests.request(...) takes the URL as second positional arg,
            # the verb helpers (get/post/...) take it first
            fallback = args[1] if name == "request" else args[0]
            url = kwargs.get("url") or fallback
            if not self.verify_ssl and url.startswith("https://") and "verify" not in kwargs:
                kwargs["verify"] = False
            return target(*args, **kwargs)

        return _invoke
# module-level singleton: drop-in replacement for the `requests` module that
# bypasses ~/.netrc credentials and honors the verify_ssl flag (see _RequestsSafe)
safe_requests = _RequestsSafe()
def parse_request_data(method: str, path: str, data=None, headers=None) -> dict:
    """Extract request data either from query string as well as request body (e.g., for POST).

    Query-string parameters are always parsed; for POST/PUT/PATCH with a
    form content type (or no content type), the url-encoded body is parsed
    as well and overrides query-string values of the same name. Only the
    first value of each parameter is returned.
    """
    headers = headers or {}
    content_type = headers.get("Content-Type", "")

    # collect parameters from the query string of the path
    params = parse_qs(urlparse(path).query)

    # for write methods, also merge in parameters from a url-encoded payload
    # (content-type "application/x-www-form-urlencoded" or "multipart/form-data")
    if method in ("POST", "PUT", "PATCH") and (not content_type or "form-" in content_type):
        try:
            params.update(parse_qs(to_str(data or "")))
        except Exception:
            # probably binary / JSON / non-URL encoded payload - ignore
            pass

    # select first elements from result lists (this is assuming we are not using parameter lists!)
    return {key: values[0] for key, values in params.items()}
def get_proxies() -> dict[str, str]:
    """Build a requests-style proxy map from the configured outbound proxies.

    Only schemes with a configured proxy URL appear in the result.
    """
    proxies = {}
    for scheme, proxy_url in (
        ("http", config.OUTBOUND_HTTP_PROXY),
        ("https", config.OUTBOUND_HTTPS_PROXY),
    ):
        if proxy_url:
            proxies[scheme] = proxy_url
    return proxies
def download(
    url: str,
    path: str,
    verify_ssl: bool = True,
    timeout: float = None,
    request_headers: dict | None = None,
    quiet: bool = False,
) -> None:
    """Downloads file at url to the given path. Raises TimeoutError if the optional timeout (in secs) is reached.
    If `quiet` is passed, do not log any status messages. Error messages are still logged.

    :param url: URL to download from
    :param path: local target file path; missing parent directories are created
    :param verify_ssl: whether to verify SSL certificates (overridden by the
        REQUESTS_CA_BUNDLE environment variable, if set)
    :param timeout: optional timeout in seconds; exceeding it raises TimeoutError
    :param request_headers: optional HTTP headers to send with the request
    :param quiet: if True, suppress progress/debug log messages
    """
    # make sure we're creating a new session here to enable parallel file downloads
    s = requests.Session()
    proxies = get_proxies()
    if proxies:
        s.proxies.update(proxies)
    # Use REQUESTS_CA_BUNDLE path. If it doesn't exist, use the method provided settings.
    # Note that a value that is not False, will result to True and will get the bundle file.
    _verify = os.getenv("REQUESTS_CA_BUNDLE", verify_ssl)
    r = None
    try:
        r = s.get(url, stream=True, verify=_verify, timeout=timeout, headers=request_headers)
        # check status code before attempting to read body
        if not r.ok:
            raise Exception(f"Failed to download {url}, response code {r.status_code}")
        total_size = 0
        if r.headers.get("Content-Length"):
            total_size = int(r.headers.get("Content-Length"))
        total_downloaded = 0
        # fix: guard against an empty dirname (path without directory component),
        # which would previously make os.makedirs("") raise
        parent_dir = os.path.dirname(path)
        if parent_dir and not os.path.exists(parent_dir):
            os.makedirs(parent_dir)
        if not quiet:
            LOG.debug("Starting download from %s to %s", url, path)
        with open(path, "wb") as f:
            iter_length = 0
            percentage_limit = next_percentage_record = 10  # print a log line for every 10%
            iter_limit = (
                1000000  # if we can't tell the percentage, print a log line for every 1MB chunk
            )
            for chunk in r.iter_content(DOWNLOAD_CHUNK_SIZE):
                # explicitly check the raw stream, since the size from the chunk can be bigger than the amount of
                # bytes transferred over the wire due to transparent decompression (f.e. GZIP)
                new_total_downloaded = r.raw.tell()
                iter_length += new_total_downloaded - total_downloaded
                total_downloaded = new_total_downloaded
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
                elif not quiet:
                    LOG.debug(
                        "Empty chunk %s (total %dK of %dK) from %s",
                        chunk,
                        total_downloaded / 1024,
                        total_size / 1024,
                        url,
                    )
                if total_size > 0 and (
                    (current_percent := total_downloaded / total_size * 100)
                    >= next_percentage_record
                ):
                    # increment the limit for the next log output (ensure that there is max 1 log message per block)
                    # f.e. percentage_limit is 10, current percentage is 71: next log is earliest at 80%
                    next_percentage_record = (
                        math.floor(current_percent / percentage_limit) * percentage_limit
                        + percentage_limit
                    )
                    if not quiet:
                        LOG.debug(
                            "Downloaded %d%% (total %dK of %dK) to %s",
                            current_percent,
                            total_downloaded / 1024,
                            total_size / 1024,
                            path,
                        )
                    iter_length = 0
                elif total_size <= 0 and iter_length >= iter_limit:
                    if not quiet:
                        # print log message every x K if the total size is not known
                        LOG.debug(
                            "Downloaded %dK (total %dK) to %s",
                            iter_length / 1024,
                            total_downloaded / 1024,
                            path,
                        )
                    iter_length = 0
            f.flush()
            os.fsync(f)
        if os.path.getsize(path) == 0:
            LOG.warning("Zero bytes downloaded from %s, retrying", url)
            # fix: forward ALL original arguments on retry - previously timeout,
            # request_headers (e.g. auth tokens), and quiet were silently dropped
            download(
                url,
                path,
                verify_ssl=verify_ssl,
                timeout=timeout,
                request_headers=request_headers,
                quiet=quiet,
            )
            return
        if not quiet:
            LOG.debug(
                "Done downloading %s, response code %s, total %dK",
                url,
                r.status_code,
                total_downloaded / 1024,
            )
    except requests.exceptions.ReadTimeout as e:
        # chain the original exception for better debuggability
        raise TimeoutError(f"Timeout ({timeout}) reached on download: {url} - {e}") from e
    finally:
        if r is not None:
            r.close()
        s.close()
def download_github_artifact(url: str, target_file: str, timeout: int = None):
    """Download file from main URL or fallback URL (to avoid firewall errors if github.com is blocked).
    Optionally allows to define a timeout in seconds."""

    def _attempt(
        download_url: str, request_headers: dict | None = None, print_error: bool = False
    ):
        # returns True on success, None on failure (optionally logging the error)
        try:
            download(download_url, target_file, timeout=timeout, request_headers=request_headers)
            return True
        except Exception as e:
            if print_error:
                LOG.error(
                    "Unable to download Github artifact from %s to %s: %s %s",
                    url,
                    target_file,
                    e,
                    exc_info=LOG.isEnabledFor(logging.DEBUG),
                )

    # if a GitHub API token is set, use it to avoid rate limiting issues
    auth_headers = None
    if gh_token := os.environ.get("GITHUB_API_TOKEN"):
        auth_headers = {"authorization": f"Bearer {gh_token}"}

    if _attempt(url, request_headers=auth_headers):
        return

    # TODO: use regex below to allow different branch names than "master"
    url = url.replace("https://github.com", "https://cdn.jsdelivr.net/gh")
    # The URL structure is https://cdn.jsdelivr.net/gh/user/repo@branch/file.js
    url = url.replace("/raw/master/", "@master/")
    # Do not send the GitHub auth token to the CDN
    _attempt(url, print_error=True)
# TODO move to aws_responses.py?
def replace_response_content(response, pattern, replacement):
    """Apply a regex substitution to the body of the given HTTP response, in place.

    Note: the substituted body is stored back as a string via the private
    `_content` attribute of the response object.
    """
    body = to_str(response.content or "")
    response._content = re.sub(pattern, replacement, body)