from dataclasses import dataclass, field
from datetime import datetime, timezone
from hashlib import sha256
import json
import re
from typing import (
Optional,
Protocol,
List,
cast,
runtime_checkable,
)
from riverqueue.insert_opts import InsertOpts, UniqueOpts
from .driver import (
JobInsertParams,
DriverProtocol,
ExecutorProtocol,
)
from .job import Job, JobState
from .fnv import fnv1_hash
MAX_ATTEMPTS_DEFAULT: int = 25
"""
Default number of maximum attempts for a job.
"""
PRIORITY_DEFAULT: int = 1
"""
Default priority for a job.
"""
QUEUE_DEFAULT: str = "default"
"""
Default queue for a job.
"""
UNIQUE_STATES_DEFAULT: list[str] = [
JobState.AVAILABLE.value,
JobState.COMPLETED.value,
JobState.PENDING.value,
JobState.RETRYABLE.value,
JobState.RUNNING.value,
JobState.SCHEDULED.value,
]
"""
Default job states included during a unique job insertion.
"""
@dataclass
class InsertResult:
job: "Job"
"""
Inserted job row, or an existing job row if insert was skipped due to a
previously existing unique job.
"""
unique_skipped_as_duplicated: bool = field(default=False)
"""
    True if, for a unique job, the insertion was skipped because an equivalent
    job matching its unique properties was already present.
"""
class JobArgs(Protocol):
"""
Protocol that should be implemented by all job args.
"""
kind: str
def to_json(self) -> str:
pass
@runtime_checkable
class JobArgsWithInsertOpts(Protocol):
"""
    Protocol that's optionally implemented by a JobArgs implementation so that
    every inserted instance of it provides the same custom `InsertOpts`.
    `InsertOpts` passed to insert functions take precedence over the one
    returned by `JobArgsWithInsertOpts`.
"""
def insert_opts(self) -> InsertOpts:
pass
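# Illustrative sketch (not part of the upstream API): a job args class that also
# satisfies `JobArgsWithInsertOpts` by providing default insert options for every
# job of its kind. The names `ReportArgs`, `report_id`, and the "reports" queue
# are hypothetical.
#
#     @dataclass
#     class ReportArgs:
#         report_id: int
#         kind: str = "report"
#
#         def to_json(self) -> str:
#             return json.dumps({"report_id": self.report_id})
#
#         def insert_opts(self) -> InsertOpts:
#             return InsertOpts(queue="reports", max_attempts=5)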
@dataclass
class InsertManyParams:
"""
A single job to insert that's part of an `insert_many()` batch insert.
    Unlike sending raw job args, this allows an `InsertOpts` to be paired with
    the job.
"""
args: JobArgs
"""
Job args to insert.
"""
insert_opts: Optional[InsertOpts] = None
"""
Insertion options to use with the insert.
"""
class Client:
"""
Provides a client for River that inserts jobs. Unlike the Go version of the
River client, this one can insert jobs only. Jobs can only be worked from Go
    code, so job arg kinds and JSON encoding details must be shared between Python
and Go code.
Used in conjunction with a River driver like:
```
    import riverqueue
    import sqlalchemy
    from riverqueue.driver import riversqlalchemy
engine = sqlalchemy.create_engine("postgresql://...")
client = riverqueue.Client(riversqlalchemy.Driver(engine))
```
"""
def __init__(
self, driver: DriverProtocol, advisory_lock_prefix: Optional[int] = None
):
self.driver = driver
self.advisory_lock_prefix = _check_advisory_lock_prefix_bounds(
advisory_lock_prefix
)
def insert(
self, args: JobArgs, insert_opts: Optional[InsertOpts] = None
) -> InsertResult:
"""
Inserts a new job for work given a job args implementation and insertion
options (which may be omitted).
With job args only:
```
insert_res = client.insert(
SortArgs(strings=["whale", "tiger", "bear"]),
)
insert_res.job # inserted job row
```
With insert opts:
```
        insert_res = client.insert(
            SortArgs(strings=["whale", "tiger", "bear"]),
            insert_opts=riverqueue.InsertOpts(
                max_attempts=17,
                priority=3,
                queue="my_queue",
                tags=["custom"],
            ),
        )
insert_res.job # inserted job row
```
Job arg implementations are expected to respond to:
        * `kind` is a unique string that identifies the job in the
database, and which a Go worker will recognize.
* `to_json()` defines how the job will serialize to JSON, which of
course will have to be parseable as an object in Go.
They may also respond to `insert_opts()` which is expected to return an
`InsertOpts` that contains options that will apply to all jobs of this
kind. Insertion options provided as an argument to `insert()` override
those returned by job args.
For example:
```
@dataclass
class SortArgs:
strings: list[str]
kind: str = "sort"
def to_json(self) -> str:
return json.dumps({"strings": self.strings})
```
We recommend using `@dataclass` for job args since they should ideally
be minimal sets of primitive properties with little other embellishment,
and `@dataclass` provides a succinct way of accomplishing this.
Returns an instance of `InsertResult`.
"""
if insert_opts:
            setattr(args, "insert_opts", insert_opts)
with self.driver.executor() as exec:
res = exec.job_insert_many(_make_driver_insert_params_many([args]))
return cast(InsertResult, list(res)[0])
def insert_tx(
self, tx, args: JobArgs, insert_opts: Optional[InsertOpts] = None
) -> InsertResult:
"""
Inserts a new job for work given a job args implementation and insertion
options (which may be omitted).
This variant inserts a job in an open transaction. For example:
```
with engine.begin() as session:
insert_res = client.insert_tx(
session,
SortArgs(strings=["whale", "tiger", "bear"]),
)
```
With insert opts:
```
        with engine.begin() as session:
            insert_res = client.insert_tx(
                session,
                SortArgs(strings=["whale", "tiger", "bear"]),
                insert_opts=riverqueue.InsertOpts(
                    max_attempts=17,
                    priority=3,
                    queue="my_queue",
                    tags=["custom"],
                ),
            )
insert_res.job # inserted job row
```
"""
if insert_opts:
            setattr(args, "insert_opts", insert_opts)
exec = self.driver.unwrap_executor(tx)
res = exec.job_insert_many(_make_driver_insert_params_many([args]))
return cast(InsertResult, list(res)[0])
def insert_many(self, args: List[JobArgs | InsertManyParams]) -> int:
"""
Inserts many new jobs as part of a single batch operation for improved
efficiency.
Takes an array of job args or `InsertManyParams` which encapsulate job
args and a paired `InsertOpts`.
With job args:
```
        num_inserted = client.insert_many([
            SimpleArgs(job_num=1),
            SimpleArgs(job_num=2),
        ])
```
With `InsertManyParams`:
```
        num_inserted = client.insert_many([
            InsertManyParams(args=SimpleArgs(job_num=1), insert_opts=riverqueue.InsertOpts(max_attempts=5)),
            InsertManyParams(args=SimpleArgs(job_num=2), insert_opts=riverqueue.InsertOpts(queue="high_priority")),
        ])
```
Unique job insertion isn't supported with bulk insertion because it'd
run the risk of major lock contention.
Returns the number of jobs inserted.
"""
with self.driver.executor() as exec:
return exec.job_insert_many_no_returning(_make_driver_insert_params_many(args))
def insert_many_tx(self, tx, args: List[JobArgs | InsertManyParams]) -> int:
"""
Inserts many new jobs as part of a single batch operation for improved
efficiency.
This variant inserts a job in an open transaction. For example:
```
        with engine.begin() as session:
            num_inserted = client.insert_many_tx(session, [
                SimpleArgs(job_num=1),
                SimpleArgs(job_num=2),
            ])
```
With `InsertManyParams`:
```
        with engine.begin() as session:
            num_inserted = client.insert_many_tx(session, [
                InsertManyParams(args=SimpleArgs(job_num=1), insert_opts=riverqueue.InsertOpts(max_attempts=5)),
                InsertManyParams(args=SimpleArgs(job_num=2), insert_opts=riverqueue.InsertOpts(queue="high_priority")),
            ])
```
Unique job insertion isn't supported with bulk insertion because it'd
run the risk of major lock contention.
Returns the number of jobs inserted.
"""
exec = self.driver.unwrap_executor(tx)
return exec.job_insert_many_no_returning(_make_driver_insert_params_many(args))
def _check_advisory_lock_prefix_bounds(
advisory_lock_prefix: Optional[int],
) -> Optional[int]:
"""
Checks that an advisory lock prefix fits in 4 bytes, which is the maximum
space reserved for one.
"""
    if advisory_lock_prefix:
        # We only reserve 4 bytes for the prefix, so make sure the given one
        # properly fits. This raises `OverflowError` if it doesn't.
        advisory_lock_prefix.to_bytes(4, "big")
return advisory_lock_prefix
def _hash_lock_key(advisory_lock_prefix: Optional[int], lock_key: str) -> int:
"""
Generates an FNV-1 hash from the given lock key string suitable for use with
a PG advisory lock while checking for the existence of a unique job.
"""
if advisory_lock_prefix is None:
lock_key_hash = fnv1_hash(lock_key.encode("utf-8"), 64)
else:
prefix = advisory_lock_prefix
lock_key_hash = (prefix << 32) | fnv1_hash(lock_key.encode("utf-8"), 32)
return _uint64_to_int64(lock_key_hash)
def _make_driver_insert_params(
args: JobArgs,
insert_opts: InsertOpts,
is_insert_many: bool = False,
) -> JobInsertParams:
"""
Converts user-land job args and insert options to insert params for an
underlying driver.
"""
if not insert_opts:
insert_opts = InsertOpts()
args.kind # fail fast in case args don't respond to kind
args_json = args.to_json()
assert args_json is not None, "args should return non-nil from `to_json`"
    args_insert_opts = InsertOpts()
    if isinstance(args, JobArgsWithInsertOpts):
        # `insert_opts` may be an `InsertOpts` instance assigned by `insert()`/
        # `insert_tx()`, or a method as declared by `JobArgsWithInsertOpts`.
        args_insert_opts = args.insert_opts
        if callable(args_insert_opts):
            args_insert_opts = args_insert_opts()
scheduled_at = insert_opts.scheduled_at or args_insert_opts.scheduled_at
unique_opts = insert_opts.unique_opts or args_insert_opts.unique_opts
queue = insert_opts.queue or args_insert_opts.queue or QUEUE_DEFAULT
insert_params = JobInsertParams(
args=args_json,
kind=args.kind,
max_attempts=insert_opts.max_attempts
or args_insert_opts.max_attempts
or MAX_ATTEMPTS_DEFAULT,
priority=insert_opts.priority or args_insert_opts.priority or PRIORITY_DEFAULT,
queue=queue,
scheduled_at=scheduled_at and scheduled_at.astimezone(timezone.utc),
state="scheduled" if scheduled_at else "available",
tags=_validate_tags(insert_opts.tags or args_insert_opts.tags or []),
)
if unique_opts:
unique_key, unique_state = _build_unique_key_and_state(insert_params, unique_opts)
insert_params.unique_key = unique_key
insert_params.unique_state = unique_state
return insert_params
def _build_unique_key_and_state(
insert_params: JobInsertParams, unique_opts: UniqueOpts
) -> tuple[Optional[memoryview], Optional[int]]:
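    """
    Builds the unique key and unique state bitmask for the given insert params
    and unique options, returning `(None, None)` when no unique options were
    set.
    """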
any_unique_opts = False
unique_key = ""
# Always include kind for parity with upstream implementation
unique_key += f"&kind={insert_params.kind}"
if unique_opts.by_args:
any_unique_opts = True
try:
args_dict = json.loads(insert_params.args)
except (TypeError, json.JSONDecodeError):
args_dict = insert_params.args
sorted_args = json.dumps(args_dict, sort_keys=True, separators=(",", ":"))
unique_key += f"&args={sorted_args}"
if unique_opts.by_period:
any_unique_opts = True
lower_period_bound = _truncate_time(
datetime.now(timezone.utc), unique_opts.by_period
)
unique_key += f"&period={lower_period_bound.strftime('%FT%TZ')}"
if unique_opts.by_queue:
any_unique_opts = True
unique_key += f"&queue={insert_params.queue}"
states_for_key: list[str] | list[JobState]
if unique_opts.by_state:
any_unique_opts = True
states_for_key = unique_opts.by_state
else:
states_for_key = UNIQUE_STATES_DEFAULT
normalized_states = _normalize_state_names(states_for_key)
unique_key += f"&state={','.join(normalized_states)}"
if not any_unique_opts:
return None, None
unique_key_hash = memoryview(sha256(unique_key.encode("utf-8")).digest())
unique_state = unique_opts.state_bitmask()
return unique_key_hash, unique_state
def _normalize_state_names(states: list[str | JobState]) -> list[str]:
normalized: list[str] = []
for state in states:
if isinstance(state, JobState):
normalized.append(state.value)
else:
normalized.append(str(state))
return normalized
def _make_driver_insert_params_many(
args: List[JobArgs | InsertManyParams],
) -> List[JobInsertParams]:
return [
_make_driver_insert_params(
arg.args, arg.insert_opts or InsertOpts(), is_insert_many=True
)
if isinstance(arg, InsertManyParams)
else _make_driver_insert_params(arg, InsertOpts(), is_insert_many=True)
for arg in args
]
def _truncate_time(time, interval_seconds) -> datetime:
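    # Rounds `time` down to the nearest multiple of `interval_seconds`; e.g. with
    # a 900 second (15 minute) period, 12:34:56Z truncates to 12:30:00Z.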
return datetime.fromtimestamp(
(time.timestamp() // interval_seconds) * interval_seconds, tz=timezone.utc
)
def _uint64_to_int64(uint64):
# Packs a uint64 then unpacks to int64 to fit within Postgres bigint
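    # e.g. 2**64 - 1 maps to -1, while values below 2**63 pass through unchanged.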
return (uint64 + (1 << 63)) % (1 << 64) - (1 << 63)
tag_re = re.compile(r"\A[\w][\w\-]+[\w]\Z")
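# e.g. "custom" and "high-priority" match; tags containing spaces or with leading
# or trailing hyphens do not.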
def _validate_tags(tags: list[str]) -> list[str]:
for tag in tags:
assert (
len(tag) <= 255 and tag_re.match(tag)
), f"tags should be less than 255 characters in length and match regex {tag_re.pattern}"
return tags