Serial-Studio/scripts/code_verify_rules.py at master · Serial-Studio/Serial-Studio

History

3120 lines (2861 loc) · 127 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962

963

964

965

966

967

968

969

970

971

972

973

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

"""Static-analysis rules for scripts/code-verify.py.

Adds CLAUDE.md-derived semantic checks on top of the formatter's existing

style rules. C++ rules use tree-sitter's C++ grammar to walk a real AST;

QML rules stay line-based on top of the tokenizer that already lives in

code-verify.py.

Each rule returns a list of (line, kind, message) tuples. The driver in

code-verify.py wraps them as Violations and routes them through the

existing flag-only / auto-fixable pipeline. Every rule here is flag-only.

Tree-sitter is the only new dependency. The module degrades gracefully:

when tree-sitter or tree-sitter-cpp aren't importable, C++ semantic

checks are silently skipped and the formatter still runs its line-based

rules. The CI install pins both in tests/requirements.txt.

`code-verify off / on` fences mask every rule here too, same as the

existing rules — the driver passes the fence mask in.

"""

from __future__ import annotations

import re

from dataclasses import dataclass

from pathlib import Path

try:

import tree_sitter

import tree_sitter_cpp

_CPP_LANG = tree_sitter.Language(tree_sitter_cpp.language())

_CPP_PARSER = tree_sitter.Parser(_CPP_LANG)

HAS_TREE_SITTER = True

except Exception:

HAS_TREE_SITTER = False

_CPP_LANG = None

_CPP_PARSER = None

# ---------------------------------------------------------------------------

# Public types

# ---------------------------------------------------------------------------

@dataclass(frozen=True)
class Finding:
    """A single rule hit produced by the checks in this module.

    Instances are immutable (`frozen=True`). The module docstring describes
    the driver in code-verify.py as the consumer that wraps rule results as
    Violations.
    """

    # Source line the finding is reported on. The rules in this module that
    # compute it from tree-sitter positions use `start_point[0] + 1`.
    line: int
    # Rule identifier string, e.g. "perf-recursive-hotpath".
    kind: str
    # Human-readable explanation attached to the finding.
    message: str

# ---------------------------------------------------------------------------

# Hotpath method names (CLAUDE.md: never allocate on the dashboard path)

# ---------------------------------------------------------------------------

# Methods named here are walked for new/make_shared/append calls. The names

# come straight from CLAUDE.md's "Threading Rules" / "Hotpath" sections.

_HOTPATH_METHODS = frozenset(

{

"hotpathRxFrame",

"hotpathRxSourceFrame",

"processData",

"onReadyRead",

"onFrameReady",

"onRawDataReceived",

"appendChunk",

"frameTimestamp",

"applyTransform",

"parseProjectFrame",

"updateData",

"updateLineSeries",

"pushSample",

}

)

# Calls / patterns banned on the hotpath. Each entry is (regex, message).

_HOTPATH_BANNED_CALLS = [

(re.compile(r"\bnew\s+[A-Za-z_]"), "`new` allocation on hotpath"),

(re.compile(r"\bstd::make_shared\b"), "`std::make_shared` allocation on hotpath"),

(re.compile(r"\bstd::make_unique\b"), "`std::make_unique` allocation on hotpath"),

(re.compile(r"\.append\("), "`.append(` (likely Qt container resize) on hotpath"),

(re.compile(r"\.push_back\("), "`.push_back(` on hotpath (pre-reserve at init)"),

(re.compile(r"\bemit\b"), "bare `emit` on hotpath -- use `Q_EMIT`"),

]

# ---------------------------------------------------------------------------

# CPU-microarchitecture / performance rules

# ---------------------------------------------------------------------------

# These rules apply knowledge of how compiled C++ behaves at the assembly /

# register / branch-predictor / cache level. The cycle counts in the rule

# messages are representative for current Intel (Skylake-derived) and ARM

# (Cortex-A7x/A78) microarchitectures; exact numbers vary with the target.

# All rules ship as advisory -- the goal is a checklist for a follow-up

# human / LLM pass, not a CI gate.

# Heavy types -- known to be expensive to copy by value. Even implicitly

# shared Qt containers (QString/QByteArray/QList/...) pay an atomic refcount

# bump on the COW pointer, which is a `lock`-prefix instruction on x86 or an

# `ldxr/stxr` loop on ARM without LSE. std:: containers do a full deep copy.

_HEAVY_TYPES = frozenset(

{

"QString",

"QByteArray",

"QStringList",

"QVariant",

"QVariantMap",

"QVariantList",

"QVariantHash",

"QList",

"QVector",

"QMap",

"QHash",

"QSet",

"QQueue",

"QStack",

"QJsonObject",

"QJsonArray",

"QJsonDocument",

"QJsonValue",

"QImage",

"QPixmap",

"QPolygon",

"QPolygonF",

"QPainterPath",

"QBitArray",

"QDateTime",

"std::string",

"std::wstring",

"std::vector",

"std::map",

"std::unordered_map",

"std::list",

"std::deque",

"std::set",

"std::unordered_set",

"std::multimap",

"std::unordered_multimap",

}

)

_REFCOUNTED_TYPES = frozenset(

{

"std::shared_ptr",

"QSharedPointer",

"QSharedDataPointer",

"QExplicitlySharedDataPointer",

"boost::shared_ptr",

}

)

# File-wide perf patterns: scanned inside every function body, not just

# hotpath methods. Cost matters everywhere these appear; the user can

# wrap a region in `// code-verify off` when the slow path is intentional

# (init code that builds a regex once, error path that throws, etc.).

_PERF_BODY_PATTERNS = [

# `/ <floating-literal>` -- compilers do NOT fold `a / 2.5` to

# `a * 0.4` without `-ffast-math` (would lose 1 ULP for non-exact

# reciprocals). Multiplying by a precomputed reciprocal is ~3 cyc

# vs ~12-22 cyc for divsd.

(

re.compile(

r"(?<![*/=<>!&|^])/\s*(?:\d+\.\d*|\.\d+|\d+\.\d*[eE][+-]?\d+)" r"[fFlL]?"

"perf-divide-by-float-literal",

"`/` with a floating-point literal -- compilers don't fold to "

"reciprocal multiply (would lose IEEE accuracy without -ffast-math). "

"Precompute `constexpr double kInvX = 1.0 / X;` and multiply (~3 cyc "

"mulsd vs ~12-22 cyc divsd).",

# `pow(x, N)` -- libm transcendental, goes through `exp(log(x) * y)`.

# 40+ cyc on Intel, similar on ARM. Caller-saved FPU/SIMD state gets

# clobbered too.

(

re.compile(r"\b(?:std::)?pow\s*\("),

"perf-pow-call",

"`pow(...)` -- libm transcendental via `exp(log(x) * y)` (40+ cyc on "

"Intel/ARM) and clobbers caller-saved FPU/SIMD state. For small "

"integer exponents write the multiply (`x*x`, `x*x*x`); for "

"`pow(x, 0.5)` use `std::sqrt(x)`; for `pow(2.0, n)` use "

"`std::ldexp(1.0, n)` (single mantissa-shift insn).",

# `dynamic_cast<T>` -- walks the inheritance graph via RTTI typeinfo

# string comparisons; 50-200+ cyc worst case and a function call.

(

re.compile(r"\bdynamic_cast\s*<"),

"perf-dynamic-cast",

"`dynamic_cast<...>` -- walks the inheritance graph via RTTI typeinfo "

"string compares (50-200+ cyc worst case, runtime call). Use a "

"discriminating enum + `static_cast`, or pre-resolve the cast once "

"(store the typed pointer at object init).",

# malloc / free family -- same arena-mutex cost as `new`/`delete`,

# just less visible. Both Linux glibc and Windows HeapAlloc serialize

# on a per-arena mutex; on contended workloads this is a real cost.

(

re.compile(

"perf-malloc-family",

"C heap call -- malloc/free contend on a per-arena mutex (glibc, "

"RtlHeap) and aren't pipelineable. In a hot loop, reuse a "

"pre-reserved buffer or a small-object pool.",

# `QRegularExpression(...)` constructor -- compiles the regex to a

# state machine, heap-allocates capture tables. If invoked in a loop,

# the regex gets recompiled every iteration.

(

re.compile(r"\bQRegularExpression\s*\([^)]"),

"perf-regex-construct",

"`QRegularExpression(...)` constructor -- compiles a DFA/NFA state "

"machine and heap-allocates capture state. Build the regex once "

"(file-scope `static const`, or a class member init) and reuse the "

"`.match(...)` path each iteration.",

# `.arg(...).arg(...)` chains -- each call returns a new QString

# (heap alloc + copy). Two .arg()s = two allocs. Pass all args in

# one call (`s.arg(a, b, c)`) or use QStringBuilder (`%` operator

# with `<QStringBuilder>` included).

(

re.compile(r"\.arg\s*\([^()]*\)\s*\.arg\s*\("),

"perf-arg-chain",

"`.arg(...).arg(...)` chain -- each call allocates a fresh QString "

"(heap + memcpy). Combine into one call (`.arg(a, b, c)`) or include "

"`<QStringBuilder>` and use the `%` operator (single allocation, "

"sized exactly).",

]

# Hotpath-only perf patterns: too noisy to flag file-wide in this codebase

# (qDebug and QString allocation are pervasive in setup/teardown/error

# paths and aren't wrong there). The hotpath methods listed in

# `_HOTPATH_METHODS` run at kHz+ rates -- THAT'S where the cost bites.

_HOTPATH_PERF_PATTERNS = [

# QString / QByteArray construction with a literal -- each call hits

# the heap allocator (malloc on Linux, RtlAllocateHeap on Windows),

# contended on the arena mutex, not pipelineable. Cache the result

# at init or hoist into a file-scope `static const`.

# `QStringLiteral("...")` is deliberately NOT flagged: by design it

# constant-folds into a static read-only QString with zero heap touch

# (that's why Qt has it). The other entries are the genuine heap-

# allocating constructors/conversions.

(

re.compile(

r"\bQString\s*\(\s*[\"R]"

r"|\bQByteArray\s*\(\s*[\"R]"

r"|\.toUtf8\s*\(\s*\)"

r"|\.toStdString\s*\(\s*\)"

r"|\.toLatin1\s*\(\s*\)"

r"|\.toLocal8Bit\s*\(\s*\)"

r"|\bQString::fromUtf8\s*\("

r"|\bQString::fromLatin1\s*\("

"perf-string-alloc-hotpath",

"string construction/conversion on the hotpath -- heap allocation + "

"memcpy. malloc contends on a per-arena mutex; the new buffer "

"pollutes L1 (32-48 KB). Cache the QString at init, or use a "

"fixed stack buffer for transient formatting.",

# qDebug / qWarning -- builds a QDebug stream object, takes the global

# message-handler mutex, formats and writes. Even filtered-out

# categories pay the format cost because `<<` is eager. Hundreds of

# cycles minimum per call; thousands when the handler dispatches to

# a Console widget that re-enters the event loop.

(

re.compile(r"\bq(?:Debug|Info|Warning|Critical|Fatal)\s*\("),

"perf-log-on-hotpath",

"Qt logging call on the hotpath -- builds a QDebug stream, takes "

"the global message-handler mutex, formats and writes. `<<` is "

"eager: even filtered-out categories pay the format cost. Gate "

"behind `#ifdef SERIAL_STUDIO_DEBUG` or move to a sampled counter.",

# `throw` on the hotpath -- exception throw runs the personality

# routine, walks DWARF / SEH unwind tables (1000s of cycles per

# frame), mispredicts every catch on the way out, trashes the

# return-address stack. `noexcept` callers crash hard.

(

re.compile(r"\bthrow\s+\w"),

"perf-throw-on-hotpath",

"`throw` on the hotpath -- stack unwinding via DWARF/SEH personality "

"routines (1000s of cycles), mispredicts every catch frame, trashes "

"the return-address stack predictor. Return an error code, an "

"`std::expected`-style variant, or a sentinel value instead.",

# Mutex / lock-guard acquisition on the hotpath -- ~20 cyc lock-prefix

# RMW on x86, ldaxr+stxr+DMB on ARM, serializes the store buffer, and

# contended bouncing thrashes the L1 line. Outside the kHz frame path

# the cost is irrelevant; locks are the right answer for once-per-event

# state mutation. Only flag inside known-hot methods.

(

re.compile(

r"\b(?:QMutexLocker|QReadLocker|QWriteLocker|QRecursiveMutex"

r"|std::lock_guard|std::unique_lock|std::scoped_lock"

r"|std::shared_lock)\b"

"perf-lock-acquire",

"lock acquisition on the hotpath -- atomic RMW with full memory "

"barrier (~20 cyc x86 `lock`-prefix, ldaxr+stxr+DMB on ARM), "

"serializes the store buffer; contended bouncing thrashes the L1 "

"line. Prefer thread-local / SPSC / per-core state, or a relaxed "

"`std::atomic` when the invariant fits a single word.",

# Bare mutex.lock() / lockForRead() calls -- same physical cost.

(

re.compile(r"\b\w+\.(?:lock|try_lock|lockForRead|lockForWrite|tryLock)\s*\("),

"perf-lock-acquire",

"explicit `.lock()`/`.try_lock()`/`.lockForRead()` call on the "

"hotpath -- same `lock`-prefix RMW cost as the locker types.",

# Integer / float division by a non-literal divisor on the hotpath.

# `idiv`/`udiv` is the slowest ALU op (20-40 cyc Skylake/Zen, not

# pipelined; 12-40 cyc Cortex-A78). When the divisor is constexpr the

# compiler emits a magic-number multiply; the hotpath cost only bites

# when the divisor is a true runtime variable. `sizeof(...)` is

# compile-time and skipped via lookahead. Reciprocal-cache lines

# (`auto inv = 1.0 / x`) are skipped via _is_reciprocal_cache_line.

(

re.compile(r"(?<![*/=<>!&|^])/\s*(?!/)(?!sizeof\b)[A-Za-z_]\w*"),

"perf-divide-runtime-divisor",

"`/` with a non-literal divisor on the hotpath -- division is the "

"slowest ALU op (divsd ~11-22 cyc Skylake, fdiv ~10-40 cyc Cortex-A78; "

"idiv 20-40 cyc, not pipelined). Cache the reciprocal once "

"(`r = 1.0 / d`) and multiply in the loop, or use a bit-shift for "

"power-of-two integer cases.",

# Modulo by a non-literal divisor on the hotpath. Same idiv cost as

# integer divide; power-of-two N can be replaced with `& (N - 1)`.

(

re.compile(r"(?<![%=*/+\-<>!&|^])%\s*[A-Za-z_]\w*"),

"perf-modulo-runtime-divisor",

"`%` with a non-literal divisor on the hotpath -- emits `idiv`/`udiv` "

"(20-40 cyc x86, 12-40 cyc ARM). For power-of-two N use `& (N - 1)` "

"(single-cycle `and`); for runtime divisors hoist out of the loop or "

"use a libdivide-style precomputed magic-number multiply.",

]

# Header line-pattern: a hotpath method declared `virtual`. Every call

# site emits an indirect branch through the vtable; the predictor learns

# monomorphic sites but can't inline, and polymorphic sites mispredict

# (15-20 cycle bubble on x86, similar on ARM). `final` partially helps

# when the dynamic type is known.

_VIRTUAL_HOTPATH_RE = re.compile(

r"\bvirtual\b[^;{]*\b("

+ "|".join(sorted(re.escape(n) for n in _HOTPATH_METHODS))

+ r")\s*\("

)

# Generic atomic-type detector used by the false-sharing rule. Catches

# `std::atomic<T>`, `std::atomic_int`, `std::atomic_flag`, and the Qt

# `QAtomicInt`/`QAtomicPointer<T>`/`QAtomicInteger<T>` family.

_ATOMIC_DECL_RE = re.compile(

r"\b(?:std::atomic(?:_[a-z0-9_]+)?\s*(?:<|\s+m?_?\w)"

r"|std::atomic_flag\b"

r"|QAtomic(?:Int|Pointer|Integer)\b)"

)

# Local fixed-size array declaration with a numeric size, e.g.

# `char buf[8192];`, `double samples[2048] = {};`.

_STACK_ARRAY_RE = re.compile(

r"|quint32|quint64|qreal)\s+"

r"(?:const\s+)?"

r"\w+\s*\[\s*(\d+)\s*\]\s*[;={,]"

)

def _walk_to_function_declarator(decl):
    """Follow nested `declarator` fields down to a `function_declarator`.

    Starting at @a decl, each step inspects the current node's `type`; a
    node typed `function_declarator` is returned as-is, otherwise the walk
    descends into the node's `declarator` field. At most 16 nodes are
    inspected.

    Returns the `function_declarator` node, or None when @a decl is None,
    the chain runs out of `declarator` children, or the depth cap is hit.
    """
    node = decl
    for _ in range(16):
        if node is None:
            return None
        if node.type == "function_declarator":
            return node
        node = node.child_by_field_name("declarator")
    return None

def _sink_param_names(func_node, src: bytes) -> set:
    """Collect identifiers that this function appears to use as sinks.

    An identifier is collected when:

    - the function body contains `std::move(<name>)`;
    - the function body contains `return <name>;` anywhere (not only as
      the final statement); or
    - a `field_initializer_list` child of @a func_node contains
      `std::move(<name>)`.

    The match is textual (regex over the node text), so the returned set
    may hold names that are locals rather than parameters; callers are
    expected to intersect it with the actual parameter names.

    Returns a set of identifier strings (possibly empty).
    """
    moved_arg = re.compile(r"\bstd::move\s*\(\s*([A-Za-z_]\w*)\s*\)")
    returned_name = re.compile(r"\breturn\s+([A-Za-z_]\w*)\s*;")

    sinks: set = set()

    body = func_node.child_by_field_name("body")
    if body is not None:
        text = _node_text(body, src)
        sinks.update(moved_arg.findall(text))
        # A bare `return <name>;` lets the value flow out as the result.
        sinks.update(returned_name.findall(text))

    # Constructor member-initializer lists live outside the body node.
    for child in func_node.children:
        if child.type == "field_initializer_list":
            sinks.update(moved_arg.findall(_node_text(child, src)))

    return sinks

def _parameter_perf_findings(func_node, src: bytes, fenced) -> list:
    """Report heavy or refcounted types that a function takes by value.

    The check is not restricted to hotpath methods. For every
    `parameter_declaration` of the function's parameter list:

    - parameters whose declarator is a pointer / reference / rvalue
      reference (named or abstract) are ignored;
    - parameters whose name is in `_sink_param_names(...)` are ignored
      (by-value + move / return is a legitimate idiom);
    - the type text is reduced to a base name (leading qualifiers removed,
      then cut at the first `<` or space) and looked up in `_HEAVY_TYPES`
      and `_REFCOUNTED_TYPES`.

    @a fenced is called with the parameter's line; a truthy result
    suppresses the finding.

    Returns a list of `Finding` objects (empty when the declarator chain
    has no function declarator or no parameter list).
    """
    indirect_declarators = (
        "pointer_declarator",
        "reference_declarator",
        "abstract_pointer_declarator",
        "abstract_reference_declarator",
        "rvalue_reference_declarator",
        "abstract_rvalue_reference_declarator",
    )
    leading_qualifiers = ("const ", "constexpr ", "volatile ", "mutable ", "register ")

    fdecl = _walk_to_function_declarator(func_node.child_by_field_name("declarator"))
    if fdecl is None:
        return []
    param_list = fdecl.child_by_field_name("parameters")
    if param_list is None:
        return []

    sinks = _sink_param_names(func_node, src)
    results: list = []

    for param in param_list.children:
        if param.type != "parameter_declaration":
            continue

        type_node = param.child_by_field_name("type")
        declarator = param.child_by_field_name("declarator")
        if type_node is None:
            continue

        if declarator is not None:
            if declarator.type in indirect_declarators:
                continue

            # Descend to the parameter's identifier: prefer a direct
            # identifier child, otherwise step into the first child that
            # itself has children.
            probe = declarator
            while probe is not None and probe.type != "identifier":
                kids = probe.children
                step = next((k for k in kids if k.type == "identifier"), None)
                if step is None:
                    step = next(
                        (k for k in kids if hasattr(k, "children") and k.children),
                        None,
                    )
                probe = step

            if probe is not None and _node_text(probe, src) in sinks:
                continue

        type_text = _node_text(type_node, src).strip()

        # Strip leading qualifiers, then keep only the part before any
        # template argument list or trailing token.
        base = type_text
        for qualifier in leading_qualifiers:
            while base.startswith(qualifier):
                base = base[len(qualifier) :].lstrip()
        base = re.split(r"[< ]", base, maxsplit=1)[0]

        line = _line_of(param)
        if fenced(line):
            continue

        if base in _HEAVY_TYPES:
            kind = "perf-large-by-value-param"
            message = (
                f"`{type_text}` passed by value -- forces a copy in the "
                f"prologue (atomic ref-bump for Qt COW types: `lock`-prefix "
                f"on x86, ldxr+stxr on ARM without LSE; full deep memcpy "
                f"for std:: containers). Pass `const {base}&` and copy "
                f"only when you genuinely keep a local copy."
            )
        elif base in _REFCOUNTED_TYPES:
            kind = "perf-shared-ptr-by-value"
            message = (
                f"`{type_text}` by value -- two atomic refcount ops per "
                f"call (`lock add`/`lock sub` on x86, ~20 cyc each; "
                f"ldxr/stxr loop on ARM without LSE/v8.1 atomics). "
                f"Pass `const {base}<...>&` and copy only when you "
                f"actually store the pointer."
            )
        else:
            continue

        results.append(Finding(line, kind, message))

    return results

def _init_only_decl_line_span(body, src: bytes) -> set:
    """Collect the 1-based lines covered by init-only declarations.

    A `declaration` node inside @a body qualifies when its
    `storage_class_specifier` / `type_qualifier` children include
    `constexpr`, or include both `static` and `const`. Every physical line
    from the declaration's first to its last line (inclusive) is added, so
    multi-line initializers are covered in full.

    The per-call cost rules use this set to avoid firing on initializers
    that are folded at compile time or evaluated once.

    Returns an empty set when @a body is None.
    """
    once_only: set = set()
    if body is None:
        return once_only

    for decl in _walk(body):
        if decl.type != "declaration":
            continue
        keywords = {
            _node_text(child, src).strip()
            for child in decl.children
            if child.type in ("storage_class_specifier", "type_qualifier")
        }
        if "constexpr" in keywords or {"static", "const"} <= keywords:
            # start_point / end_point rows are 0-based; +1 converts, and the
            # extra +1 on the end makes the range inclusive.
            once_only.update(range(decl.start_point[0] + 1, decl.end_point[0] + 2))

    return once_only

def _cold_branch_line_span(body, src: bytes) -> set:
    """Collect the 1-based lines that sit inside cold-path branches.

    Two node kinds contribute lines:

    - `catch_clause`: the lines of its body (the `body` field, or failing
      that the first `compound_statement` child);
    - `attributed_statement` whose first 64 characters of text contain
      `[[unlikely]]` or `[[gnu::unlikely]]`: the lines of every child that
      is not an `attribute_declaration`.

    The perf rules skip these lines because they are reached on error /
    overflow / exception paths rather than in steady state.

    Returns an empty set when @a body is None.
    """
    cold: set = set()
    if body is None:
        return cold

    def rows(n):
        # Inclusive 1-based line range of a node.
        return range(n.start_point[0] + 1, n.end_point[0] + 2)

    for node in _walk(body):
        kind = node.type
        if kind == "catch_clause":
            handler = node.child_by_field_name("body")
            if handler is None:
                handler = next(
                    (ch for ch in node.children if ch.type == "compound_statement"),
                    None,
                )
            if handler is not None:
                cold.update(rows(handler))
        elif kind == "attributed_statement":
            # The attribute list leads the statement, so a short prefix is
            # enough to see it.
            head = _node_text(node, src)[:64]
            if "[[unlikely]]" in head or "[[gnu::unlikely]]" in head:
                for ch in node.children:
                    if ch.type != "attribute_declaration":
                        cold.update(rows(ch))

    return cold

# Literal-1 numerator followed by `/` and the start of an identifier or a
# parenthesised expression: `1 / x`, `1.0 / x`, `1.0f / qMax(...)`.
# The `(?<![\w.])` lookbehind keeps the `1` from being the tail of another
# token, so `0.1 / x`, `2.1 / x` and `v1 / x` are NOT treated as reciprocals.
_RECIPROCAL_CACHE_RE = re.compile(r"(?<![\w.])1(?:\.0*[fFlL]?)?\s*/\s*[A-Za-z_(]")

# Captures the identifier that follows a `/` or `%` operator. Compound
# operators and comment openers are excluded by the lookbehind / lookahead,
# and `sizeof` is skipped.
_DIV_OR_MOD_DIVISOR_RE = re.compile(
    r"(?<![*/=<>!&|^])[/%]\s*(?!/)(?!sizeof\b)([A-Za-z_]\w*)"
)


def _is_reciprocal_cache_line(scrubbed: str) -> bool:
    """Return True when @a scrubbed contains a `1 / <expr>` reciprocal.

    Lines such as `const float inv = 1.0f / x;` or
    `auto r = 1.0 / qMax(...);` are the recommended fix for runtime-divisor
    cost (cache the reciprocal once, multiply afterwards), so the divide
    rule must not flag them. Detection is purely textual: a numerator that
    is the literal one (`1`, `1.`, `1.0`, optionally with an `f`/`F`/`l`/`L`
    suffix) followed by `/` and an identifier or `(`.

    @a scrubbed is expected to be a single source line; the caller in this
    file passes it with strings and line comments already removed.
    """
    return _RECIPROCAL_CACHE_RE.search(scrubbed) is not None

# Identifiers treated as compile-time constants without needing a local
# `constexpr` declaration: the M_* math macros and a handful of integer /
# char limit macros. The divisor / modulo rules do not fire when every
# divisor on a line is one of these.
_KNOWN_COMPILE_TIME_NAMES = frozenset(
    (
        # <cmath> style math macros
        "M_PI", "M_PI_2", "M_PI_4", "M_1_PI", "M_2_PI", "M_2_SQRTPI",
        "M_E", "M_LOG2E", "M_LOG10E", "M_LN2", "M_LN10",
        "M_SQRT2", "M_SQRT1_2",
        # fixed-width integer limits
        "INT8_MAX", "INT16_MAX", "INT32_MAX", "INT64_MAX",
        "UINT8_MAX", "UINT16_MAX", "UINT32_MAX", "UINT64_MAX",
        # char limits
        "CHAR_BIT", "CHAR_MAX", "CHAR_MIN",
    )
)


def _compile_time_constants_in_scope(body, src: bytes) -> set:
    """Return the names usable as compile-time divisors inside @a body.

    The result always starts as a copy of `_KNOWN_COMPILE_TIME_NAMES`. When
    @a body is not None, every `declaration` node that carries a
    `constexpr` type qualifier contributes the identifier of each of its
    `init_declarator` children (the declarator itself when it is an
    identifier, otherwise its first direct identifier child).

    The divisor / modulo rules skip lines whose divisors all appear in
    this set.
    """
    known: set = set(_KNOWN_COMPILE_TIME_NAMES)
    if body is None:
        return known

    for stmt in _walk(body):
        if stmt.type != "declaration":
            continue

        qualifiers = [
            _node_text(child, src).strip()
            for child in stmt.children
            if child.type == "type_qualifier"
        ]
        if "constexpr" not in qualifiers:
            continue

        for child in stmt.children:
            if child.type != "init_declarator":
                continue
            target = child.child_by_field_name("declarator")
            if target is None:
                continue
            if target.type == "identifier":
                ident = target
            else:
                ident = next(
                    (kid for kid in target.children if kid.type == "identifier"),
                    None,
                )
            if ident is not None:
                known.add(_node_text(ident, src))

    return known

def _scan_body_lines(body, src: bytes, fname: str, fenced, patterns) -> list:
    """Apply `(regex, kind, message)` triples to each line of a body.

    For every physical line of @a body, the first pattern (in list order)
    that matches produces one `Finding`; later patterns are not tried for
    that line.

    Whole lines are skipped when:

    - `fenced(line_number)` is truthy;
    - the line belongs to a `constexpr` / `static const` declaration
      (`_init_only_decl_line_span`);
    - the line is inside a catch body or an `[[unlikely]]` statement
      (`_cold_branch_line_span`).

    Individual rules are muted per line:

    - `perf-divide-runtime-divisor` on reciprocal-cache lines
      (`1.0 / x`), which are the recommended fix for that rule;
    - `perf-divide-runtime-divisor` and `perf-modulo-runtime-divisor` when
      every divisor on the line is a known compile-time name.

    Matching runs on the line with strings and line comments stripped.
    @a fname is accepted for signature parity and not used here.

    Returns a list of `Finding` objects; empty when @a body is None.
    """
    if body is None:
        return []

    divide_kind = "perf-divide-runtime-divisor"
    modulo_kind = "perf-modulo-runtime-divisor"

    text = _node_text(body, src)
    first_line = body.start_point[0] + 1
    skipped = _init_only_decl_line_span(body, src) | _cold_branch_line_span(body, src)
    foldable = _compile_time_constants_in_scope(body, src)

    hits: list = []
    for line_no, raw in enumerate(text.split("\n"), start=first_line):
        if fenced(line_no) or line_no in skipped:
            continue

        code = _strip_strings_and_line_comments(raw)

        # Work out which rule kinds are muted on this particular line.
        muted = set()
        if _is_reciprocal_cache_line(code):
            muted.add(divide_kind)
        divisors = _DIV_OR_MOD_DIVISOR_RE.findall(code)
        if divisors and all(name in foldable for name in divisors):
            muted.update((divide_kind, modulo_kind))

        hit = next(
            (
                (kind, msg)
                for pat, kind, msg in patterns
                if kind not in muted and pat.search(code)
            ),
            None,
        )
        if hit is not None:
            hits.append(Finding(line_no, hit[0], hit[1]))

    return hits

def _recursion_findings(func_node, fname: str, body, src: bytes, fenced) -> list:
    """Flag direct self-recursion inside a hotpath method.

    Only runs when @a fname is listed in `_HOTPATH_METHODS`. Recursion at
    kHz rates blows the i-cache, trashes the return-address predictor and
    prevents inlining.

    Only **stack** recursion is reported. The following are skipped:

    - calls inside a `lambda_expression` nested in @a body (they run when
      the lambda is invoked, not on this frame);
    - qualified calls such as `Base::fname(...)` (statically dispatched to
      a different function);
    - member calls on another object (`other.fname(...)`,
      `other->fname(...)`); only bare `fname(...)` and
      `this->fname(...)` count as self-calls.

    At most one finding is emitted per source line, and lines for which
    `fenced(line)` is truthy are skipped. @a func_node is accepted for
    signature parity and not used here.

    Returns a list of `Finding` objects; empty when @a body is None, when
    @a fname is empty, or when @a fname is not a hotpath method.
    """
    if body is None or not fname or fname not in _HOTPATH_METHODS:
        return []

    findings: list = []
    seen_lines: set = set()

    for node in _walk(body):
        if node.type != "call_expression":
            continue

        # Look for an enclosing lambda between the call and the function
        # body. Ancestors are compared with `!=` rather than `is not`:
        # `.parent` may hand back a fresh wrapper object for the same
        # syntax node, so an identity test cannot be relied on to stop the
        # walk at `body`.
        cur = node.parent
        in_lambda = False
        while cur is not None and cur != body:
            if cur.type == "lambda_expression":
                in_lambda = True
                break
            cur = cur.parent
        if in_lambda:
            continue

        callee = node.child_by_field_name("function")
        if callee is None:
            continue

        # Accept `fname(...)` (plain identifier) and `this->fname(...)`
        # (field_expression whose object is `this`). Everything else,
        # including `Foo::fname`, `obj.fname` and `obj->fname`, is not a
        # self-call.
        is_self = False
        if callee.type == "identifier":
            is_self = _node_text(callee, src) == fname
        elif callee.type == "field_expression":
            obj = callee.child_by_field_name("argument")
            field = callee.child_by_field_name("field")
            is_self = (
                obj is not None
                and field is not None
                and obj.type == "this"
                and _node_text(field, src) == fname
            )
        if not is_self:
            continue

        line = node.start_point[0] + 1
        if fenced(line) or line in seen_lines:
            continue
        seen_lines.add(line)
        findings.append(
            Finding(
                line,
                "perf-recursive-hotpath",
                f"hotpath `{fname}` calls itself -- recursion on a kHz "
                f"frame loop blows the i-cache (200+ cyc per L2 miss), "
                f"mispredicts the RAS on every return, and prevents the "
                f"compiler from inlining. Rewrite iteratively (explicit "
                f"work-list / std::stack).",
            )
        )

    return findings

_RUN_LOOP_COND_RE = re.compile(r"\bm_\w+\s*(?:\.load\s*\(|\)\s*\.\s*load\s*\()")

# Qt event-handler suffixes. These methods fire only on user input, geometry
# changes, focus changes, etc. -- cold paths by construction. Any per-call
# divide/modulo/pow inside them is irrelevant to the steady-state hotpath.
# A name must be strictly longer than the suffix to match.
_QT_EVENT_HANDLER_SUFFIXES = (
    "Event",  # mousePressEvent, wheelEvent, keyPressEvent, paintEvent...
    "EventFilter",  # installEventFilter-style names
    "ChangeEvent",  # already covered by "Event"; kept for readability
)

# Handlers that do not end in one of the suffixes above and therefore need
# an exact-name match. `eventFilter` starts with a lowercase `e`, so the
# capitalised "EventFilter" suffix never matches it; `geometryChange` has no
# `Event` suffix at all.
_QT_EVENT_HANDLER_EXACT_NAMES = frozenset(
    {
        "eventFilter",
        "geometryChange",
    }
)


def _is_qt_event_handler(fname: str) -> bool:
    """True when @a fname matches Qt's event-handler naming convention.

    A name qualifies when it is one of `_QT_EVENT_HANDLER_EXACT_NAMES`
    (`eventFilter`, `geometryChange`) or when it ends with -- and is
    longer than -- one of `_QT_EVENT_HANDLER_SUFFIXES`
    (`mousePressEvent`, `wheelEvent`, `paintEvent`, ...). Such handlers
    are dispatched per user gesture or window event, far below the kHz
    frame rate the perf rules target.

    Returns False for an empty or None name.
    """
    if not fname:
        return False
    if fname in _QT_EVENT_HANDLER_EXACT_NAMES:
        return True
    return any(
        fname.endswith(suffix) and len(fname) > len(suffix)
        for suffix in _QT_EVENT_HANDLER_SUFFIXES
    )

# Method-name patterns for the QQuickPaintedItem / QPainter render path. These

# functions are called at most at the screen refresh rate (~60 Hz) -- two

# orders of magnitude below the kHz frame loop the perf rules target.

_PAINT_METHOD_NAMES = frozenset(

{

"paint",

"render",

}

)

_PAINT_METHOD_PREFIXES = ("draw", "render", "paint")

def _is_paint_method(fname: str) -> bool:

"""True when @a fname is a paint / render method (`paint`, `paintEvent`,

`drawXAxis`, `drawGrid`, `renderTile`, `paintBackground`, ...).

Paint callbacks fire at the screen refresh rate at most. Compared to

the kHz frame hotpath, the per-call cost of one or two divides is

invisible. Locks, divides, etc. on these paths are not the rule's

target."""

if not fname:

return False

if fname in _PAINT_METHOD_NAMES:

return True

for prefix in _PAINT_METHOD_PREFIXES:

if fname.startswith(prefix) and len(fname) > len(prefix):

# Next char must be uppercase to avoid false positives like

# `drained` or `paintbrush`.

tail = fname[len(prefix)]

if tail.isupper():

return True

return False

def _is_constexpr_or_consteval(func_node, src: bytes) -> bool:
    """Report whether a function node carries a compile-time specifier.

    Looks only at the direct children of @a func_node: the result is True
    when one of them is a `type_qualifier` or `storage_class_specifier`
    whose text (whitespace-stripped) is `constexpr`, `consteval`, or
    `constinit`.

    Intended use: runtime-cost rules skip such functions, since their
    bodies are compile-time-evaluable (or only meaningful at compile
    time) and anything that survives constant folding is a deliberate
    choice at the call site."""
    specifier_kinds = ("type_qualifier", "storage_class_specifier")
    compile_time_keywords = ("constexpr", "consteval", "constinit")

    # `and` short-circuits, so the text is only extracted for specifier nodes.
    return any(
        child.type in specifier_kinds
        and _node_text(child, src).strip() in compile_time_keywords
        for child in func_node.children
    )

def _is_long_running_loop_function(body, src: bytes) -> bool:
    """Heuristic for "this function IS the thread's run loop".

    Returns True when any `while` / `for` / `do` statement yielded by
    `_walk(body)` has a condition whose text matches `_RUN_LOOP_COND_RE`
    (e.g. `m_running.load()`). Returns False for a None body or when no
    loop qualifies.

    The condition is taken from the statement's `condition` field; when
    that field is absent, the first `condition_clause` or
    `parenthesized_expression` child is used instead. Loops with neither
    are ignored.

    Nesting depth does not matter -- every node `_walk` visits is
    considered (presumably including loops wrapped in `#ifdef` blocks;
    that depends on `_walk`). Callers treat a match as "called once per
    thread start, not per frame"."""
    if body is None:
        return False

    loop_kinds = ("while_statement", "for_statement", "do_statement")
    fallback_kinds = ("condition_clause", "parenthesized_expression")

    for stmt in _walk(body):
        if stmt.type not in loop_kinds:
            continue

        condition = stmt.child_by_field_name("condition")
        if condition is None:
            # No named field on this grammar node: take the first child
            # that looks like a condition.
            condition = next(
                (kid for kid in stmt.children if kid.type in fallback_kinds),
                None,
            )

        if condition is not None and _RUN_LOOP_COND_RE.search(
            _node_text(condition, src)
        ):
            return True

    return False

def _large_stack_buffer_findings(body, src: bytes, fenced) -> list:
    """Flag large fixed-size local arrays declared inside a function body.

    The body text is scanned line by line. After strings and line comments
    are scrubbed, the first `_STACK_ARRAY_RE` match on a line supplies the
    element count (group 1); a `perf-large-stack-buffer` finding is emitted
    when that count is at least 1024. The threshold is an element count,
    not a byte size: `int[1024]` and `char[4096]` are reported, while
    `double[512]` (also 4 KB) is not.

    Nothing is reported when:
      * @a body is None;
      * the function is itself a long-running loop according to
        `_is_long_running_loop_function` (frame set up once per thread
        start, buffer reused every iteration);
      * `fenced(line)` is truthy for the line;
      * the captured size does not parse as an integer.

    Returns a list of `Finding` objects, one per offending line."""
    if body is None or _is_long_running_loop_function(body, src):
        return []

    # Tree-sitter rows are 0-based; findings use 1-based line numbers.
    first_line = body.start_point[0] + 1
    source_lines = _node_text(body, src).split("\n")

    findings: list = []
    for abs_line, raw in enumerate(source_lines, start=first_line):
        if fenced(abs_line):
            continue

        hit = _STACK_ARRAY_RE.search(_strip_strings_and_line_comments(raw))
        if hit is None:
            continue

        try:
            n = int(hit.group(1))
        except ValueError:
            # Size is not a plain integer literal; nothing to compare.
            continue

        if n >= 1024:
            findings.append(
                Finding(
                    abs_line,
                    "perf-large-stack-buffer",
                    f"local array of {n} elements on the stack -- frame-setup "
                    f"cost, pollutes L1 (32-48 KB) when called in a hot loop, "
                    f"risks overflow on deep call paths (Windows default 1 MB, "
                    f"Linux 8 MB). Promote to a member, thread_local, or a "
                    f"pre-reserved buffer.",
                )
            )

    return findings

def _adjacent_atomic_findings(class_node, src: bytes, fenced) -> list:
    """Flag atomic members declared close enough to risk false sharing.

    Walks the `field_declaration` children of the class body in order and
    remembers the line of the most recent atomic member (as recognised by
    `_ATOMIC_DECL_RE`). An atomic member declared 1 to 4 lines after the
    remembered one yields a `perf-false-sharing-risk` finding, unless
    `fenced(line)` is truthy.

    Special cases:
      * a non-atomic field, or a pointer / reference to an atomic, clears
        the remembered line -- for pointers and references the atomic that
        would suffer false sharing lives in some other object;
      * a member whose declaration contains `alignas` is never reported
        itself, but still becomes the remembered predecessor;
      * children that are not field declarations are ignored and leave the
        remembered line untouched.

    Returns a list of `Finding` objects (empty when the class has no body)."""
    members = class_node.child_by_field_name("body")
    if members is None:
        return []

    no_previous = -100  # far enough back that the gap test can never pass
    pointer_or_ref = r"atomic\w*\s*(?:<[^>]*>)?\s*[*&]"

    results: list = []
    last_atomic_line = no_previous

    for member in members.children:
        if member.type != "field_declaration":
            continue

        decl = _node_text(member, src)
        if not _ATOMIC_DECL_RE.search(decl) or re.search(pointer_or_ref, decl):
            last_atomic_line = no_previous
            continue

        line = _line_of(member)
        if "alignas" not in decl:
            gap = line - last_atomic_line
            if not fenced(line) and 0 < gap <= 4:
                results.append(
                    Finding(
                        line,
                        "perf-false-sharing-risk",
                        "adjacent atomic members will share a cache line "
                        "(64 B Intel/AArch64, up to 128 B on Apple Silicon "
                        "M-series via the 128 B speculative line). Cross-core "
                        "writes thrash MESI/MOESI invalidations (50-200x slowdown "
                        "vs uncontended). Add `alignas(64)` / "
                        "`alignas(std::hardware_destructive_interference_size)` "
                        "or insert `char _pad[64 - sizeof(prev)];` between them.",
                    )
                )

        # Aligned or not, this member is the predecessor for the next one.
        last_atomic_line = line

    return results

def _virtual_hotpath_findings(src_text: str, path: Path, fenced) -> list:

"""Header line-scan: a hotpath method declared `virtual`. Every call

site emits a vtable load + indirect branch (5-10 cyc best case, 15-20

cyc misprediction penalty on polymorphic sites) and the compiler

can't inline through it. If there's only one implementation, mark

`final` (devirtualizes when the dynamic type is statically known)

or drop `virtual` entirely.

Skipped when the method is taken as a Qt member-function pointer

View remainder of file in raw view

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

code_verify_rules.py

Latest commit

History

code_verify_rules.py

File metadata and controls