-
-
Notifications
You must be signed in to change notification settings - Fork 1.8k
feat(core): Add db.query.summary functionality #21670
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
875050c
c58d7fb
7bd0b64
34b5ca7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,133 @@ | ||
| const MAX_SUMMARY_LENGTH = 255; | ||
|
|
||
| const TABLE_NAME_CHARS = /[^\s(,;)]+/; | ||
| const TABLE_NAME = TABLE_NAME_CHARS.source; | ||
|
|
||
| const DDL_RE = new RegExp( | ||
| `^\\s*(?<operation>(?:CREATE|DROP)\\s+(?:TABLE|INDEX)|ALTER\\s+TABLE)(?:\\s+IF\\s+(?:NOT\\s+)?EXISTS)?\\s+(?<table>${TABLE_NAME})`, | ||
| 'i', | ||
| ); | ||
|
|
||
| const INSERT_RE = new RegExp(`^\\s*(?<operation>INSERT)\\s+INTO\\s+(?<table>${TABLE_NAME})`, 'i'); | ||
| const UPDATE_RE = new RegExp(`^\\s*(?<operation>UPDATE)\\s+(?<table>${TABLE_NAME})`, 'i'); | ||
| const DELETE_RE = new RegExp(`^\\s*(?<operation>DELETE)\\s+FROM\\s+(?<table>${TABLE_NAME})`, 'i'); | ||
|
|
||
| const SELECT_RE = /^\s*\(?\s*(?<operation>SELECT)\b/i; | ||
|
|
||
| const PRAGMA_RE = /^\s*(?<operation>PRAGMA)\s+(?<command>\S+)/i; | ||
|
|
||
| const TOKEN_RE = /\b(?:FROM|JOIN)\s+|\(\s*(SELECT)\b|\b(?:UNION|INTERSECT|EXCEPT|MINUS)\s+(?:ALL\s+)?(SELECT)\b/gi; | ||
| const QUOTED_OR_PLAIN_TABLE_RE = /^(?:"[^"]*"|'[^']*'|[^\s(,;)]+)/; | ||
| const COMMA_TABLE_RE = /^\s*,\s*((?:"[^"]*"|'[^']*'|[^\s(,;)]+))/; | ||
| const SUBQUERY_SELECT_RE = /^\(\s*(SELECT)\b/i; | ||
|
|
||
| /** | ||
| * Derives a low-cardinality summary from a SQL query for use as `db.query.summary`. | ||
| * | ||
| * Conforms to the OTEL semantic convention for generating query summaries: | ||
| * - Preserves original case of operations and identifiers (no normalization) | ||
| * - Uses format: `{operation} {target1} {target2} ...` | ||
| * - Strips filler words (INTO, FROM) from the operation | ||
| * - Captures multiple table targets (JOINs) | ||
| * - Handles INSERT...SELECT with both targets | ||
| * - Truncates to 255 characters without splitting mid-value | ||
| * | ||
| * @see https://opentelemetry.io/docs/specs/semconv/database/database-spans/#generating-a-summary-of-the-query | ||
| */ | ||
| export function getSqlQuerySummary(query: string | undefined): string | undefined { | ||
| if (!query) { | ||
| return undefined; | ||
| } | ||
|
|
||
| const pragmaMatch = PRAGMA_RE.exec(query); | ||
| if (pragmaMatch?.groups?.['operation'] && pragmaMatch.groups['command']) { | ||
| const operation = pragmaMatch.groups['operation']; | ||
| const command = pragmaMatch.groups['command']; | ||
| const parenIdx = command.indexOf('('); | ||
| return truncate(`${operation} ${parenIdx >= 0 ? command.substring(0, parenIdx) : command}`); | ||
| } | ||
|
|
||
| const ddlMatch = DDL_RE.exec(query); | ||
| if (ddlMatch?.groups?.['operation'] && ddlMatch.groups['table']) { | ||
| return truncate(`${ddlMatch.groups['operation']} ${ddlMatch.groups['table']}`); | ||
| } | ||
|
|
||
| const insertMatch = INSERT_RE.exec(query); | ||
| if (insertMatch?.groups?.['operation'] && insertMatch.groups['table']) { | ||
| const parts = [insertMatch.groups['operation'], insertMatch.groups['table']]; | ||
| const rest = query.slice(insertMatch[0].length); | ||
| const subSelect = /\b(SELECT)\b/i.exec(rest); | ||
| if (subSelect?.[1]) { | ||
| parts.push(subSelect[1]); | ||
| const selectTables = extractTableNames(rest.slice(subSelect.index)); | ||
| parts.push(...selectTables); | ||
| } | ||
| return truncate(parts.join(' ')); | ||
| } | ||
|
|
||
| const updateMatch = UPDATE_RE.exec(query); | ||
| if (updateMatch?.groups?.['operation'] && updateMatch.groups['table']) { | ||
| return truncate(`${updateMatch.groups['operation']} ${updateMatch.groups['table']}`); | ||
| } | ||
|
|
||
| const deleteMatch = DELETE_RE.exec(query); | ||
| if (deleteMatch?.groups?.['operation'] && deleteMatch.groups['table']) { | ||
| return truncate(`${deleteMatch.groups['operation']} ${deleteMatch.groups['table']}`); | ||
| } | ||
|
|
||
| const selectMatch = SELECT_RE.exec(query); | ||
| if (selectMatch?.groups?.['operation']) { | ||
| const tables = extractTableNames(query.slice(selectMatch[0].length)); | ||
| if (tables.length > 0) { | ||
| return truncate(`${selectMatch.groups['operation']} ${tables.join(' ')}`); | ||
| } | ||
| return selectMatch.groups['operation']; | ||
| } | ||
|
|
||
| return truncate(query.trim().split(/\s+/)[0] ?? query); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Whitespace-only query mishandledLow Severity
Reviewed by Cursor Bugbot for commit 34b5ca7. Configure here. |
||
| } | ||
|
|
||
| function extractTableNames(sql: string): string[] { | ||
| const tables: string[] = []; | ||
| TOKEN_RE.lastIndex = 0; | ||
| let match: RegExpExecArray | null; | ||
|
|
||
| while ((match = TOKEN_RE.exec(sql)) !== null) { | ||
| if (match[1] || match[2]) { | ||
| tables.push((match[1] || match[2])!); | ||
|
cursor[bot] marked this conversation as resolved.
|
||
| continue; | ||
| } | ||
|
|
||
| const rest = sql.slice(match.index + match[0].length); | ||
|
|
||
| const subqueryMatch = SUBQUERY_SELECT_RE.exec(rest); | ||
| if (subqueryMatch?.[1]) { | ||
| tables.push(subqueryMatch[1]); | ||
| TOKEN_RE.lastIndex = match.index + match[0].length + subqueryMatch[0].length; | ||
| continue; | ||
| } | ||
|
|
||
| const tableMatch = QUOTED_OR_PLAIN_TABLE_RE.exec(rest); | ||
| if (!tableMatch) continue; | ||
| tables.push(tableMatch[0]); | ||
|
|
||
| let afterTable = rest.slice(tableMatch[0].length); | ||
| let commaMatch: RegExpExecArray | null; | ||
| while ((commaMatch = COMMA_TABLE_RE.exec(afterTable)) !== null) { | ||
| if (!commaMatch[1]) break; | ||
| tables.push(commaMatch[1]); | ||
| afterTable = afterTable.slice(commaMatch[0].length); | ||
| } | ||
| } | ||
|
|
||
| return tables; | ||
| } | ||
|
|
||
| function truncate(summary: string): string { | ||
| if (summary.length <= MAX_SUMMARY_LENGTH) { | ||
| return summary; | ||
| } | ||
| const truncated = summary.substring(0, MAX_SUMMARY_LENGTH); | ||
| const lastSpace = truncated.lastIndexOf(' '); | ||
| return lastSpace > 0 ? truncated.substring(0, lastSpace) : truncated; | ||
| } | ||


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
INSERT misreads SELECT in strings
Medium Severity
For
INSERTstatements, the follow-up scan uses a word-boundarySELECTmatch on the remainder of the query after the target table. That can matchSELECTinside quoted string literals inVALUESclauses, so plainINSERT ... VALUES (...)queries may be summarized likeINSERT ... SELECT ...with extra tokens fromextractTableNames, skewingdb.query.summarycardinality.Reviewed by Cursor Bugbot for commit e94685f. Configure here.