Skip to content

Commit fde9ab2

Browse files
authored
Merge pull request #2539 from openzim/remove-mini-citation
Remove any citation material from article content in nodet flavor
2 parents 3650729 + 64d8953 commit fde9ab2

File tree

2 files changed

+28
-1
lines changed

2 files changed

+28
-1
lines changed

Changelog

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,8 @@ Unreleased:
4242
* CHANGED: Rework file downloads to reduce pressure on servers and better handle throttling (@benoit74 #2377)
4343
* FIX: Automatically delete old Redis lists (@benoit74 #1906)
4444
* FIX: Also retry 524 HTTP errors - usually coming from Cloudflare handling of timeouts (@benoit74 #2526)
45-
# FIX: Remove any iframe in article content (@benoit74 #2537)
45+
* FIX: Remove any iframe in article content (@benoit74 #2537)
46+
* FIX: Remove any citation material from article content in nodet flavor (@benoit74 #2520)
4647

4748
1.16.0:
4849
* CHANGED: ActionParse renderer is now the preferred one when available (@benoit74 #2183)

src/renderers/abstract.renderer.ts

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -688,6 +688,28 @@ export abstract class Renderer {
688688
}
689689
}
690690

691+
/**
 * Remove all citation-related material from the given document.
 *
 * Three groups of elements are deleted, in this order:
 *  1. every `<sup>` whose `class` attribute contains the substring `reference`,
 *  2. every element carrying the `mw-ext-cite-error` class,
 *  3. every element carrying the `mw-references-wrap` class.
 *
 * Each group is snapshotted into a plain array before deletion so that the
 * iteration is not disturbed by nodes being removed from the document.
 *
 * @param parsoidDoc - document (or element) to clean up; it is mutated in place
 */
private removeCitations(parsoidDoc: DominoElement) {
  // Group 1: <sup> elements, kept only when their class attribute mentions "reference"
  const supElements: DominoElement[] = Array.from(parsoidDoc.getElementsByTagName('sup'))
  supElements
    .filter((candidate) => (candidate.getAttribute('class') || '').includes('reference'))
    .forEach((citationMarker) => {
      DU.deleteNode(citationMarker)
    })

  // Groups 2 and 3: class-based lookups, each queried only after the previous group is gone
  for (const citationClass of ['mw-ext-cite-error', 'mw-references-wrap']) {
    const matchingNodes: DominoElement[] = Array.from(parsoidDoc.getElementsByClassName(citationClass))
    matchingNodes.forEach((matchingNode) => {
      DU.deleteNode(matchingNode)
    })
  }
}
712+
691713
private clearLinkAndInputTags(parsoidDoc: DominoElement, filtersConfig: any, dump: Dump) {
692714
/* Don't need <link> and <input> tags */
693715
const nodesToDelete: Array<{ class?: string; tag?: string; filter?: (n: any) => boolean }> = [{ tag: 'link' }, { tag: 'input' }]
@@ -792,6 +814,10 @@ export abstract class Renderer {
792814
private applyOtherTreatments(parsoidDoc: DominoElement, dump: Dump, articleId: string) {
793815
this.removeIframeTags(parsoidDoc)
794816

817+
if (dump.nodet) {
818+
this.removeCitations(parsoidDoc)
819+
}
820+
795821
const filtersConfig = config.filters
796822
this.clearLinkAndInputTags(parsoidDoc, filtersConfig, dump)
797823

0 commit comments

Comments
 (0)