Skip to content

Commit b6d484c

Browse files
committed
add uniquification of link ref ids for links with different URLs and the same text
1 parent f63c795 commit b6d484c

File tree

6 files changed

+86
-16
lines changed

6 files changed

+86
-16
lines changed

VERSION.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,12 @@
119119

120120
## 0.64.8
121121

122-
* [ ] Fix: reference links in html to md conversion use the same ref when link text is the same.
122+
* Fix: reference links in html to md conversion use the same ref when link text is the same but the URLs differ. Now
123+
the reference id will be generated by adding `_xxx` suffix, where `xxx` is an increasing
124+
integer, starting at 1, and incremented until a unique reference id is generated.
125+
* Add: `UNIQUE_LINK_REF_ID_GENERATOR`, default `(refId, index) -> String.format("%s_%d",
126+
refId, index)`, a `BiFunction<String, Integer, String>` taking refId string and integer
127+
index and returning a string for the "uniquified" ref id to use for a reference link.
123128

124129
## 0.64.6
125130

@@ -2165,6 +2170,7 @@
21652170
[NodeInsertingPostProcessorSample.java]: https://github.com/vsch/flexmark-java/blob/master/flexmark-java-samples/src/com/vladsch/flexmark/java/samples/NodeInsertingPostProcessorSample.java
21662171
[PdfLandscapeConverter.java]: https://github.com/vsch/flexmark-java/blob/master/flexmark-java-samples/src/com/vladsch/flexmark/java/samples/PdfLandscapeConverter.java
21672172
[Prevent StringIndexOutOfBounds in ext-resizable-image by MiniDigger · Pull Request #503 · vsch/flexmark-java · GitHub]: https://github.com/vsch/flexmark-java/pull/503 "Prevent StringIndexOutOfBounds in ext-resizable-image by MiniDigger · Pull Request #503 · vsch/flexmark-java · GitHub"
2173+
[TextCollectingVisitor works better with code blocks by roxspring · Pull Request #575 · vsch/flexmark-java · GitHub]: https://github.com/vsch/flexmark-java/pull/575 "TextCollectingVisitor works better with code blocks by roxspring · Pull Request #575 · vsch/flexmark-java · GitHub"
21682174
[Update to latest maven bundle plugin. Fix for #529 by cziegeler · Pull Request #530 · vsch/flexmark-java · GitHub]: https://github.com/vsch/flexmark-java/pull/530 "Update to latest maven bundle plugin. Fix for #529 by cziegeler · Pull Request #530 · vsch/flexmark-java · GitHub"
21692175
[YouTrack: IDEA-207453]: https://youtrack.jetbrains.com/issue/IDEA-207453 "Add Conversion of ref anchor to UrlFilter for file line navigation"
21702176
[ext-resizable-image: fix images inside links by e-im · Pull Request #543 · vsch/flexmark-java · GitHub]: https://github.com/vsch/flexmark-java/pull/543 "ext-resizable-image: fix images inside links by e-im · Pull Request #543 · vsch/flexmark-java · GitHub"
@@ -2175,5 +2181,4 @@
21752181
[migrate flexmark-java 0_42_x to 0_50_0.xml]: https://github.com/vsch/flexmark-java/blob/master/assets/migrations/migrate%20flexmark-java%200_42_x%20to%200_50_0.xml
21762182
[test parsing long sequence of underscores by niklasf · Pull Request #495 · vsch/flexmark-java · GitHub]: https://github.com/vsch/flexmark-java/pull/495 "test parsing long sequence of underscores by niklasf · Pull Request #495 · vsch/flexmark-java · GitHub"
21772183
[update plugins and configure for Reproducible Builds by hboutemy · Pull Request #507 · vsch/flexmark-java · GitHub]: https://github.com/vsch/flexmark-java/pull/507 "update plugins and configure for Reproducible Builds by hboutemy · Pull Request #507 · vsch/flexmark-java · GitHub"
2178-
[TextCollectingVisitor works better with code blocks by roxspring · Pull Request #575 · vsch/flexmark-java · GitHub]: https://github.com/vsch/flexmark-java/pull/575 "TextCollectingVisitor works better with code blocks by roxspring · Pull Request #575 · vsch/flexmark-java · GitHub"
21792184

flexmark-html2md-converter/src/main/java/com/vladsch/flexmark/html2md/converter/FlexmarkHtmlConverter.java

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.jsoup.nodes.TextNode;
3535

3636
import java.util.*;
37+
import java.util.function.BiFunction;
3738
import java.util.regex.Matcher;
3839
import java.util.regex.Pattern;
3940

@@ -80,6 +81,10 @@ public class FlexmarkHtmlConverter {
8081
final public static DataKey<String> EOL_IN_TITLE_ATTRIBUTE = new DataKey<>("EOL_IN_TITLE_ATTRIBUTE", " ");
8182
final public static DataKey<String> THEMATIC_BREAK = new DataKey<>("THEMATIC_BREAK", "*** ** * ** ***");
8283

84+
// Generator to resolve duplicate ref ids for links: receives the ref id and an integer index, returns the uniquified ref id to use
85+
final public static DataKey<BiFunction<String, Integer, String>> UNIQUE_LINK_REF_ID_GENERATOR = new DataKey<>("UNIQUE_LINK_REF_ID_GENERATOR"
86+
, (refId, index) -> String.format("%s_%d", refId, index));
87+
8388
// Render HTML contents - UNWRAPPED
8489
final public static DataKey<String[]> UNWRAPPED_TAGS = new DataKey<>("UNWRAPPED_TAGS", new String[] {
8590
"article",
@@ -544,6 +549,7 @@ private class MainHtmlConverter extends HtmlNodeConverterSubContext {
544549
private @Nullable Parser myParser = null;
545550
final private @NotNull HtmlLinkResolver[] myHtmlLinkResolvers;
546551
final private @NotNull HashMap<String, Reference> myReferenceUrlToReferenceMap; // map of URL to reference node
552+
final private @NotNull HashMap<String, Reference> myReferenceIdToReferenceMap; // map of RefId to reference node
547553
final private @NotNull HashSet<Reference> myExternalReferences; // set of external reference nodes
548554

549555
@Override
@@ -576,6 +582,7 @@ public HtmlConverterState getState() {
576582
//myTrace = true;
577583
myStateStack = new Stack<>();
578584
myReferenceUrlToReferenceMap = new HashMap<>();
585+
myReferenceIdToReferenceMap = new HashMap<>();
579586
myExternalReferences = new HashSet<>();
580587
myState = null;
581588

@@ -630,16 +637,16 @@ private class SubHtmlNodeConverter extends HtmlNodeConverterSubContext implement
630637
}
631638

632639
@Override
633-
public @NotNull DataHolder getOptions() {return myOptions;}
640+
public @NotNull DataHolder getOptions() { return myOptions; }
634641

635642
@Override
636-
public @NotNull HtmlConverterOptions getHtmlConverterOptions() {return myMainNodeRenderer.getHtmlConverterOptions();}
643+
public @NotNull HtmlConverterOptions getHtmlConverterOptions() { return myMainNodeRenderer.getHtmlConverterOptions(); }
637644

638645
@Override
639-
public @NotNull Document getDocument() {return myMainNodeRenderer.getDocument();}
646+
public @NotNull Document getDocument() { return myMainNodeRenderer.getDocument(); }
640647

641648
@Override
642-
public HtmlConverterPhase getFormattingPhase() {return myMainNodeRenderer.getFormattingPhase();}
649+
public HtmlConverterPhase getFormattingPhase() { return myMainNodeRenderer.getFormattingPhase(); }
643650

644651
@Override
645652
public void render(@NotNull Node node) {
@@ -765,6 +772,11 @@ public void delegateRender() {
765772
return myMainNodeRenderer.getReferenceUrlToReferenceMap();
766773
}
767774

775+
@Override
776+
public @NotNull HashMap<String, Reference> getReferenceIdToReferenceMap() {
777+
return myMainNodeRenderer.getReferenceIdToReferenceMap();
778+
}
779+
768780
@Override
769781
public @NotNull HashSet<Reference> getExternalReferences() {
770782
return myMainNodeRenderer.getExternalReferences();
@@ -891,6 +903,11 @@ public void setTrace(boolean trace) {
891903
return myReferenceUrlToReferenceMap;
892904
}
893905

906+
@Override
907+
public @NotNull HashMap<String, Reference> getReferenceIdToReferenceMap() {
908+
return myReferenceIdToReferenceMap;
909+
}
910+
894911
@Override
895912
public @NotNull HashSet<Reference> getExternalReferences() {
896913
return myExternalReferences;
@@ -938,10 +955,10 @@ public Reference getOrCreateReference(@NotNull String url, @NotNull String text,
938955
// create a new one with URL and if no conflict with text as id
939956
String referenceId = text;
940957

941-
if (myReferenceUrlToReferenceMap.containsKey(referenceId)) {
958+
if (myReferenceIdToReferenceMap.containsKey(referenceId)) {
942959
for (int i = 1; ; i++) {
943-
referenceId = text + "_" + i;
944-
if (!myReferenceUrlToReferenceMap.containsKey(referenceId)) {
960+
referenceId = myHtmlConverterOptions.uniqueLinkRefIdGenerator.apply(text, i);
961+
if (!myReferenceIdToReferenceMap.containsKey(referenceId)) {
945962
break;
946963
}
947964
}
@@ -958,6 +975,7 @@ public Reference getOrCreateReference(@NotNull String url, @NotNull String text,
958975
if (firstChild instanceof Reference) {
959976
reference = (Reference) firstChild;
960977
myReferenceUrlToReferenceMap.put(url, reference);
978+
myReferenceIdToReferenceMap.put(referenceId, reference);
961979
return reference;
962980
}
963981
return null;

flexmark-html2md-converter/src/main/java/com/vladsch/flexmark/html2md/converter/HtmlConverterOptions.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import org.jetbrains.annotations.NotNull;
1010

1111
import java.util.Map;
12+
import java.util.function.BiFunction;
1213
import java.util.regex.Pattern;
1314

1415
@SuppressWarnings({ "WeakerAccess" })
@@ -61,6 +62,7 @@ public class HtmlConverterOptions implements MutableDataSetter {
6162
public String nbspText;
6263
public String thematicBreak;
6364
public String outputAttributesNamesRegex;
65+
public BiFunction<String, Integer, String> uniqueLinkRefIdGenerator;
6466
public Pattern outputAttributesNamesRegexPattern;
6567
public String outputIdAttributeRegex;
6668
public Pattern outputIdAttributeRegexPattern;
@@ -125,6 +127,7 @@ public HtmlConverterOptions(HtmlConverterOptions other) {
125127
nbspText = other.nbspText;
126128
thematicBreak = other.thematicBreak;
127129
outputAttributesNamesRegex = other.outputAttributesNamesRegex;
130+
uniqueLinkRefIdGenerator = other.uniqueLinkRefIdGenerator;
128131
outputAttributesNamesRegexPattern = other.outputAttributesNamesRegexPattern;
129132
tableCellAlignmentMap = other.tableCellAlignmentMap;
130133
tableOptions = other.tableOptions;
@@ -193,6 +196,7 @@ public HtmlConverterOptions(DataHolder options) {
193196
thematicBreak = FlexmarkHtmlConverter.THEMATIC_BREAK.get(options);
194197
outputAttributesNamesRegex = FlexmarkHtmlConverter.OUTPUT_ATTRIBUTES_NAMES_REGEX.get(options);
195198
outputAttributesNamesRegexPattern = Pattern.compile(outputAttributesNamesRegex);
199+
uniqueLinkRefIdGenerator = FlexmarkHtmlConverter.UNIQUE_LINK_REF_ID_GENERATOR.get(options);
196200
outputIdAttributeRegex = FlexmarkHtmlConverter.OUTPUT_ID_ATTRIBUTE_REGEX.get(options);
197201
outputIdAttributeRegexPattern = Pattern.compile(outputIdAttributeRegex);
198202
tableCellAlignmentMap = FlexmarkHtmlConverter.TABLE_CELL_ALIGNMENT_MAP.get(options);
@@ -254,6 +258,7 @@ public MutableDataHolder setIn(@NotNull MutableDataHolder dataHolder) {
254258
dataHolder.set(FlexmarkHtmlConverter.NBSP_TEXT, nbspText);
255259
dataHolder.set(FlexmarkHtmlConverter.THEMATIC_BREAK, thematicBreak);
256260
dataHolder.set(FlexmarkHtmlConverter.OUTPUT_ATTRIBUTES_NAMES_REGEX, outputAttributesNamesRegex);
261+
dataHolder.set(FlexmarkHtmlConverter.UNIQUE_LINK_REF_ID_GENERATOR, uniqueLinkRefIdGenerator);
257262
dataHolder.set(FlexmarkHtmlConverter.TABLE_CELL_ALIGNMENT_MAP, tableCellAlignmentMap);
258263
dataHolder.set(FlexmarkHtmlConverter.OUTPUT_ID_ATTRIBUTE_REGEX, outputIdAttributeRegex);
259264
dataHolder.set(FlexmarkHtmlConverter.EXT_MATH, extMath);

flexmark-html2md-converter/src/main/java/com/vladsch/flexmark/html2md/converter/HtmlNodeConverterContext.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ public interface HtmlNodeConverterContext extends NodeContext<Node, HtmlNodeConv
104104

105105
@NotNull HashMap<String, Reference> getReferenceUrlToReferenceMap();
106106

107+
@NotNull HashMap<String, Reference> getReferenceIdToReferenceMap();
108+
107109
@NotNull HashSet<Reference> getExternalReferences();
108110

109111
boolean isTrace();

flexmark-html2md-converter/src/test/java/com/vladsch/flexmark/html2md/converter/HtmlConverterTest.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ public abstract class HtmlConverterTest extends ComboSpecTestCase {
6666
optionsMap.put("links-none", new MutableDataSet().set(FlexmarkHtmlConverter.EXT_INLINE_LINK, LinkConversion.NONE));
6767
optionsMap.put("links-exp", new MutableDataSet().set(FlexmarkHtmlConverter.EXT_INLINE_LINK, LinkConversion.MARKDOWN_EXPLICIT));
6868
optionsMap.put("links-ref", new MutableDataSet().set(FlexmarkHtmlConverter.EXT_INLINE_LINK, LinkConversion.MARKDOWN_REFERENCE));
69+
optionsMap.put("links-ref-uniquifier", new MutableDataSet().set(FlexmarkHtmlConverter.UNIQUE_LINK_REF_ID_GENERATOR, (refId, index) -> String.format("%s - %d", refId, index)));
6970
optionsMap.put("links-text", new MutableDataSet().set(FlexmarkHtmlConverter.EXT_INLINE_LINK, LinkConversion.TEXT));
7071
optionsMap.put("links-html", new MutableDataSet().set(FlexmarkHtmlConverter.EXT_INLINE_LINK, LinkConversion.HTML));
7172
optionsMap.put("img-none", new MutableDataSet().set(FlexmarkHtmlConverter.EXT_INLINE_IMAGE, LinkConversion.NONE));

flexmark-html2md-converter/src/test/resources/flexmark_html_converter_spec.md

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1399,9 +1399,47 @@ As ref
13991399
````````````````````````````````
14001400

14011401

1402+
References with text as ref id
1403+
1404+
```````````````````````````````` example(Links: 31) options(links-ref)
1405+
[Link 1][]
1406+
1407+
[Link 1]: http://example.com
1408+
1409+
.
1410+
<a href="http://example.com">Link 1</a>
1411+
````````````````````````````````
1412+
1413+
1414+
References with text as ref id, duplicated
1415+
1416+
```````````````````````````````` example(Links: 32) options(links-ref)
1417+
[Link 1][] [Link 1][Link 1_1]
1418+
1419+
[Link 1]: http://example.com
1420+
[Link 1_1]: http://example.com/link2
1421+
1422+
.
1423+
<a href="http://example.com">Link 1</a> <a href="http://example.com/link2">Link 1</a>
1424+
````````````````````````````````
1425+
1426+
1427+
References with text as ref id, duplicated, with custom unique ref id generator
1428+
1429+
```````````````````````````````` example(Links: 33) options(links-ref, links-ref-uniquifier)
1430+
[Link 1][] [Link 1][Link 1 - 1]
1431+
1432+
[Link 1]: http://example.com
1433+
[Link 1 - 1]: http://example.com/link2
1434+
1435+
.
1436+
<a href="http://example.com">Link 1</a> <a href="http://example.com/link2">Link 1</a>
1437+
````````````````````````````````
1438+
1439+
14021440
custom resolver
14031441

1404-
```````````````````````````````` example(Links: 31) options(links-ref, link-resolver)
1442+
```````````````````````````````` example(Links: 34) options(links-ref, link-resolver)
14051443
[http://example.com][]
14061444
14071445
[http://example.com]: https://example.com 'Title'
@@ -1411,14 +1449,14 @@ custom resolver
14111449
````````````````````````````````
14121450

14131451

1414-
```````````````````````````````` example(Links: 32) options(links-ref, link-resolver)
1452+
```````````````````````````````` example(Links: 35) options(links-ref, link-resolver)
14151453
<https://example.com>
14161454
.
14171455
<a href="http://example.com">http://example.com</a>
14181456
````````````````````````````````
14191457

14201458

1421-
```````````````````````````````` example(Links: 33) options(links-ref)
1459+
```````````````````````````````` example(Links: 36) options(links-ref)
14221460
[\[Text **Bold**\]][]
14231461
14241462
[\[Text **Bold**\]]: http://example.com
@@ -1428,7 +1466,7 @@ custom resolver
14281466
````````````````````````````````
14291467

14301468

1431-
```````````````````````````````` example(Links: 34) options(links-ref)
1469+
```````````````````````````````` example(Links: 37) options(links-ref)
14321470
[![alt](image.png)](http://example.com)
14331471
.
14341472
<a href="http://example.com"><img src="image.png" alt="alt"></a>
@@ -1437,20 +1475,20 @@ custom resolver
14371475

14381476
As ref re-use document
14391477

1440-
```````````````````````````````` example(Links: 35) options(no-autolinks, links-ref, for-document)
1478+
```````````````````````````````` example(Links: 38) options(no-autolinks, links-ref, for-document)
14411479
[http://example.com][example.com]
14421480
.
14431481
<a href="http://example.com">http://example.com</a>
14441482
````````````````````````````````
14451483

14461484

1447-
```````````````````````````````` example(Links: 36) options(links-none)
1485+
```````````````````````````````` example(Links: 39) options(links-none)
14481486
.
14491487
<a href="http://example.com">http://example.com</a>
14501488
````````````````````````````````
14511489

14521490

1453-
```````````````````````````````` example Links: 37
1491+
```````````````````````````````` example Links: 40
14541492
[](#30xxx93---bug-fix-release)
14551493
.
14561494
<a href="#30xxx93---bug-fix-release"></a>
@@ -3544,6 +3582,7 @@ special symbols: \*~^&<>[]|`
35443582
</code></pre>
35453583
````````````````````````````````
35463584

3585+
35473586
## Skipped Fenced Code
35483587

35493588
```````````````````````````````` example(Skipped Fenced Code: 1) options(skip-fenced-code)

0 commit comments

Comments
 (0)