Skip to content

Commit aa6f077

Browse files
committed
align words with no refs with themselves
1 parent 6f1ef94 commit aa6f077

File tree

2 files changed

+21
-12
lines changed

2 files changed

+21
-12
lines changed

example/src/App/App.js

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@ import 'alignment-react/dist/index.css';
55

66
const xml = `
77
<aligned-text xmlns="http://alpheios.net/namespaces/aligned-text">
8-
<language lnum="en" xml:lang="en" dir="ltr"/>
9-
<language lnum="fr" xml:lang="fr" dir="ltr"/>
8+
<language lnum="L1" xml:lang="eng" dir="ltr"/>
9+
<language lnum="L2" xml:lang="fre" dir="ltr"/>
1010
<comment class="title">alignment</comment>
1111
<sentence id="1" document_id="">
12-
<wds lnum="en">
12+
<wds lnum="L1">
1313
<comment class="uri"/>
1414
<w n="1-1">
1515
<text>hello</text>
@@ -20,7 +20,7 @@ const xml = `
2020
<refs nrefs="1-2 1-3"/>
2121
</w>
2222
</wds>
23-
<wds lnum="fr">
23+
<wds lnum="L2">
2424
<comment class="uri"/>
2525
<w n="1-1">
2626
<text>bonjour</text>
@@ -42,8 +42,8 @@ const xml = `
4242
const App = () => (
4343
<Alignment alignment={xml}>
4444
<Sentence id="1">
45-
<Segment lnum="en" />
46-
<Segment lnum="fr" />
45+
<Segment lnum="L1" />
46+
<Segment lnum="L2" />
4747
</Sentence>
4848
</Alignment>
4949
);

src/components/Alignment/Sentence/Sentence.js

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@ const uniq = (array) => Array.from(new Set(array));
88
// The XML structure used by Perseids for alignments includes some gotchas:
99
// * Word IDs are not globally unique
1010
// * It is possible for A to be aligned with B without B being aligned with A
11-
// * References do not have a field for `lnum`, meaning that alignments with more
11+
// * References do not have a field for lnum, meaning that alignments with more
1212
// than two languages are impossible
1313
// Future versions of Perseids should improve on the format, but this application needs
1414
// to be backwards compatible. This function converts the XML into an object that looks
15-
// like this (assuming languages have `lnum`s of `L1` and `L2`:
15+
// like this (assuming languages have lnums of L1 and L2):
1616
// {
1717
// L1: {
1818
// '1-1': {
@@ -48,6 +48,13 @@ const buildIdMap = (alignedText, sentence, id) => {
4848
idMap[outerLnum][outerN][innerLnum].add(innerN);
4949
};
5050

51+
// This is the hairiest part of the algorithm. But I'm not sure if there's a better way
52+
// to do things. When a word in L1 aligns with a word in L2, we add all members from
53+
// the L1 set to the L2 set. (At this time, JavaScript has no Set union operation.)
54+
// We then actually set the pointer for L1 and L2 to the *same Set*. This means
55+
// that any future additions affect both L1 and L2. The reason we do this is to account
56+
// for "sibling" words: two words with the same lnum that align to the same word in
57+
// the other lnum.
5158
const unifySets = (lnum1, n1, lnum2, n2) => {
5259
idMap[lnum1][n1][lnum1].forEach((v) => idMap[lnum2][n2][lnum1].add(v));
5360
idMap[lnum1][n1][lnum2].forEach((v) => idMap[lnum2][n2][lnum2].add(v));
@@ -56,19 +63,19 @@ const buildIdMap = (alignedText, sentence, id) => {
5663
idMap[lnum2][n2][lnum2] = idMap[lnum1][n1][lnum2];
5764
};
5865

59-
sentence.wds.forEach(wd => {
66+
(sentence.wds || []).forEach(wd => {
6067
const lnum = wd.$.lnum;
6168

6269
wd.w.forEach(word => {
6370
const n = word.$.n;
71+
// Align every word with itself
72+
addToSet(lnum, n, lnum, n);
6473

6574
if (word.refs) {
6675
const nrefs = word.refs[0].$.nrefs.split(/\s+/);
6776

6877
lnums.forEach(lnumRef => {
6978
if (lnumRef !== lnum) {
70-
addToSet(lnum, n, lnum, n);
71-
7279
nrefs.forEach(nref => {
7380
addToSet(lnum, n, lnumRef, nref);
7481
addToSet(lnumRef, nref, lnum, n);
@@ -90,7 +97,9 @@ const WrappedSentence = ({ id, json, children }) => {
9097

9198
useEffect(() => {
9299
const alignedText = json['aligned-text'];
93-
const sentence = alignedText.sentence.find(({ $: { id: sentenceId }}) => sentenceId === id);
100+
const sentence = alignedText
101+
? alignedText.sentence.find(({ $: { id: sentenceId }}) => sentenceId === id)
102+
: {};
94103

95104
setSentence(sentence);
96105
setIdMap(buildIdMap(alignedText, sentence));

0 commit comments

Comments
 (0)