@@ -8,11 +8,11 @@ const uniq = (array) => Array.from(new Set(array));
88// The XML structure used by Perseids for alignments includes some gotchas:
99// * Word IDs are not globally unique
1010// * It is possible for A to be aligned with B without B being aligned with A
11- // * References do not have a field for ` lnum` , meaning that alignments with more
11+ // * References do not have a field for lnum, meaning that alignments with more
1212// than one language are impossible
1313// Future versions of Perseids should improve on the format, but this application needs
1414// to be backwards compatible. This function converts the XML into an object that looks
15- // like this (assuming languages have `lnum`s of `L1` and `L2` :
15+ // like this (assuming languages have lnums of L1 and L2) :
1616// {
1717// L1: {
1818// '1-1': {
@@ -48,6 +48,13 @@ const buildIdMap = (alignedText, sentence, id) => {
4848 idMap [ outerLnum ] [ outerN ] [ innerLnum ] . add ( innerN ) ;
4949 } ;
5050
51+ // This is the hairiest part of the algorithm. But I'm not sure if there's a better way
52+ // to do things. When a word in L1 aligns with a word in L2, we add all members from
53+ // the L1 set to the L2 set. (At this time, JavaScript has no Set union operation.)
54+ // We then actually set the pointer for L1 and L2 to the *same Set*. This means
55+ // that any future additions affect both L1 and L2. The reason we do this is to account
56+ // for "sibling" words: two words with the same lnum that align to the same word in
57+ // the other lnum.
5158 const unifySets = ( lnum1 , n1 , lnum2 , n2 ) => {
5259 idMap [ lnum1 ] [ n1 ] [ lnum1 ] . forEach ( ( v ) => idMap [ lnum2 ] [ n2 ] [ lnum1 ] . add ( v ) ) ;
5360 idMap [ lnum1 ] [ n1 ] [ lnum2 ] . forEach ( ( v ) => idMap [ lnum2 ] [ n2 ] [ lnum2 ] . add ( v ) ) ;
@@ -56,19 +63,19 @@ const buildIdMap = (alignedText, sentence, id) => {
5663 idMap [ lnum2 ] [ n2 ] [ lnum2 ] = idMap [ lnum1 ] [ n1 ] [ lnum2 ] ;
5764 } ;
5865
59- sentence . wds . forEach ( wd => {
66+ ( sentence . wds || [ ] ) . forEach ( wd => {
6067 const lnum = wd . $ . lnum ;
6168
6269 wd . w . forEach ( word => {
6370 const n = word . $ . n ;
71+ // Align every word with itself
72+ addToSet ( lnum , n , lnum , n ) ;
6473
6574 if ( word . refs ) {
6675 const nrefs = word . refs [ 0 ] . $ . nrefs . split ( / \s + / ) ;
6776
6877 lnums . forEach ( lnumRef => {
6978 if ( lnumRef !== lnum ) {
70- addToSet ( lnum , n , lnum , n ) ;
71-
7279 nrefs . forEach ( nref => {
7380 addToSet ( lnum , n , lnumRef , nref ) ;
7481 addToSet ( lnumRef , nref , lnum , n ) ;
@@ -90,7 +97,9 @@ const WrappedSentence = ({ id, json, children }) => {
9097
9198 useEffect ( ( ) => {
9299 const alignedText = json [ 'aligned-text' ] ;
93- const sentence = alignedText . sentence . find ( ( { $ : { id : sentenceId } } ) => sentenceId === id ) ;
100+ const sentence = alignedText
101+ ? alignedText . sentence . find ( ( { $ : { id : sentenceId } } ) => sentenceId === id )
102+ : { } ;
94103
95104 setSentence ( sentence ) ;
96105 setIdMap ( buildIdMap ( alignedText , sentence ) ) ;
0 commit comments