@@ -1019,79 +1019,103 @@ def get_title(self):
10191019
10201020 # ------------------------------------------------------------------
def get_refs(self):
    """Collect the documents cited in this draft's reference section(s).

    Scanning starts at line index 15 of ``self.lines``.  Nothing is
    collected until a reference-section heading is seen; after that, each
    line is searched for one Internet-Draft name and one RFC/STD/BCP/FYI
    citation (only the first match of each kind per line is used).

    Returns:
        dict mapping a lower-cased reference name without revision
        suffix or leading zeros (e.g. ``'rfc2119'``, ``'bcp14'``,
        ``'draft-ietf-foo-bar'``) to ``'norm'`` when the reference was
        first seen under a normative heading, or ``'info'`` otherwise.
        An entry equal to ``self.filename`` is removed, so the document
        never lists itself.

    Heading classification differs from a plain substring test in two
    ways: a heading that *starts* with "Normative" counts as normative,
    and a "Non-normative" heading counts as informative.
    """
    # Bill's horrible "references section" regexps, built up over lots of
    # years of fine tuning for different formats.
    # Examples:
    #   Appendix A. References:
    #   A.1. Informative References:
    sectionre = re.compile(r'(?i)(?:Appendix\s+)?(?:(?:[A-Z]\.)?[0-9.]*\s+)?(?:(\S+)\s*)?references:?$')
    #   9.1 Normative
    sectionre2 = re.compile(r'(?i)(?:(?:[A-Z]\.)?[0-9.]*\s+)?(\S+ormative)$')
    # One other reference section type seen:
    sectionre3 = re.compile(r'(?i)References \((\S+ormative)\)$')
    # An Internet-Draft reference.
    idref = re.compile(r'(?i)\b(draft-(?:[-\w]+(?=-\d\d)|[-\w]+))(-\d\d)?\b')
    # An RFC-and-other-series reference.
    rfcref = re.compile(r'(?i)\b(rfc|std|bcp|fyi)[- ]?(\d+)\b')
    # False positives for std
    not_our_std_ref = re.compile(r'(?i)((\b(n?csc|fed|mil|is-j)-std\b)|(\bieee\s*std\d*\b)|(\bstd\s+802\b))')
    # An Internet-Draft or series reference hyphenated by a well-meaning
    # line break.
    eol = re.compile(r'(?i)\b(draft[-\w]*-|rfc|std|bcp|fyi)$')
    # std at the front of a line can hide things like IEEE STD or MIL-STD
    std_start = re.compile(r'(?i)std\n*\b')
    # "normative" anywhere in a heading, except as part of "non-normative".
    normative_word = re.compile(r'(?i)(?<!non-)normative')

    refs = {}
    # None until the first reference-section heading; afterwards the type
    # ('norm' or 'info') assigned to references found on following lines.
    ref_type = None
    line_count = len(self.lines)

    for i in range(15, line_count):
        line = self.lines[i].strip()

        # Every heading (top-level or subsection) re-decides the type, so
        # a non-normative heading also ends a normative subsection.
        if sectionre.match(line) or sectionre2.match(line) or sectionre3.match(line):
            ref_type = 'norm' if normative_word.search(line) else 'info'

        # Citations in the body text before the reference section are
        # deliberately ignored.
        if ref_type is None:
            continue

        # If something got split badly, rejoin it.
        if eol.search(line) and i < line_count - 1:
            line += self.lines[i + 1].lstrip()

        # setdefault: the first classification seen for a name wins.
        m = idref.search(line)
        if m:
            refs.setdefault(m.group(1), ref_type)

        m = rfcref.search(line)
        if m:
            series, number = m.groups()
            series = series.lower()
            # Pull in the previous line so prefixes such as "IEEE" or
            # "MIL-" that were wrapped away from "STD" become visible to
            # the false-positive check below.
            if series == 'std' and std_start.search(line) and i > 15:
                line = self.lines[i - 1].rstrip() + line
            if series != 'std' or not not_our_std_ref.search(line):
                refs.setdefault(series + number.lstrip('0'), ref_type)

    # Don't add any references that point back into this doc
    refs.pop(self.filename, None)

    return refs
10951119
10961120 def old_get_refs ( self ):
10971121 refs = []
0 commit comments