2121import _base
2222import iso639codes
2323import rfc3987
24+ import rfc2046
2425from html5lib .constants import E , spaceCharacters , digits
2526from html5lib import tokenizer
2627import gettext
6566 _ (u"Root namespace must be 'http://www.w3.org/1999/xhtml', or omitted." ),
6667 "invalid-browsing-context" :
6768 _ (u"Value must be one of ('_self', '_parent', '_top'), or a name that does not start with '_': '%(attributeName)s' attribute on <%(tagName)s>." ),
69+ "invalid-tag-uri" :
70+ _ (u"Invalid URI: '%(attributeName)s' attribute on <%(tagName)s>." ),
71+ "invalid-urn" :
72+ _ (u"Invalid URN: '%(attributeName)s' attribute on <%(tagName)s>." ),
73+ "invalid-uri-char" :
74+ _ (u"Illegal character in URI: '%(attributeName)s' attribute on <%(tagName)s>." ),
75+ "uri-not-iri" :
76+ _ (u"Expected a URI but found an IRI: '%(attributeName)s' attribute on <%(tagName)s>." ),
77+ "invalid-uri" :
78+ _ (u"Invalid URI: '%(attributeName)s' attribute on <%(tagName)s>." ),
79+ "invalid-http-or-ftp-uri" :
80+ _ (u"Invalid URI: '%(attributeName)s' attribute on <%(tagName)s>." ),
81+ "invalid-scheme" :
82+ _ (u"Unregistered URI scheme: '%(attributeName)s' attribute on <%(tagName)s>." ),
83+ "invalid-rel" :
84+ _ (u"Invalid link relation: '%(attributeName)s' attribute on <%(tagName)s>." ),
85+ "invalid-mime-type" :
86+ _ (u"Invalid MIME type: '%(attributeName)s' attribute on <%(tagName)s>." ),
6887})
6988
7089globalAttributes = frozenset (('class' , 'contenteditable' , 'contextmenu' , 'dir' ,
236255 'password' : frozenset (('size' ,))
237256}
238257
# Link relation keywords accepted by checkLinkRelation when the tag is <link>.
linkRelValues = frozenset((
    'alternate',
    'archive', 'archives',
    'author',
    'contact',
    'feed',
    'first', 'begin', 'start',
    'help',
    'icon',
    'index', 'top', 'contents', 'toc',
    'last', 'end',
    'license', 'copyright',
    'next',
    'pingback',
    'prefetch',
    'prev', 'previous',
    'search',
    'stylesheet',
    'sidebar',
    'tag',
    'up',
))

# Link relation keywords accepted by checkLinkRelation for any other tag
# (the fallback set when the tag name is not 'link').
aRelValues = frozenset((
    'alternate',
    'archive', 'archives',
    'author',
    'contact',
    'feed',
    'first', 'begin', 'start',
    'help',
    'index', 'top', 'contents', 'toc',
    'last', 'end',
    'license', 'copyright',
    'next',
    'prev', 'previous',
    'search',
    'sidebar',
    'tag',
    'up',
    'bookmark',
    'external',
    'nofollow',
))
260+
239261class HTMLConformanceChecker (_base .Filter ):
240262 def __init__ (self , stream , encoding , parseMeta , ** kwargs ):
241263 _base .Filter .__init__ (self , tokenizer .HTMLTokenizer (
@@ -340,17 +362,17 @@ def checkStartTagUnknownAttributes(self, token):
340362 # Attribute validation helpers
341363 ##########################################################################
342364
343- def checkURI (self , token , tagName , attrName , attrValue ):
344- isValid , errorCode = rfc3987 .isValidURI (attrValue )
345- if not isValid :
346- yield {"type" : "ParseError" ,
347- "data" : errorCode ,
348- "datavars" : {"tagName" : tagName ,
349- "attributeName" : attrName }}
350- yield {"type" : "ParseError" ,
351- "data" : "invalid-attribute-value" ,
352- "datavars" : {"tagName" : tagName ,
353- "attributeName" : attrName }}
365+ # def checkURI(self, token, tagName, attrName, attrValue):
366+ # isValid, errorCode = rfc3987.isValidURI(attrValue)
367+ # if not isValid:
368+ # yield {"type": "ParseError",
369+ # "data": errorCode,
370+ # "datavars": {"tagName": tagName,
371+ # "attributeName": attrName}}
372+ # yield {"type": "ParseError",
373+ # "data": "invalid-attribute-value",
374+ # "datavars": {"tagName": tagName,
375+ # "attributeName": attrName}}
354376
355377 def checkIRI (self , token , tagName , attrName , attrValue ):
356378 isValid , errorCode = rfc3987 .isValidIRI (attrValue )
@@ -382,26 +404,36 @@ def checkID(self, token, tagName, attrName, attrValue):
382404 "attributeName" : attrName }}
383405 break
384406
385- def checkTokenList (self , tagName , attrName , attrValue ):
386- # The "token" in the method name refers to tokens in an attribute value
387- # i.e. http://www.whatwg.org/specs/web-apps/current-work/#set-of
388- # but the "token" parameter refers to the token generated from
389- # HTMLTokenizer. Sorry for the confusion.
def parseTokenList(self, value):
    """Split an attribute value into its space-separated tokens.

    Any character found in ``spaceCharacters`` acts as a separator; runs
    of separators never produce empty tokens.  The tokens are returned as
    a list in order of appearance, duplicates included.
    """
    tokens = []
    pending = ''
    # A single space is appended so that the last token is flushed by the
    # same branch that handles every other separator.
    for char in value + ' ':
        if char not in spaceCharacters:
            pending += char
            continue
        if pending:
            tokens.append(pending)
            pending = ''
    # Flush anything still buffered after the loop.
    if pending:
        tokens.append(pending)
    return tokens
420+
def checkTokenList(self, tagName, attrName, attrValue):
    """Report the first duplicated token in a space-separated attribute.

    "Token" here means a token inside an attribute value, i.e.
    http://www.whatwg.org/specs/web-apps/current-work/#set-of
    and not a token produced by HTMLTokenizer.

    tagName, attrName -- copied into the error's "datavars".
    attrValue -- the raw attribute value; split with parseTokenList.

    Yields at most one "duplicate-value-in-token-list" ParseError: checking
    stops at the first value that has already been seen.
    """
    seen = set()
    for currentValue in self.parseTokenList(attrValue):
        # Membership test instead of dict.has_key(), which is deprecated
        # and no longer exists in Python 3.
        if currentValue in seen:
            yield {"type": "ParseError",
                   "data": "duplicate-value-in-token-list",
                   "datavars": {"tagName": tagName,
                                "attributeName": attrName,
                                "attributeValue": currentValue}}
            break
        seen.add(currentValue)
405437
406438 def checkEnumeratedValue (self , token , tagName , attrName , attrValue , enumeratedValues ):
407439 if not attrValue and ('' not in enumeratedValues ):
@@ -422,7 +454,7 @@ def checkEnumeratedValue(self, token, tagName, attrName, attrValue, enumeratedVa
422454 "datavars" : {"tagName" : tagName ,
423455 "attributeName" : attrName }}
424456
425- def checkBooleanValue (self , token , tagName , attrName , attrValue ):
457+ def checkBoolean (self , token , tagName , attrName , attrValue ):
426458 enumeratedValues = frozenset ((attrName , '' ))
427459 if attrValue not in enumeratedValues :
428460 yield {"type" : "ParseError" ,
@@ -435,7 +467,7 @@ def checkBooleanValue(self, token, tagName, attrName, attrValue):
435467 "datavars" : {"tagName" : tagName ,
436468 "attributeName" : attrName }}
437469
438- def checkIntegerValue (self , token , tagName , attrName , attrValue ):
470+ def checkInteger (self , token , tagName , attrName , attrValue ):
439471 sign = 1
440472 numberString = ''
441473 state = 'begin' # ('begin', 'initial-number', 'number', 'trailing-junk')
@@ -476,6 +508,10 @@ def checkIntegerValue(self, token, tagName, attrName, attrValue):
476508 "datavars" : {"tagName" : tagName ,
477509 "attributeName" : attrName }}
478510
def checkFloatingPointNumber(self, token, tagName, attrName, attrValue):
    """Placeholder: floating point validation is not implemented.

    Performs no checks and returns None, so no errors are ever reported.
    """
    # XXX
    return None
514+
479515 def checkBrowsingContext (self , token , tagName , attrName , attrValue ):
480516 if not attrValue : return
481517 if attrValue [0 ] != '_' : return
@@ -486,6 +522,56 @@ def checkBrowsingContext(self, token, tagName, attrName, attrValue):
486522 "datavars" : {"tagName" : tagName ,
487523 "attributeName" : attrName }}
488524
def checkLangCode(self, token, tagName, attrName, attrValue):
    """Yield an "invalid-lang-code" ParseError for a rejected language code.

    A blank value is accepted without consulting iso639codes.  Any other
    value is passed to iso639codes.isValidLangCode; when that returns a
    false value a single error carrying the tag name, attribute name and
    attribute value is yielded.
    """
    if attrValue and not iso639codes.isValidLangCode(attrValue):
        errorDetails = {"tagName": tagName,
                        "attributeName": attrName,
                        "attributeValue": attrValue}
        yield {"type": "ParseError",
               "data": "invalid-lang-code",
               "datavars": errorDetails}
533+
def checkMIMEType(self, token, tagName, attrName, attrValue):
    """Yield ParseErrors for a blank or invalid MIME type attribute.

    A blank value yields "attribute-value-can-not-be-blank".  There is no
    early return, so the value (blank or not) is then also passed to
    rfc2046.isValidMIMEType, and "invalid-mime-type" is yielded when that
    returns a false value.
    """
    # XXX needs tests
    isBlank = not attrValue
    if isBlank:
        blankError = {"type": "ParseError",
                      "data": "attribute-value-can-not-be-blank",
                      "datavars": {"tagName": tagName,
                                   "attributeName": attrName}}
        yield blankError

    if rfc2046.isValidMIMEType(attrValue):
        return
    invalidError = {"type": "ParseError",
                    "data": "invalid-mime-type",
                    "datavars": {"tagName": tagName,
                                 "attributeName": attrName,
                                 "attributeValue": attrValue}}
    yield invalidError
548+
def checkMediaQuery(self, token, tagName, attrName, attrValue):
    """Placeholder: media query validation is not implemented.

    Performs no checks and returns None, so no errors are ever reported.
    """
    # XXX
    return None
552+
def checkLinkRelation(self, token, tagName, attrName, attrValue):
    """Validate a rel attribute as a token list of known link relations.

    First re-yields whatever checkTokenList reports for the value, then
    yields one "invalid-rel" ParseError for every token that is not in
    the allowed set: linkRelValues when tagName is 'link', aRelValues
    otherwise.
    """
    tokenListErrors = self.checkTokenList(tagName, attrName, attrValue)
    for tokenListError in tokenListErrors or []:
        yield tokenListError

    if tagName == 'link':
        allowedValues = linkRelValues
    else:
        allowedValues = aRelValues

    for relation in self.parseTokenList(attrValue):
        if relation in allowedValues:
            continue
        yield {"type": "ParseError",
               "data": "invalid-rel",
               "datavars": {"tagName": tagName,
                            "attributeName": attrName}}
563+
def checkDateTime(self, token, tagName, attrName, attrValue):
    """Placeholder: date/time validation is not implemented.

    Performs no checks and returns None, so no errors are ever reported.
    """
    # XXX Unfinished.  The sketch left here was a character-by-character
    # state machine over attrValue that starts in a 'begin' state, skips
    # characters in spaceCharacters, and changes state on the first
    # character in digits; the remaining states were never written.
    return None
573+
574+
489575 ##########################################################################
490576 # Attribute validation
491577 ##########################################################################
@@ -521,17 +607,8 @@ def validateAttributeValueDir(self, token, tagName, attrName, attrValue):
def validateAttributeValueDraggable(self, token, tagName, attrName, attrValue):
    """Validate the draggable attribute against 'true' and 'false'.

    Delegates to checkEnumeratedValue and re-yields every error it
    produces; a None result from the helper is treated as "no errors".
    """
    allowedValues = frozenset(('true', 'false'))
    errors = self.checkEnumeratedValue(token, tagName, attrName, attrValue, allowedValues)
    for error in errors or []:
        yield error
523609
524- def validateAttributeValueIrrelevant (self , token , tagName , attrName , attrValue ):
525- for t in self .checkBooleanValue (token , tagName , attrName , attrValue ) or []: yield t
526-
527- def validateAttributeValueLang (self , token , tagName , attrName , attrValue ):
528- if not attrValue : return # blank is OK
529- if not iso639codes .isValidLangCode (attrValue ):
530- yield {"type" : "ParseError" ,
531- "data" : "invalid-lang-code" ,
532- "datavars" : {"tagName" : tagName ,
533- "attributeName" : attrName ,
534- "attributeValue" : attrValue }}
610+ validateAttributeValueIrrelevant = checkBoolean
611+ validateAttributeValueLang = checkLangCode
535612
536613 def validateAttributeValueContextmenu (self , token , tagName , attrName , attrValue ):
537614 for t in self .checkID (token , tagName , attrName , attrValue ) or []: yield t
@@ -552,7 +629,7 @@ def validateAttributeValueId(self, token, tagName, attrName, attrValue):
552629 self .IDsWeHaveKnownAndLoved .append (attrValue )
553630 self .thingsThatDefineAnID .append (token )
554631
555- validateAttributeValueTabindex = checkIntegerValue
632+ validateAttributeValueTabindex = checkInteger
556633
557634 def validateAttributeValueRef (self , token , tagName , attrName , attrValue ):
558635 # XXX
@@ -569,13 +646,47 @@ def validateAttributeValueHtmlXmlns(self, token, tagName, attrName, attrValue):
569646 "datavars" : {"tagName" : tagName ,
570647 "attributeName" : attrName }}
571648
572- def validateAttributeValueBaseHref (self , token , tagName , attrName , attrValue ):
573- # XXX
574- pass
575-
# Validators that are plain aliases of the generic check* helpers.  The
# groups below follow the element named in each identifier.

# base
validateAttributeValueBaseHref = checkIRI
validateAttributeValueBaseTarget = checkBrowsingContext

# link
validateAttributeValueLinkHref = checkIRI
validateAttributeValueLinkRel = checkLinkRelation
validateAttributeValueLinkMedia = checkMediaQuery
validateAttributeValueLinkHreflang = checkLangCode
validateAttributeValueLinkType = checkMIMEType

# XXX <meta> attributes

# style
validateAttributeValueStyleMedia = checkMediaQuery
validateAttributeValueStyleType = checkMIMEType
validateAttributeValueStyleScoped = checkBoolean

# blockquote, ol, li
validateAttributeValueBlockquoteCite = checkIRI
validateAttributeValueOlStart = checkInteger
validateAttributeValueLiValue = checkInteger

# XXX need tests from here on

# a
validateAttributeValueAHref = checkIRI
validateAttributeValueATarget = checkBrowsingContext
666+
def validateAttributeValueAPing(self, token, tagName, attrName, attrValue):
    """Validate every entry of a space-separated list of IRIs.

    The attribute value is split with parseTokenList and each resulting
    token is checked on its own with checkIRI; all errors produced by
    checkIRI are re-yielded.  (Judging by the method name this serves the
    ping attribute of <a>.)
    """
    for currentValue in self.parseTokenList(attrValue):
        # Validate the individual token.  Passing the whole attrValue here
        # would test the unsplit list as a single IRI, once per token.
        for t in self.checkIRI(token, tagName, attrName, currentValue) or []:
            yield t
671+
# Validators that are plain aliases of the generic check* helpers.  The
# groups below follow the element named in each identifier.

# a
validateAttributeValueARel = checkLinkRelation
validateAttributeValueAMedia = checkMediaQuery
validateAttributeValueAHreflang = checkLangCode
validateAttributeValueAType = checkMIMEType

# q
validateAttributeValueQCite = checkIRI

# time
validateAttributeValueTimeDatetime = checkDateTime

# meter
validateAttributeValueMeterValue = checkFloatingPointNumber
validateAttributeValueMeterMin = checkFloatingPointNumber
validateAttributeValueMeterLow = checkFloatingPointNumber
validateAttributeValueMeterHigh = checkFloatingPointNumber
validateAttributeValueMeterMax = checkFloatingPointNumber
validateAttributeValueMeterOptimum = checkFloatingPointNumber

# progress
validateAttributeValueProgressValue = checkFloatingPointNumber
validateAttributeValueProgressMax = checkFloatingPointNumber

# ins, del
validateAttributeValueInsCite = checkIRI
validateAttributeValueInsDatetime = checkDateTime
validateAttributeValueDelCite = checkIRI
validateAttributeValueDelDatetime = checkDateTime
579690
580691 ##########################################################################
581692 # Whole document validation (IDs, etc.)
0 commit comments