1- /* 2e2c8ce5f11a473d65ec313ab20ceee6afefb355f5405afc06e7204e2e41c8c0 (2.4.4 +)
1+ /* a30d2613dcfdef81475a9d1a349134d2d42722172fdaa7d5bb12ed2aa74b9596 (2.4.6 +)
22 __ __ _
33 ___\ \/ /_ __ __ _| |_
44 / _ \\ /| '_ \ / _` | __|
1111 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
1212 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
1313 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14- Copyright (c) 2005-2009 Steven Solie <ssolie@users.sourceforge.net >
14+ Copyright (c) 2005-2009 Steven Solie <steven@solie.ca >
1515 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
1616 Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
1717 Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
@@ -718,8 +718,7 @@ XML_ParserCreate(const XML_Char *encodingName) {
718718
719719XML_Parser XMLCALL
720720XML_ParserCreateNS (const XML_Char * encodingName , XML_Char nsSep ) {
721- XML_Char tmp [2 ];
722- * tmp = nsSep ;
721+ XML_Char tmp [2 ] = {nsSep , 0 };
723722 return XML_ParserCreate_MM (encodingName , NULL , tmp );
724723}
725724
@@ -1344,8 +1343,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
13441343 would be otherwise.
13451344 */
13461345 if (parser -> m_ns ) {
1347- XML_Char tmp [2 ];
1348- * tmp = parser -> m_namespaceSeparator ;
1346+ XML_Char tmp [2 ] = {parser -> m_namespaceSeparator , 0 };
13491347 parser = parserCreate (encodingName , & parser -> m_mem , tmp , newDtd );
13501348 } else {
13511349 parser = parserCreate (encodingName , & parser -> m_mem , NULL , newDtd );
@@ -2563,6 +2561,7 @@ storeRawNames(XML_Parser parser) {
25632561 while (tag ) {
25642562 int bufSize ;
25652563 int nameLen = sizeof (XML_Char ) * (tag -> name .strLen + 1 );
2564+ size_t rawNameLen ;
25662565 char * rawNameBuf = tag -> buf + nameLen ;
25672566 /* Stop if already stored. Since m_tagStack is a stack, we can stop
25682567 at the first entry that has already been copied; everything
@@ -2574,7 +2573,11 @@ storeRawNames(XML_Parser parser) {
25742573 /* For re-use purposes we need to ensure that the
25752574 size of tag->buf is a multiple of sizeof(XML_Char).
25762575 */
2577- bufSize = nameLen + ROUND_UP (tag -> rawNameLength , sizeof (XML_Char ));
2576+ rawNameLen = ROUND_UP (tag -> rawNameLength , sizeof (XML_Char ));
2577+ /* Detect and prevent integer overflow. */
2578+ if (rawNameLen > (size_t )INT_MAX - nameLen )
2579+ return XML_FALSE ;
2580+ bufSize = nameLen + (int )rawNameLen ;
25782581 if (bufSize > tag -> bufEnd - tag -> buf ) {
25792582 char * temp = (char * )REALLOC (parser , tag -> buf , bufSize );
25802583 if (temp == NULL )
@@ -3756,6 +3759,17 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
37563759 if (! mustBeXML && isXMLNS
37573760 && (len > xmlnsLen || uri [len ] != xmlnsNamespace [len ]))
37583761 isXMLNS = XML_FALSE ;
3762+
3763+ // NOTE: While Expat does not validate namespace URIs against RFC 3986,
3764+ // we have to at least make sure that the XML processor on top of
3765+ // Expat (that is splitting tag names by namespace separator into
3766+ // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
3767+ // by an attacker putting additional namespace separator characters
3768+ // into namespace declarations. That would be ambiguous and not to
3769+ // be expected.
3770+ if (parser -> m_ns && (uri [len ] == parser -> m_namespaceSeparator )) {
3771+ return XML_ERROR_SYNTAX ;
3772+ }
37593773 }
37603774 isXML = isXML && len == xmlLen ;
37613775 isXMLNS = isXMLNS && len == xmlnsLen ;
@@ -7317,44 +7331,15 @@ nextScaffoldPart(XML_Parser parser) {
73177331 return next ;
73187332}
73197333
7320- static void
7321- build_node (XML_Parser parser , int src_node , XML_Content * dest ,
7322- XML_Content * * contpos , XML_Char * * strpos ) {
7323- DTD * const dtd = parser -> m_dtd ; /* save one level of indirection */
7324- dest -> type = dtd -> scaffold [src_node ].type ;
7325- dest -> quant = dtd -> scaffold [src_node ].quant ;
7326- if (dest -> type == XML_CTYPE_NAME ) {
7327- const XML_Char * src ;
7328- dest -> name = * strpos ;
7329- src = dtd -> scaffold [src_node ].name ;
7330- for (;;) {
7331- * (* strpos )++ = * src ;
7332- if (! * src )
7333- break ;
7334- src ++ ;
7335- }
7336- dest -> numchildren = 0 ;
7337- dest -> children = NULL ;
7338- } else {
7339- unsigned int i ;
7340- int cn ;
7341- dest -> numchildren = dtd -> scaffold [src_node ].childcnt ;
7342- dest -> children = * contpos ;
7343- * contpos += dest -> numchildren ;
7344- for (i = 0 , cn = dtd -> scaffold [src_node ].firstchild ; i < dest -> numchildren ;
7345- i ++ , cn = dtd -> scaffold [cn ].nextsib ) {
7346- build_node (parser , cn , & (dest -> children [i ]), contpos , strpos );
7347- }
7348- dest -> name = NULL ;
7349- }
7350- }
7351-
73527334static XML_Content *
73537335build_model (XML_Parser parser ) {
7336+ /* Function build_model transforms the existing parser->m_dtd->scaffold
7337+ * array of CONTENT_SCAFFOLD tree nodes into a new array of
7338+ * XML_Content tree nodes followed by a gapless list of zero-terminated
7339+ * strings. */
73547340 DTD * const dtd = parser -> m_dtd ; /* save one level of indirection */
73557341 XML_Content * ret ;
7356- XML_Content * cpos ;
7357- XML_Char * str ;
7342+ XML_Char * str ; /* the current string writing location */
73587343
73597344 /* Detect and prevent integer overflow.
73607345 * The preprocessor guard addresses the "always false" warning
@@ -7380,10 +7365,96 @@ build_model(XML_Parser parser) {
73807365 if (! ret )
73817366 return NULL ;
73827367
7383- str = (XML_Char * )(& ret [dtd -> scaffCount ]);
7384- cpos = & ret [1 ];
7368+ /* What follows is an iterative implementation of what was previously done
7369+ * recursively in a dedicated function called "build_node". The old recursive
7370+ * build_node could be forced into stack exhaustion from input as small as a
7371+ * few megabytes, and so that was a security issue. Hence, a function call
7372+ * stack is avoided now by resolving recursion.
7373+ *
7374+ * The iterative approach works as follows:
7375+ *
7376+ * - We have two writing pointers, both walking up the result array; one does
7377+ * the work, the other creates "jobs" for its colleague to do, and leads
7378+ * the way:
7379+ *
7380+ * - The faster one, pointer jobDest, always leads and writes "what job
7381+ * to do" for the other to pick up once it reaches that place in the
7382+ * array: leader "jobDest" stores the source node array index (relative
7383+ * to array dtd->scaffold) in field "numchildren".
7384+ *
7385+ * - The slower one, pointer dest, looks at the value stored in the
7386+ * "numchildren" field (which actually holds a source node array index
7387+ * at that time) and puts the real data from dtd->scaffold in.
7388+ *
7389+ * - Before the loop starts, jobDest writes source array index 0
7390+ * (where the root node is located) so that dest will have something to do
7391+ * when it starts operation.
7392+ *
7393+ * - Whenever a node with children is encountered, jobDest appends
7394+ * its children as new jobs, in order. As a result, tree node siblings are
7395+ * adjacent in the resulting array, for example:
7396+ *
7397+ * [0] root, has two children
7398+ * [1] first child of 0, has three children
7399+ * [3] first child of 1, does not have children
7400+ * [4] second child of 1, does not have children
7401+ * [5] third child of 1, does not have children
7402+ * [2] second child of 0, does not have children
7403+ *
7404+ * Or (the same data) presented in flat array view:
7405+ *
7406+ * [0] root, has two children
7407+ *
7408+ * [1] first child of 0, has three children
7409+ * [2] second child of 0, does not have children
7410+ *
7411+ * [3] first child of 1, does not have children
7412+ * [4] second child of 1, does not have children
7413+ * [5] third child of 1, does not have children
7414+ *
7415+ * - The algorithm repeats until all target array indices have been processed.
7416+ */
7417+ XML_Content * dest = ret ; /* tree node writing location, moves upwards */
7418+ XML_Content * const destLimit = & ret [dtd -> scaffCount ];
7419+ XML_Content * jobDest = ret ; /* next free writing location in target array */
7420+ str = (XML_Char * )& ret [dtd -> scaffCount ];
7421+
7422+ /* Add the starting job, the root node (index 0) of the source tree */
7423+ (jobDest ++ )-> numchildren = 0 ;
7424+
7425+ for (; dest < destLimit ; dest ++ ) {
7426+ /* Retrieve source tree array index from job storage */
7427+ const int src_node = (int )dest -> numchildren ;
7428+
7429+ /* Convert item */
7430+ dest -> type = dtd -> scaffold [src_node ].type ;
7431+ dest -> quant = dtd -> scaffold [src_node ].quant ;
7432+ if (dest -> type == XML_CTYPE_NAME ) {
7433+ const XML_Char * src ;
7434+ dest -> name = str ;
7435+ src = dtd -> scaffold [src_node ].name ;
7436+ for (;;) {
7437+ * str ++ = * src ;
7438+ if (! * src )
7439+ break ;
7440+ src ++ ;
7441+ }
7442+ dest -> numchildren = 0 ;
7443+ dest -> children = NULL ;
7444+ } else {
7445+ unsigned int i ;
7446+ int cn ;
7447+ dest -> name = NULL ;
7448+ dest -> numchildren = dtd -> scaffold [src_node ].childcnt ;
7449+ dest -> children = jobDest ;
7450+
7451+ /* Append scaffold indices of children to array */
7452+ for (i = 0 , cn = dtd -> scaffold [src_node ].firstchild ;
7453+ i < dest -> numchildren ; i ++ , cn = dtd -> scaffold [cn ].nextsib )
7454+ (jobDest ++ )-> numchildren = (unsigned int )cn ;
7455+ }
7456+ }
73857457
7386- build_node (parser , 0 , ret , & cpos , & str );
73877458 return ret ;
73887459}
73897460
@@ -7412,7 +7483,7 @@ getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
74127483
74137484static XML_Char *
74147485copyString (const XML_Char * s , const XML_Memory_Handling_Suite * memsuite ) {
7415- int charsRequired = 0 ;
7486+ size_t charsRequired = 0 ;
74167487 XML_Char * result ;
74177488
74187489 /* First determine how long the string is */
0 commit comments