Commit 40c291cf932 for php.net

commit 40c291cf932c31a302fa584bb50b6edb199fb07e
Author: Niels Dossche <7771979+ndossche@users.noreply.github.com>
Date:   Tue Nov 11 23:31:27 2025 +0100

    Fix GH-20444: Dom\XMLDocument::C14N() seems broken compared to DOMDocument::C14N()

    C14N code expects namespace to be in-tree, but we store namespaces in a
    different way out-of-tree to avoid reconciliations that break the tree
    structure in a way unexpected by the DOM spec. In the DOM spec,
    namespace nodes don't exist; they're regular attributes.
    To solve this, we temporarily make fake namespace nodes that we later
    remove.

    Closes GH-20457.

diff --git a/NEWS b/NEWS
index 604f81d3078..e586e31998f 100644
--- a/NEWS
+++ b/NEWS
@@ -18,6 +18,8 @@ PHP                                                                        NEWS
 - DOM:
   . Fixed bug GH-20722 (Null pointer dereference in DOM namespace node cloning
     via clone on malformed objects). (ndossche)
+  . Fixed bug GH-20444 (Dom\XMLDocument::C14N() seems broken compared
+    to DOMDocument::C14N()). (ndossche)

 - GD:
   . Fixed bug GH-20622 (imagestring/imagestringup overflow). (David Carlier)
diff --git a/ext/dom/node.c b/ext/dom/node.c
index 3ec1db84157..0dd1b959752 100644
--- a/ext/dom/node.c
+++ b/ext/dom/node.c
@@ -2081,6 +2081,97 @@ PHP_METHOD(DOMNode, lookupNamespaceURI)
 }
 /* }}} end dom_node_lookup_namespace_uri */

+static void dom_relink_ns_decls_element(HashTable *links, xmlNodePtr node)
+{
+	if (node->type == XML_ELEMENT_NODE) {
+		for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
+			if (php_dom_ns_is_fast((const xmlNode *) attr, php_dom_ns_is_xmlns_magic_token)) {
+				xmlNsPtr ns = xmlMalloc(sizeof(*ns));
+				if (!ns) {
+					return;
+				}
+
+				zval *zv = zend_hash_index_lookup(links, (zend_ulong) node);
+				if (Z_ISNULL_P(zv)) {
+					ZVAL_LONG(zv, 1);
+				} else {
+					Z_LVAL_P(zv)++;
+				}
+
+				bool should_free;
+				xmlChar *attr_value = php_libxml_attr_value(attr, &should_free);
+
+				memset(ns, 0, sizeof(*ns));
+				ns->type = XML_LOCAL_NAMESPACE;
+				ns->href = should_free ? attr_value : xmlStrdup(attr_value);
+				ns->prefix = attr->ns->prefix ? xmlStrdup(attr->name) : NULL;
+				ns->next = node->nsDef;
+				node->nsDef = ns;
+
+				ns->_private = attr;
+				if (attr->prev) {
+					attr->prev = attr->next;
+				} else {
+					node->properties = attr->next;
+				}
+				if (attr->next) {
+					attr->next->prev = attr->prev;
+				}
+			}
+		}
+
+		/* The default namespace is handled separately from the other namespaces in C14N.
+		 * The default namespace is explicitly looked up while the other namespaces are
+		 * deduplicated and compared to a list of visible namespaces. */
+		if (node->ns && !node->ns->prefix) {
+			/* Workaround for the behaviour where the xmlSearchNs() call inside c14n.c
+			 * can return the current namespace. */
+			zend_hash_index_add_new_ptr(links, (zend_ulong) node | 1, node->ns);
+			node->ns = xmlSearchNs(node->doc, node, NULL);
+		}
+	}
+}
+
+static void dom_relink_ns_decls(HashTable *links, xmlNodePtr root)
+{
+	dom_relink_ns_decls_element(links, root);
+
+	xmlNodePtr base = root;
+	xmlNodePtr node = base->children;
+	while (node != NULL) {
+		dom_relink_ns_decls_element(links, node);
+		node = php_dom_next_in_tree_order(node, base);
+	}
+}
+
+static void dom_unlink_ns_decls(HashTable *links)
+{
+	ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(links, zend_ulong h, zval *data) {
+		if (h & 1) {
+			xmlNodePtr node = (xmlNodePtr) (h ^ 1);
+			node->ns = Z_PTR_P(data);
+		} else {
+			xmlNodePtr node = (xmlNodePtr) h;
+			while (Z_LVAL_P(data)-- > 0) {
+				xmlNsPtr ns = node->nsDef;
+				node->nsDef = ns->next;
+
+				xmlAttrPtr attr = ns->_private;
+				if (attr->prev) {
+					attr->prev->next = attr;
+				} else {
+					node->properties = attr;
+				}
+				if (attr->next) {
+					attr->next->prev = attr;
+				}
+
+				xmlFreeNs(ns);
+			}
+		}
+	} ZEND_HASH_FOREACH_END();
+}
+
 static int dom_canonicalize_node_parent_lookup_cb(void *user_data, xmlNodePtr node, xmlNodePtr parent)
 {
 	xmlNodePtr root = user_data;
@@ -2136,7 +2227,23 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{

 	docp = nodep->doc;

-	if (! docp) {
+	HashTable links;
+	bool modern = php_dom_follow_spec_node(nodep);
+	if (modern) {
+		xmlNodePtr root = nodep;
+		while (root->parent) {
+			root = root->parent;
+		}
+
+		if (UNEXPECTED(root->type != XML_DOCUMENT_NODE && root->type != XML_HTML_DOCUMENT_NODE)) {
+			php_dom_throw_error_with_message(HIERARCHY_REQUEST_ERR, "Canonicalization can only happen on nodes attached to a document.", /* strict */ true);
+			RETURN_THROWS();
+		}
+
+		zend_hash_init(&links, 0, NULL, NULL, false);
+		dom_relink_ns_decls(&links, xmlDocGetRootElement(docp));
+	} else if (!docp) {
+		/* Note: not triggerable with modern DOM */
 		zend_throw_error(NULL, "Node must be associated with a document");
 		RETURN_THROWS();
 	}
@@ -2158,12 +2265,12 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
 		if (!tmp) {
 			/* if mode == 0 then $xpath arg is 3, if mode == 1 then $xpath is 4 */
 			zend_argument_value_error(3 + mode, "must have a \"query\" key");
-			RETURN_THROWS();
+			goto clean_links;
 		}
 		if (Z_TYPE_P(tmp) != IS_STRING) {
 			/* if mode == 0 then $xpath arg is 3, if mode == 1 then $xpath is 4 */
 			zend_argument_type_error(3 + mode, "\"query\" option must be a string, %s given", zend_zval_value_name(tmp));
-			RETURN_THROWS();
+			goto clean_links;
 		}
 		xquery = Z_STRVAL_P(tmp);

@@ -2195,7 +2302,7 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
 			}
 			xmlXPathFreeContext(ctxp);
 			zend_throw_error(NULL, "XPath query did not return a nodeset");
-			RETURN_THROWS();
+			goto clean_links;
 		}
 	}

@@ -2264,6 +2371,12 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
 			RETURN_LONG(bytes);
 		}
 	}
+
+clean_links:
+	if (modern) {
+		dom_unlink_ns_decls(&links);
+		zend_hash_destroy(&links);
+	}
 }
 /* }}} */

diff --git a/ext/dom/tests/canonicalization.phpt b/ext/dom/tests/canonicalization.phpt
index 4183b7cd41e..fcd9b207bc2 100644
--- a/ext/dom/tests/canonicalization.phpt
+++ b/ext/dom/tests/canonicalization.phpt
@@ -21,32 +21,46 @@
 $dom->loadXML($xml);
 $doc = $dom->documentElement->firstChild;

+$newDom = Dom\XMLDocument::createFromString($xml);
+$newDoc = $newDom->documentElement->firstChild;
+$counter = 0;
+
+function check($doc, $newDoc, ...$args) {
+  global $counter;
+  $counter++;
+  echo $doc->C14N(...$args)."\n\n";
+  if ($doc->C14N(...$args) !== $newDoc->C14N(...$args)) {
+    var_dump($doc->C14N(...$args), $newDoc->C14N(...$args));
+    throw new Error("mismatch: $counter");
+  }
+}
+
 /* inclusive/without comments first child element of doc element is context. */
-echo $doc->C14N()."\n\n";
+check($doc, $newDoc);

 /* exclusive/without comments first child element of doc element is context. */
-echo $doc->c14N(TRUE)."\n\n";
+check($doc, $newDoc, TRUE);

 /* inclusive/with comments first child element of doc element is context. */
-echo $doc->C14N(FALSE, TRUE)."\n\n";
+check($doc, $newDoc, FALSE, TRUE);

 /* exclusive/with comments first child element of doc element is context. */
-echo $doc->C14N(TRUE, TRUE)."\n\n";
+check($doc, $newDoc, TRUE, TRUE);

 /* exclusive/without comments using xpath query. */
-echo $doc->c14N(TRUE, FALSE, array('query'=>'(//. | //@* | //namespace::*)'))."\n\n";
+check($doc, $newDoc, TRUE, FALSE, array('query'=>'(//. | //@* | //namespace::*)'))."\n\n";

 /* exclusive/without comments first child element of doc element is context.
    using xpath query with registered namespace.
    test namespace prefix is also included. */
-echo $doc->c14N(TRUE, FALSE,
+check($doc, $newDoc, TRUE, FALSE,
                 array('query'=>'(//a:contain | //a:bar | .//namespace::*)',
                       'namespaces'=>array('a'=>'http://www.example.com/ns/foo')),
-                array('test'))."\n\n";
+                array('test'));

 /* exclusive/without comments first child element of doc element is context.
    test namespace prefix is also included */
-echo $doc->C14N(TRUE, FALSE, NULL, array('test'));
+check($doc, $newDoc, TRUE, FALSE, NULL, array('test'));
 ?>
 --EXPECT--
 <contain xmlns="http://www.example.com/ns/foo" xmlns:fubar="http://www.example.com/ns/fubar" xmlns:test="urn::test">
diff --git a/ext/dom/tests/modern/xml/canonicalize_unattached.phpt b/ext/dom/tests/modern/xml/canonicalize_unattached.phpt
new file mode 100644
index 00000000000..cec5f108575
--- /dev/null
+++ b/ext/dom/tests/modern/xml/canonicalize_unattached.phpt
@@ -0,0 +1,20 @@
+--TEST--
+Canonicalize unattached node should fail
+--EXTENSIONS--
+dom
+--FILE--
+<?php
+
+$d = \Dom\XMLDocument::createFromString('<root><child/></root>');
+$child = $d->documentElement->firstChild;
+$child->remove();
+
+try {
+    $child->C14N();
+} catch (Dom\DOMException $e) {
+    echo $e->getMessage(), "\n";
+}
+
+?>
+--EXPECT--
+Canonicalization can only happen on nodes attached to a document.
diff --git a/ext/dom/tests/modern/xml/gh20444.phpt b/ext/dom/tests/modern/xml/gh20444.phpt
new file mode 100644
index 00000000000..b3a77e3f13e
--- /dev/null
+++ b/ext/dom/tests/modern/xml/gh20444.phpt
@@ -0,0 +1,43 @@
+--TEST--
+GH-20444 (Dom\XMLDocument::C14N() seems broken compared to DOMDocument::C14N())
+--EXTENSIONS--
+dom
+--FILE--
+<?php
+
+$xml = <<<EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<test:root xmlns:test="http://example.com/dummy/ns">
+    <test:a xmlns:test="http://example.com/dummy/ns"/>
+    <test:b test:kind="123">abc</test:b>
+</test:root>
+EOF;
+
+$d = \Dom\XMLDocument::createFromString($xml);
+var_dump($d->C14N(true));
+
+$xml = <<<EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<ns1:root xmlns:ns1="http://example.com/dummy/ns">
+    <ns1:a/>
+    <ns1:b>
+        <ns1:c>123</ns1:c>
+    </ns1:b>
+</ns1:root>
+EOF;
+
+$d = \Dom\XMLDocument::createFromString($xml);
+var_dump($d->C14N());
+
+?>
+--EXPECT--
+string(128) "<test:root xmlns:test="http://example.com/dummy/ns">
+    <test:a></test:a>
+    <test:b test:kind="123">abc</test:b>
+</test:root>"
+string(134) "<ns1:root xmlns:ns1="http://example.com/dummy/ns">
+    <ns1:a></ns1:a>
+    <ns1:b>
+        <ns1:c>123</ns1:c>
+    </ns1:b>
+</ns1:root>"