Commit 40c291cf932 for php.net
commit 40c291cf932c31a302fa584bb50b6edb199fb07e
Author: Niels Dossche <7771979+ndossche@users.noreply.github.com>
Date: Tue Nov 11 23:31:27 2025 +0100
Fix GH-20444: Dom\XMLDocument::C14N() seems broken compared to DOMDocument::C14N()
C14N code expects namespace to be in-tree, but we store namespaces in a
different way out-of-tree to avoid reconciliations that break the tree
structure in a way unexpected by the DOM spec. In the DOM spec,
namespace nodes don't exist; they're regular attributes.
To solve this, we temporarily make fake namespace nodes that we later
remove.
Closes GH-20457.
diff --git a/NEWS b/NEWS
index 604f81d3078..e586e31998f 100644
--- a/NEWS
+++ b/NEWS
@@ -18,6 +18,8 @@ PHP NEWS
- DOM:
. Fixed bug GH-20722 (Null pointer dereference in DOM namespace node cloning
via clone on malformed objects). (ndossche)
+ . Fixed bug GH-20444 (Dom\XMLDocument::C14N() seems broken compared
+ to DOMDocument::C14N()). (ndossche)
- GD:
. Fixed bug GH-20622 (imagestring/imagestringup overflow). (David Carlier)
diff --git a/ext/dom/node.c b/ext/dom/node.c
index 3ec1db84157..0dd1b959752 100644
--- a/ext/dom/node.c
+++ b/ext/dom/node.c
@@ -2081,6 +2081,97 @@ PHP_METHOD(DOMNode, lookupNamespaceURI)
}
/* }}} end dom_node_lookup_namespace_uri */
+static void dom_relink_ns_decls_element(HashTable *links, xmlNodePtr node)
+{
+ if (node->type == XML_ELEMENT_NODE) {
+ for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
+ if (php_dom_ns_is_fast((const xmlNode *) attr, php_dom_ns_is_xmlns_magic_token)) {
+ xmlNsPtr ns = xmlMalloc(sizeof(*ns));
+ if (!ns) {
+ return;
+ }
+
+ zval *zv = zend_hash_index_lookup(links, (zend_ulong) node);
+ if (Z_ISNULL_P(zv)) {
+ ZVAL_LONG(zv, 1);
+ } else {
+ Z_LVAL_P(zv)++;
+ }
+
+ bool should_free;
+ xmlChar *attr_value = php_libxml_attr_value(attr, &should_free);
+
+ memset(ns, 0, sizeof(*ns));
+ ns->type = XML_LOCAL_NAMESPACE;
+ ns->href = should_free ? attr_value : xmlStrdup(attr_value);
+ ns->prefix = attr->ns->prefix ? xmlStrdup(attr->name) : NULL;
+ ns->next = node->nsDef;
+ node->nsDef = ns;
+
+ ns->_private = attr;
+ if (attr->prev) {
+ attr->prev = attr->next;
+ } else {
+ node->properties = attr->next;
+ }
+ if (attr->next) {
+ attr->next->prev = attr->prev;
+ }
+ }
+ }
+
+ /* The default namespace is handled separately from the other namespaces in C14N.
+ * The default namespace is explicitly looked up while the other namespaces are
+ * deduplicated and compared to a list of visible namespaces. */
+ if (node->ns && !node->ns->prefix) {
+ /* Workaround for the behaviour where the xmlSearchNs() call inside c14n.c
+ * can return the current namespace. */
+ zend_hash_index_add_new_ptr(links, (zend_ulong) node | 1, node->ns);
+ node->ns = xmlSearchNs(node->doc, node, NULL);
+ }
+ }
+}
+
+static void dom_relink_ns_decls(HashTable *links, xmlNodePtr root)
+{
+ dom_relink_ns_decls_element(links, root);
+
+ xmlNodePtr base = root;
+ xmlNodePtr node = base->children;
+ while (node != NULL) {
+ dom_relink_ns_decls_element(links, node);
+ node = php_dom_next_in_tree_order(node, base);
+ }
+}
+
+static void dom_unlink_ns_decls(HashTable *links)
+{
+ ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(links, zend_ulong h, zval *data) {
+ if (h & 1) {
+ xmlNodePtr node = (xmlNodePtr) (h ^ 1);
+ node->ns = Z_PTR_P(data);
+ } else {
+ xmlNodePtr node = (xmlNodePtr) h;
+ while (Z_LVAL_P(data)-- > 0) {
+ xmlNsPtr ns = node->nsDef;
+ node->nsDef = ns->next;
+
+ xmlAttrPtr attr = ns->_private;
+ if (attr->prev) {
+ attr->prev->next = attr;
+ } else {
+ node->properties = attr;
+ }
+ if (attr->next) {
+ attr->next->prev = attr;
+ }
+
+ xmlFreeNs(ns);
+ }
+ }
+ } ZEND_HASH_FOREACH_END();
+}
+
static int dom_canonicalize_node_parent_lookup_cb(void *user_data, xmlNodePtr node, xmlNodePtr parent)
{
xmlNodePtr root = user_data;
@@ -2136,7 +2227,23 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
docp = nodep->doc;
- if (! docp) {
+ HashTable links;
+ bool modern = php_dom_follow_spec_node(nodep);
+ if (modern) {
+ xmlNodePtr root = nodep;
+ while (root->parent) {
+ root = root->parent;
+ }
+
+ if (UNEXPECTED(root->type != XML_DOCUMENT_NODE && root->type != XML_HTML_DOCUMENT_NODE)) {
+ php_dom_throw_error_with_message(HIERARCHY_REQUEST_ERR, "Canonicalization can only happen on nodes attached to a document.", /* strict */ true);
+ RETURN_THROWS();
+ }
+
+ zend_hash_init(&links, 0, NULL, NULL, false);
+ dom_relink_ns_decls(&links, xmlDocGetRootElement(docp));
+ } else if (!docp) {
+ /* Note: not triggerable with modern DOM */
zend_throw_error(NULL, "Node must be associated with a document");
RETURN_THROWS();
}
@@ -2158,12 +2265,12 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
if (!tmp) {
/* if mode == 0 then $xpath arg is 3, if mode == 1 then $xpath is 4 */
zend_argument_value_error(3 + mode, "must have a \"query\" key");
- RETURN_THROWS();
+ goto clean_links;
}
if (Z_TYPE_P(tmp) != IS_STRING) {
/* if mode == 0 then $xpath arg is 3, if mode == 1 then $xpath is 4 */
zend_argument_type_error(3 + mode, "\"query\" option must be a string, %s given", zend_zval_value_name(tmp));
- RETURN_THROWS();
+ goto clean_links;
}
xquery = Z_STRVAL_P(tmp);
@@ -2195,7 +2302,7 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
}
xmlXPathFreeContext(ctxp);
zend_throw_error(NULL, "XPath query did not return a nodeset");
- RETURN_THROWS();
+ goto clean_links;
}
}
@@ -2264,6 +2371,12 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
RETURN_LONG(bytes);
}
}
+
+clean_links:
+ if (modern) {
+ dom_unlink_ns_decls(&links);
+ zend_hash_destroy(&links);
+ }
}
/* }}} */
diff --git a/ext/dom/tests/canonicalization.phpt b/ext/dom/tests/canonicalization.phpt
index 4183b7cd41e..fcd9b207bc2 100644
--- a/ext/dom/tests/canonicalization.phpt
+++ b/ext/dom/tests/canonicalization.phpt
@@ -21,32 +21,46 @@
$dom->loadXML($xml);
$doc = $dom->documentElement->firstChild;
+$newDom = Dom\XMLDocument::createFromString($xml);
+$newDoc = $newDom->documentElement->firstChild;
+$counter = 0;
+
+function check($doc, $newDoc, ...$args) {
+ global $counter;
+ $counter++;
+ echo $doc->C14N(...$args)."\n\n";
+ if ($doc->C14N(...$args) !== $newDoc->C14N(...$args)) {
+ var_dump($doc->C14N(...$args), $newDoc->C14N(...$args));
+ throw new Error("mismatch: $counter");
+ }
+}
+
/* inclusive/without comments first child element of doc element is context. */
-echo $doc->C14N()."\n\n";
+check($doc, $newDoc);
/* exclusive/without comments first child element of doc element is context. */
-echo $doc->c14N(TRUE)."\n\n";
+check($doc, $newDoc, TRUE);
/* inclusive/with comments first child element of doc element is context. */
-echo $doc->C14N(FALSE, TRUE)."\n\n";
+check($doc, $newDoc, FALSE, TRUE);
/* exclusive/with comments first child element of doc element is context. */
-echo $doc->C14N(TRUE, TRUE)."\n\n";
+check($doc, $newDoc, TRUE, TRUE);
/* exclusive/without comments using xpath query. */
-echo $doc->c14N(TRUE, FALSE, array('query'=>'(//. | //@* | //namespace::*)'))."\n\n";
+check($doc, $newDoc, TRUE, FALSE, array('query'=>'(//. | //@* | //namespace::*)'))."\n\n";
/* exclusive/without comments first child element of doc element is context.
using xpath query with registered namespace.
test namespace prefix is also included. */
-echo $doc->c14N(TRUE, FALSE,
+check($doc, $newDoc, TRUE, FALSE,
array('query'=>'(//a:contain | //a:bar | .//namespace::*)',
'namespaces'=>array('a'=>'http://www.example.com/ns/foo')),
- array('test'))."\n\n";
+ array('test'));
/* exclusive/without comments first child element of doc element is context.
test namespace prefix is also included */
-echo $doc->C14N(TRUE, FALSE, NULL, array('test'));
+check($doc, $newDoc, TRUE, FALSE, NULL, array('test'));
?>
--EXPECT--
<contain xmlns="http://www.example.com/ns/foo" xmlns:fubar="http://www.example.com/ns/fubar" xmlns:test="urn::test">
diff --git a/ext/dom/tests/modern/xml/canonicalize_unattached.phpt b/ext/dom/tests/modern/xml/canonicalize_unattached.phpt
new file mode 100644
index 00000000000..cec5f108575
--- /dev/null
+++ b/ext/dom/tests/modern/xml/canonicalize_unattached.phpt
@@ -0,0 +1,20 @@
+--TEST--
+Canonicalize unattached node should fail
+--EXTENSIONS--
+dom
+--FILE--
+<?php
+
+$d = \Dom\XMLDocument::createFromString('<root><child/></root>');
+$child = $d->documentElement->firstChild;
+$child->remove();
+
+try {
+ $child->C14N();
+} catch (Dom\DOMException $e) {
+ echo $e->getMessage(), "\n";
+}
+
+?>
+--EXPECT--
+Canonicalization can only happen on nodes attached to a document.
diff --git a/ext/dom/tests/modern/xml/gh20444.phpt b/ext/dom/tests/modern/xml/gh20444.phpt
new file mode 100644
index 00000000000..b3a77e3f13e
--- /dev/null
+++ b/ext/dom/tests/modern/xml/gh20444.phpt
@@ -0,0 +1,43 @@
+--TEST--
+GH-20444 (Dom\XMLDocument::C14N() seems broken compared to DOMDocument::C14N())
+--EXTENSIONS--
+dom
+--FILE--
+<?php
+
+$xml = <<<EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<test:root xmlns:test="http://example.com/dummy/ns">
+ <test:a xmlns:test="http://example.com/dummy/ns"/>
+ <test:b test:kind="123">abc</test:b>
+</test:root>
+EOF;
+
+$d = \Dom\XMLDocument::createFromString($xml);
+var_dump($d->C14N(true));
+
+$xml = <<<EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<ns1:root xmlns:ns1="http://example.com/dummy/ns">
+ <ns1:a/>
+ <ns1:b>
+ <ns1:c>123</ns1:c>
+ </ns1:b>
+</ns1:root>
+EOF;
+
+$d = \Dom\XMLDocument::createFromString($xml);
+var_dump($d->C14N());
+
+?>
+--EXPECT--
+string(128) "<test:root xmlns:test="http://example.com/dummy/ns">
+ <test:a></test:a>
+ <test:b test:kind="123">abc</test:b>
+</test:root>"
+string(134) "<ns1:root xmlns:ns1="http://example.com/dummy/ns">
+ <ns1:a></ns1:a>
+ <ns1:b>
+ <ns1:c>123</ns1:c>
+ </ns1:b>
+</ns1:root>"