Commit 6cfbe0b for mammothjs
commit 6cfbe0b8952b809bb1a3564041c8155405336167
Author: Michael Williamson <mike@zwobble.org>
Date: Thu Mar 12 21:05:37 2026 +0000
Handle hyperlink complex fields with unquoted hrefs
diff --git a/NEWS b/NEWS
index d9b8255..02dd689 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,8 @@
* Handle hyperlinked wp:anchor and wp:inline elements.
+* Handle hyperlink complex fields with unquoted hrefs.
+
# 1.11.0
* Ignore style definitions using a style ID that has already been used.
diff --git a/lib/docx/body-reader.js b/lib/docx/body-reader.js
index febbbc2..74add0a 100644
--- a/lib/docx/body-reader.js
+++ b/lib/docx/body-reader.js
@@ -203,14 +203,16 @@ function BodyReader(options) {
}
function parseInstrText(instrText, fldChar) {
- var externalLinkResult = /\s*HYPERLINK "(.*)"/.exec(instrText);
+ var externalLinkResult = /^\s*HYPERLINK\s+(?:"(.*)"|([^\\]\S*))/.exec(instrText);
if (externalLinkResult) {
- return {type: "hyperlink", options: {href: externalLinkResult[1]}};
+ var href = externalLinkResult[1] === undefined ? externalLinkResult[2] : externalLinkResult[1];
+ return {type: "hyperlink", options: {href: href}};
}
- var internalLinkResult = /\s*HYPERLINK\s+\\l\s+"(.*)"/.exec(instrText);
+ var internalLinkResult = /^\s*HYPERLINK\s+\\l\s+(?:"(.*)"|([^\\]\S*))/.exec(instrText);
if (internalLinkResult) {
- return {type: "hyperlink", options: {anchor: internalLinkResult[1]}};
+ var anchor = internalLinkResult[1] === undefined ? internalLinkResult[2] : internalLinkResult[1];
+ return {type: "hyperlink", options: {anchor: anchor}};
}
var checkboxResult = /\s*FORMCHECKBOX\s*/.exec(instrText);
diff --git a/test/docx/body-reader.tests.js b/test/docx/body-reader.tests.js
index dee38a5..26c57e2 100644
--- a/test/docx/body-reader.tests.js
+++ b/test/docx/body-reader.tests.js
@@ -332,11 +332,13 @@ test("complex fields", (function() {
assert.deepEqual(instrText, []);
},
- "runs in a complex field for hyperlink without switch are read as external hyperlinks": function() {
+ "runs in a complex field for hyperlink without switch with quoted target are read as external hyperlinks": function() {
var hyperlinkRunXml = runOfText("this is a hyperlink");
var paragraphXml = new XmlElement("w:p", {}, [
beginXml,
- hyperlinkInstrText,
+ new XmlElement("w:instrText", {}, [
+ xml.text(' HYPERLINK "http://example.com"')
+ ]),
separateXml,
hyperlinkRunXml,
endXml
@@ -347,7 +349,7 @@ test("complex fields", (function() {
isEmptyRun,
isEmptyHyperlinkedRun,
isHyperlinkedRun({
- href: uri,
+ href: "http://example.com",
children: contains(
isText("this is a hyperlink")
)
@@ -356,7 +358,33 @@ test("complex fields", (function() {
));
},
- "runs in a complex field for hyperlink with l switch are read as internal hyperlinks": function() {
+ "runs in a complex field for hyperlink without switch with unquoted target are read as external hyperlinks": function() {
+ var hyperlinkRunXml = runOfText("this is a hyperlink");
+ var paragraphXml = new XmlElement("w:p", {}, [
+ beginXml,
+ new XmlElement("w:instrText", {}, [
+ xml.text(' HYPERLINK http://example.com')
+ ]),
+ separateXml,
+ hyperlinkRunXml,
+ endXml
+ ]);
+ var paragraph = readXmlElementValue(paragraphXml);
+
+ assertThat(paragraph.children, contains(
+ isEmptyRun,
+ isEmptyHyperlinkedRun,
+ isHyperlinkedRun({
+ href: "http://example.com",
+ children: contains(
+ isText("this is a hyperlink")
+ )
+ }),
+ isEmptyRun
+ ));
+ },
+
+ "runs in a complex field for hyperlink with l switch with quoted target are read as internal hyperlinks": function() {
var hyperlinkRunXml = runOfText("this is a hyperlink");
var paragraphXml = new XmlElement("w:p", {}, [
beginXml,
@@ -382,6 +410,32 @@ test("complex fields", (function() {
));
},
+ "runs in a complex field for hyperlink with l switch with unquoted target are read as internal hyperlinks": function() {
+ var hyperlinkRunXml = runOfText("this is a hyperlink");
+ var paragraphXml = new XmlElement("w:p", {}, [
+ beginXml,
+ new XmlElement("w:instrText", {}, [
+ xml.text(' HYPERLINK \\l InternalLink')
+ ]),
+ separateXml,
+ hyperlinkRunXml,
+ endXml
+ ]);
+ var paragraph = readXmlElementValue(paragraphXml);
+
+ assertThat(paragraph.children, contains(
+ isEmptyRun,
+ isEmptyHyperlinkedRun,
+ isHyperlinkedRun({
+ anchor: "InternalLink",
+ children: contains(
+ isText("this is a hyperlink")
+ )
+ }),
+ isEmptyRun
+ ));
+ },
+
"runs after a complex field for hyperlinks are not read as hyperlinks": function() {
var afterEndXml = runOfText("this will not be a hyperlink");
var paragraphXml = new XmlElement("w:p", {}, [