From 03af0aab6daeb83a23eff998a818ae0c3af2a187 Mon Sep 17 00:00:00 2001 From: Avram Lyon Date: Sat, 12 Feb 2011 18:28:24 +0000 Subject: [PATCH] Trans: Change DOI to disallow ampersands --- translators/DOI.js | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/translators/DOI.js b/translators/DOI.js index a0bfada3e..05009dfcf 100644 --- a/translators/DOI.js +++ b/translators/DOI.js @@ -16,7 +16,14 @@ var selectArray = {}; // builds a list of DOIs function getDOIs(doc) { - const DOIre = /\b(10\.[\w.]+\/[^\s]+)\.?\b/igm; + // TODO Detect DOIs more correctly. + // The actual rules for DOIs are very lax-- but we're more strict. + // Specifically, we should allow space characters, and all Unicode + // characters except for control characters. Here, we're cheating + // by not allowing ampersands, to fix an issue with getting DOIs + // out of URLs. + // Description at: http://www.doi.org/handbook_2000/appendix_1.html#A1-4 + const DOIre = /\b(10\.[\w.]+\/[^\s&]+)\.?\b/igm; const DOIXPath = "//text()[contains(., '10.')]"; DOIre.lastMatch = 0; @@ -100,4 +107,4 @@ function doWeb(doc, url) { // retrieve full items asynchronously Zotero.wait(); retrieveNextDOI(DOIs, doc); -} \ No newline at end of file +}