From 9165a0247f5c065890b361399f41e16ccb83f43e Mon Sep 17 00:00:00 2001 From: Martynas Bagdonas Date: Mon, 7 May 2018 13:04:11 +0300 Subject: [PATCH] Fix PMID and arXiv identifiers extraction (#1498) --- chrome/content/zotero/xpcom/utilities_internal.js | 6 +++--- test/tests/utilities_internalTest.js | 15 ++++++++++----- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/chrome/content/zotero/xpcom/utilities_internal.js b/chrome/content/zotero/xpcom/utilities_internal.js index bd8681559..09e0c066e 100644 --- a/chrome/content/zotero/xpcom/utilities_internal.js +++ b/chrome/content/zotero/xpcom/utilities_internal.js @@ -901,7 +901,7 @@ Zotero.Utilities.Internal = { // arXiv identifiers are extracted without version number // i.e. 0706.0044v1 is extracted as 0706.0044, // because arXiv OAI API doesn't allow to access individual versions - let arXiv_RE = /((?:[^A-Za-z]|^)([\-A-Za-z\.]+\/\d{7})(?:(v[0-9]+)|)(?!\d))|((?:\D|^)(\d{4}.\d{4,5})(?:(v[0-9]+)|)(?!\d))/g; + let arXiv_RE = /((?:[^A-Za-z]|^)([\-A-Za-z\.]+\/\d{7})(?:(v[0-9]+)|)(?!\d))|((?:\D|^)(\d{4}\.\d{4,5})(?:(v[0-9]+)|)(?!\d))/g; let m; while ((m = arXiv_RE.exec(text))) { let arXiv = m[2] || m[5]; @@ -916,11 +916,11 @@ Zotero.Utilities.Internal = { if (!identifiers.length) { // PMID; right now, the longest PMIDs are 8 digits, so it doesn't seem like we'll // need to discriminate for a fairly long time - let PMID_RE = /(?:\D|^)(\d{1,9})(?!\d)/g; + let PMID_RE = /(^|\s|,|:)(\d{1,9})(?=\s|,|$)/g; let pmid; while ((pmid = PMID_RE.exec(text)) && !foundIDs.has(pmid)) { identifiers.push({ - PMID: pmid[1] + PMID: pmid[2] }); foundIDs.add(pmid); } diff --git a/test/tests/utilities_internalTest.js b/test/tests/utilities_internalTest.js index f8c935a82..a9aaff08d 100644 --- a/test/tests/utilities_internalTest.js +++ b/test/tests/utilities_internalTest.js @@ -147,15 +147,20 @@ describe("Zotero.Utilities.Internal", function () { }); it("should extract PMID", async function () { - var id = "24297125"; - var identifiers = ZUI.extractIdentifiers(id); - assert.lengthOf(identifiers, 1); + var identifiers = ZUI.extractIdentifiers("1 PMID:24297125,222 3-4 1234567890, 123456789"); + assert.lengthOf(identifiers, 4); assert.lengthOf(Object.keys(identifiers[0]), 1); - assert.propertyVal(identifiers[0], "PMID", id); + assert.lengthOf(Object.keys(identifiers[1]), 1); + assert.lengthOf(Object.keys(identifiers[2]), 1); + assert.lengthOf(Object.keys(identifiers[3]), 1); + assert.propertyVal(identifiers[0], "PMID", "1"); + assert.propertyVal(identifiers[1], "PMID", "24297125"); + assert.propertyVal(identifiers[2], "PMID", "222"); + assert.propertyVal(identifiers[3], "PMID", "123456789"); }); it("should extract multiple old and new style arXivs", async function () { - var identifiers = ZUI.extractIdentifiers("0706.0044 arXiv:0706.00441v1,hep-ex/9809001v1, math.GT/0309135."); + var identifiers = ZUI.extractIdentifiers("0706.0044 arXiv:0706.00441v1,12345678,hep-ex/9809001v1, math.GT/0309135."); assert.lengthOf(identifiers, 4); assert.lengthOf(Object.keys(identifiers[0]), 1); assert.lengthOf(Object.keys(identifiers[1]), 1);