diff --git a/translators/unAPI.js b/translators/unAPI.js index 54531cebc..8bfaf43fe 100644 --- a/translators/unAPI.js +++ b/translators/unAPI.js @@ -4,257 +4,331 @@ "label":"unAPI", "creator":"Simon Kornblith", "target":null, - "minVersion":"1.0.0b4.r1", + "minVersion":"2.1", "maxVersion":"", "priority":200, "inRepository":true, "detectXPath":"//link[@rel='unapi-server']", - "lastUpdated":"2010-09-23 04:19:20" + "lastUpdated":"2011-04-19 19:40:07" } -var RECOGNIZABLE_FORMATS = ["mods", "marc", "endnote", "ris", "bibtex", "rdf"]; +var RECOGNIZABLE_FORMATS = ["rdf_zotero", "rdf_bibliontology", "mods", "marc", "unimarc", "ris", + "refer", "bibtex", "rdf_dc"]; var FORMAT_GUIDS = { + "rdf_zotero":"5e3ad958-ac79-463d-812b-a86a9235c28f", + "rdf_bibliontology":"14763d25-8ba0-45df-8f52-b8d1108e7ac9", "mods":"0e2235e7-babf-413c-9acf-f27cce5f059c", "marc":"a6ee60df-1ddc-4aae-bb25-45e0537be973", - "endnote":"881f60f2-0802-411a-9228-ce5f47b64c7d", + "unimarc":"a6ee60df-1ddc-4aae-bb25-45e0537be973", "ris":"32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7", + "refer":"881f60f2-0802-411a-9228-ce5f47b64c7d", "bibtex":"9cb70025-a888-4a29-a210-93ec52da40d4", - "rdf":"5e3ad958-ac79-463d-812b-a86a9235c28f" + "rdf_dc":"5e3ad958-ac79-463d-812b-a86a9235c28f" }; -var unAPIResolver, unsearchedIds, foundIds, foundItems, foundFormat, foundFormatName, domain; +var unAPIResolver = false; +var defaultFormat, unAPIIDs; -function detectWeb(doc, url) { - // initialize variables - unsearchedIds = []; - foundIds = []; - foundItems = []; - foundFormat = []; - foundFormatName = []; - - // Set the domain we're scraping - domain = doc.location.href.match(/https?:\/\/([^/]+)/); +/** + * A class to describe an unAPI format description + * @property isSupported {Boolean} Whether Zotero supports a format contained in this description + * @property name {String} The unAPI format name, used to retrieve item descriptions + * @property translatorID {String} The ID of the translator used to read this format + * + * @constructor + * @param {String} aXML unAPI format description XML + */ +UnAPIFormat = function(aXML) { + var parser = new DOMParser(); + var doc = parser.parseFromString(aXML.replace(/]*>/, "").replace(/<\?xml[^>]*\?>/, ""), "text/xml"); - // This and the x: prefix in the XPath are to work around an issue with pages - // served as application/xhtml+xml - // - // https://developer.mozilla.org/en/Introduction_to_using_XPath_in_JavaScript#Implementing_a_default_namespace_for_XML_documents - function nsResolver() { - return 'http://www.w3.org/1999/xhtml'; + var foundFormat = new Object(); + + // Loop through to determine format name + var nodes = doc.documentElement.getElementsByTagName("format"); + var nNodes = nodes.length; + var node, name, lowerName, format; + for(var i=0; i1 HTTP request + if(defaultFormat.isSupported) { + me.format = defaultFormat; + callback(true); + } else { + // if no supported default format, try format for this item + Zotero.Utilities.HTTP.doGet(unAPIResolver+"?id="+me.id, function(text) { + me.format = UnAPIFormat(text); + callback(!!me.format.isSupported); + }); } }); } } -function getItemType() { - // if there are no items left to search, use the only item's type (if there is one) or give up - if(!unsearchedIds.length) { - if(foundIds.length) { - getOnlyItem(); +/** + * This and the x: prefix in the XPath are to work around an issue with pages + * served as application/xhtml+xml + * + * https://developer.mozilla.org/en/Introduction_to_using_XPath_in_JavaScript#Implementing_a_default_namespace_for_XML_documents + */ +function nsResolver() { + return 'http://www.w3.org/1999/xhtml'; +} + +/** + * Extracts UnAPIIDs from a document + * @param {document} A document object from which to extract unAPIIds + * @return {UnAPIID[]} The unAPI ID objects extracted from the document + */ +function getUnAPIIDs(doc) { + // look for a resolver + var newUnAPIResolver = doc.evaluate('//x:link[@rel="unapi-server"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if(!newUnAPIResolver) return []; + newUnAPIResolver = newUnAPIResolver.getAttribute("href"); + if(unAPIResolver !== newUnAPIResolver) { + // if unAPI resolver has changed, clear + defaultFormat = false; + unAPIResolver = newUnAPIResolver; + unAPIIDs = []; + } + + // look for abbrs + var abbrs = doc.evaluate('//x:abbr[contains(@class, " unapi-id") or contains(@class, "unapi-id ") or @class="unapi-id"][@title]', + doc, nsResolver, XPathResult.ANY_TYPE, null); + var abbr; + var ids = []; + while(abbr = abbrs.iterateNext()) { + var id = abbr.getAttribute("title"); + ids.push(unAPIIDs[id] ? unAPIIDs[id] : new UnAPIID(id)); + } + + return ids; +} + + +/** + * Retrieves the list of formats available for all items accessible through this unAPI resolver + * @param {Function} callback A callback to be passed the format when it is available + */ +function getDefaultFormat(callback) { + if(defaultFormat) { + callback(defaultFormat); + } else { + Zotero.Utilities.HTTP.doGet(unAPIResolver, function(text) { + // determine format of this item + defaultFormat = new UnAPIFormat(text); + callback(defaultFormat); + }); + } +} +/** + * Determines itemType for detection + */ +function determineDetectItemType(ids, supportedId) { + var id = ids.shift(); + id.isSupported(function(isSupported) { + if(isSupported && supportedId !== undefined) { + // If there are multiple items with valid itemTypes, use "multiple" + Zotero.done("multiple"); + } else if(ids.length) { + // If IDs remain to be handled, handle the next one + determineDetectItemType(ids, (isSupported ? id : supportedId)); } else { - Zotero.done(false); + // If all IDs have been handled, get foundItemType for only supported ID + supportedId.getItemType(Zotero.done); } - return; - } - - var id = unsearchedIds.shift(); - Zotero.Utilities.HTTP.doGet(unAPIResolver+"?id="+id, function(text) { - var format = checkFormats(text); - if(format) { - // save data - foundIds.push(id); - foundFormat.push(format[0]); - foundFormatName.push(format[1]); - - if(foundIds.length == 2) { - // this is our second; use multiple - Zotero.done("multiple"); - return; - } - } - - // keep going - getItemType(); - }); -} - -function checkFormats(text) { - text = text.replace(/]*>/, "").replace(/<\?xml[^>]*\?>/, ""); - var xml = new XML(text); - - var foundFormat = new Object(); - - // this is such an ugly, disgusting hack, and I hate how Mozilla decided to neuter an ECMA standard - for each(var format in xml.format) { - var name = format.@name.toString(); - var lowerName = name.toLowerCase(); - - if(format.@namespace_uri == "http://www.loc.gov/mods/v3" || lowerName == "mods" || format.@docs == "http://www.loc.gov/standards/mods/") { - if(!foundFormat["mods"] || lowerName.indexOf("full") != -1) { - foundFormat["mods"] = escape(name); - } - } else if(lowerName.match(/^marc\b/)) { - if(!foundFormat["marc"] || lowerName.indexOf("utf8") != -1) { - foundFormat["marc"] = escape(name); - } - } else if(lowerName == "rdf_dc") { - foundFormat["rdf"] = escape(name); - } else if(format.@docs.text() == "http://www.refman.com/support/risformat_intro.asp" || lowerName.match(/^ris\b/)) { - if(!foundFormat["ris"] || lowerName.indexOf("utf8") != -1) { - foundFormat["ris"] = escape(name); - } - } else if(lowerName == "bibtex") { - foundFormat["bibtex"] = escape(name); - } else if(lowerName == "endnote") { - foundFormat["endnote"] = escape(name); - } - } - - // loop through again, this time respecting preferences - for each(var format in RECOGNIZABLE_FORMATS) { - if(foundFormat[format]) return [format, foundFormat[format]]; - } - - return false; -} - -function getOnlyItem() { - // retrieve the only item - retrieveItem(foundIds[0], foundFormat[0], foundFormatName[0], function(obj, item) { - foundItems.push(item); - Zotero.done(item.itemType); - }); -} - -function retrieveItem(id, format, formatName, callback) { - // retrieve URL - Zotero.Utilities.HTTP.doGet(unAPIResolver+"?id="+id+"&format="+formatName, function(text) { - var translator = Zotero.loadTranslator("import"); - translator.setTranslator(FORMAT_GUIDS[format]); - translator.setString(text); - translator.setHandler("itemDone", callback); - translator.translate(); }); } /** - * Get formats and names for all usable ids; when done, get all items + * Get all items + * @param {UnAPIID[]} ids List of UnAPI IDs + * @param {Function} callback Function to pass item array to when items have been retrieved + * @param {Zotero.Item[]} items Item array; used for recursive calls **/ -function getAllIds() { - if(!unsearchedIds.length) { - // once all ids have been gotten, get all items - getAllItems(); - return; - } - - var id = unsearchedIds.shift(); - Zotero.Utilities.HTTP.doGet(unAPIResolver+"?id="+id, function(text) { - var format = checkFormats(text); - if(format) { - // save data - foundIds.push(id); - foundFormat.push(format[0]); - foundFormatName.push(format[1]); - } +function getAllItems(ids, callback, items) { + var id = ids.shift(); + id.getItems(function(retrievedItems) { + var collectedItems = (items ? items.concat(retrievedItems) : retrievedItems); - // keep going - getAllIds(); + if(ids.length) { + getAllItems(ids, callback, collectedItems); + } else { + callback(collectedItems); + } }); } -/** - * Get all items; when done, show selectItems or scrape - **/ -function getAllItems() { - if(foundItems.length == foundIds.length) { - if(foundItems.length == 1) { - // Set the item Repository to the domain - foundItems[0].repository = domain[1]; - // if only one item, send complete() - foundItems[0].complete(); - } else if(foundItems.length > 0) { - // if multiple items, show selectItems +function detectWeb(doc, url) { + // get unAPI IDs + var ids = getUnAPIIDs(doc); + if(!ids.length) return false; + + // now we need to see if the server actually gives us bibliographic metadata, and determine the + // type + Zotero.wait(); + + if(!ids.length === 1) { + // Only one item, so we will just get its item type + ids[0].getItemType(Zotero.done); + } else { + // Several items. We will need to call determineDetectItemType + determineDetectItemType(ids); + } +} + +function doWeb(doc, url) { + var ids = getUnAPIIDs(doc); + + Zotero.wait(); + + getAllItems(ids, function(items) { + // get the domain we're scraping, so we can use it for libraryCatalog + domain = doc.location.href.match(/https?:\/\/([^/]+)/); + + if(items.length == 1) { + // If only one item, just complete it + items[0].libraryCatalog = domain[1]; + items[0].complete(); + } else if(items.length > 0) { + // If multiple items, extract their titles var itemTitles = []; - for(var i in foundItems) { - itemTitles[i] = foundItems[i].title; + for(var i in items) { + itemTitles[i] = items[i].title; } + // Show item selection dialog var chosenItems = Zotero.selectItems(itemTitles); if(!chosenItems) Zotero.done(true); + // Complete items for(var i in chosenItems) { - // Set the item Repository to the domain - foundItems[i].repository = domain[1]; - foundItems[i].complete(); + items[i].libraryCatalog = domain[1]; + items[i].complete(); } } - // reset items - foundItems = []; - Zotero.done(); return; - } - - var id = foundIds[foundItems.length]; - // foundFormat can be either a string or an array - if(typeof(foundFormat) == "string") { - var format = foundFormat; - var formatName = foundFormatName; - } else { - var format = foundFormat[foundItems.length]; - var formatName = foundFormatName[foundItems.length]; - } - - // get item - retrieveItem(id, format, formatName, function(obj, item) { - foundItems.push(item); - getAllItems(); }); } - -function doWeb() { - Zotero.wait(); - - // retrieve data for all ids - getAllIds(); -}