diff --git a/translators/Reuters.js b/translators/Reuters.js index 6571b64b0..e02f5373f 100644 --- a/translators/Reuters.js +++ b/translators/Reuters.js @@ -1,18 +1,42 @@ { - "translatorID":"83979786-44af-494a-9ddb-46654e0486ef", - "translatorType":4, - "label":"Reuters", - "creator":"Michael Berkowitz", - "target":"http://(www\\.)?reuters.com/", - "minVersion":"1.0.0b4.r5", - "maxVersion":"", - "priority":100, - "inRepository":true, - "lastUpdated":"2008-07-07 14:50:00" + "translatorID": "83979786-44af-494a-9ddb-46654e0486ef", + "label": "Reuters", + "creator": "Avram Lyon, Michael Berkowitz", + "target": "^https?://(www\\.)?reuters\\.com/", + "minVersion": "2.0", + "maxVersion": "", + "priority": 100, + "inRepository": "1", + "translatorType": 4, + "lastUpdated": "2011-03-10 21:05:59" } +/* + Reuters Translator + Copyright (C) 2011 Avram Lyon, ajlyon@gmail.com + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* + Translator for Reuters. This is a minimal translator just to get Reuters back working after a redesign. + Future versions should implement multiple item saving and attend to the nits that this translator has + probably missed. 
/*
	Page detection, metadata scraping and author clean-up for the Reuters
	translator. Only single-article pages are handled; multiple-item saving
	is not implemented.
*/

/**
 * Report the item type for Reuters article pages.
 *
 * @param {Document} doc - The loaded page (not read by this function).
 * @param {String} url - Address of the loaded page.
 * @returns {String|undefined} "newspaperArticle" when the URL path starts with
 *     /article on reuters.com, otherwise undefined.
 */
function detectWeb(doc, url) {
	if (/^https?:\/\/(www\.)?reuters\.com\/article/.test(url)) {
		return "newspaperArticle";
	}
}

/**
 * Return the first node matched by an XPath expression, or null if none.
 *
 * @param {Document} doc - Document to query.
 * @param {String} xpath - XPath expression evaluated against doc.
 * @returns {Node|null}
 */
function firstNode(doc, xpath) {
	return doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null).iterateNext();
}

/**
 * Return a node's text content without surrounding whitespace.
 *
 * @param {Node|null} node - Node to read; may be null.
 * @returns {String} Trimmed text, or "" when node is null.
 */
function trimmedText(node) {
	return node ? node.textContent.replace(/^\s+|\s+$/g, "") : "";
}

/**
 * Scrape one Reuters article page into a newspaperArticle item and save it.
 *
 * Every page element is treated as optional: a missing byline, location,
 * lead paragraph or canonical link leaves the corresponding field unset (or
 * falls back as noted below) instead of aborting the translation.
 *
 * @param {Document} doc - The loaded article page.
 * @param {String} url - Address of the loaded page; used as the item URL when
 *     the page has no canonical link.
 */
function doWeb(doc, url) {
	var item = new Zotero.Item("newspaperArticle");
	item.publicationTitle = "Reuters";

	// Prefer the Open Graph title; fall back to the document title.
	var titleNode = firstNode(doc, '//meta[@property="og:title"]');
	item.title = titleNode ? titleNode.content : doc.title;

	var dateNode = firstNode(doc, '//meta[@name="REVISION_DATE"]');
	if (dateNode) {
		item.date = dateNode.content;
	}

	var place = trimmedText(firstNode(doc, '//div[@id="articleInfo"]//span[@class="location"]'));
	if (place) {
		// Datelines written in all caps are converted to title case.
		if (place === place.toUpperCase()) {
			place = Zotero.Utilities.capitalizeTitle(place.toLowerCase(), true);
		}
		item.place = place;
	}

	// Strip the leading "By " only when it is actually there, then split the
	// remainder on commas and on the word "and" (assumes bylines of the form
	// "By A, B and C" -- TODO confirm against live pages).
	var byline = trimmedText(firstNode(doc, '//div[@id="articleInfo"]//p[@class="byline"]'));
	var names = byline.replace(/^by\s+/i, "").split(/\s*,\s*and\s+|\s*,\s*|\s+and\s+/i);
	for (var i = 0; i < names.length; i++) {
		// An empty byline or a trailing separator yields empty fragments.
		if (names[i]) {
			item.creators.push(authorFix(names[i]));
		}
	}

	var leadNode = firstNode(doc, '//span[@class="focusParagraph"]/p');
	if (leadNode) {
		// Drop the "PLACE (Reuters) - " dateline from the lead paragraph.
		item.abstractNote = leadNode.textContent.replace(/^.*\(Reuters\)\s+-\s+/, "");
	}

	var canonicalNode = firstNode(doc, '//link[@rel="canonical"]');
	item.url = (canonicalNode && canonicalNode.href) ? canonicalNode.href : url;

	item.complete();
}

/**
 * Turn one byline fragment into a Zotero creator object.
 *
 * @param {String} author - A single author name, optionally prefixed with
 *     "By " and padded with whitespace.
 * @returns {Object} The creator returned by Zotero.Utilities.cleanAuthor, with
 *     fieldMode set to true when no first name was found, and with
 *     all-lowercase name parts capitalized otherwise.
 */
function authorFix(author) {
	// Sometimes we have "By Author"
	var name = author.replace(/^\s+|\s+$/g, "").replace(/^by\s+/i, "");
	var cleaned = Zotero.Utilities.cleanAuthor(name, "author");
	if (!cleaned.firstName) {
		// Only one name was found: mark the creator as single-field.
		cleaned.fieldMode = true;
		return cleaned;
	}
	// All-lowercase parts are capitalized here; per the original author's
	// note, all-uppercase input is already handled by cleanAuthor.
	if (cleaned.firstName === cleaned.firstName.toLowerCase()) {
		cleaned.firstName = Zotero.Utilities.capitalizeTitle(cleaned.firstName, true);
	}
	if (cleaned.lastName === cleaned.lastName.toLowerCase()) {
		cleaned.lastName = Zotero.Utilities.capitalizeTitle(cleaned.lastName, true);
	}
	return cleaned;
}