-Adds AlterNet translator from Zotero-Dev list.
This commit is contained in:
parent
137b9dc7ee
commit
a17e06469b
170
scrapers.sql
170
scrapers.sql
|
@ -5862,6 +5862,176 @@ REPLACE INTO translators VALUES ('f203db7f-7b7b-4dc4-b018-115b7885fe3b', '1.0.0b
|
||||||
}
|
}
|
||||||
}');
|
}');
|
||||||
|
|
||||||
|
REPLACE INTO translators VALUES ('ea531652-cdeb-4ec2-940e-627d4b107263', '1.0.0b4.r1', '', '2008-07-09 13:41:54', '0', '100', '4', 'AlterNet', 'Jesse Johnson', '^http://(?:www\.)alternet.org',
|
||||||
|
'function detectWeb(doc, url) {
|
||||||
|
// identifies articles according to the presence of an article ID
|
||||||
|
// number in the URL
|
||||||
|
var index = url.toString().indexOf(''.org/'') + 5;
|
||||||
|
index += url.toString().substr(index).indexOf(''/'');
|
||||||
|
if (index != -1) {
|
||||||
|
// ordinary aritcle
|
||||||
|
var id = url.toString().substr(index + 1, 5);
|
||||||
|
Zotero.Utilities.cleanString(id);
|
||||||
|
if (Number(id)) {
|
||||||
|
return "magazineArticle";
|
||||||
|
}
|
||||||
|
//columnist or blog article
|
||||||
|
index += url.toString().substr(index + 1).indexOf(''/'');
|
||||||
|
id = url.toString().substr(index + 2, 5);
|
||||||
|
Zotero.Utilities.cleanString(id);
|
||||||
|
if (Number(id) && url.toString().search(''blog'') == -1) {
|
||||||
|
return "magazineArticle";
|
||||||
|
}
|
||||||
|
else if (Number(id)) {
|
||||||
|
return "blogPost";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}',
|
||||||
|
'function scrape(doc, url, title) {
|
||||||
|
var index = url.toString().indexOf(''.org/'') + 5;
|
||||||
|
index += url.toString().substr(index).indexOf(''/'');
|
||||||
|
if (index != -1) {
|
||||||
|
// ordinary aritcle
|
||||||
|
var id = url.toString().substr(index + 1, 5);
|
||||||
|
Zotero.Utilities.cleanString(id);
|
||||||
|
if (Number(id)) {
|
||||||
|
var newItem = new Zotero.Item("magazineArticle");
|
||||||
|
}
|
||||||
|
//columnist or blog article
|
||||||
|
index += url.toString().substr(index + 1).indexOf(''/'');
|
||||||
|
id = url.toString().substr(index + 2, 5);
|
||||||
|
Zotero.Utilities.cleanString(id);
|
||||||
|
if (Number(id) && url.toString().search(''blog'') == -1) {
|
||||||
|
var newItem = new Zotero.Item("magazineArticle");
|
||||||
|
}
|
||||||
|
else if (Number(id)) {
|
||||||
|
var newItem = new Zotero.Item("blogPost");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
newItem.url = url;
|
||||||
|
newItem.title = title;
|
||||||
|
|
||||||
|
if (newItem.itemType == "magazineArticle") {
|
||||||
|
newItem.publicationTitle = "AlterNet";
|
||||||
|
newItem.repository = "alternet.org";
|
||||||
|
}
|
||||||
|
else if (newItem.itemType == "blogPost") {
|
||||||
|
newItem.websiteType = "AlterNet Blog";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// general scraping variables
|
||||||
|
var xpath;
|
||||||
|
|
||||||
|
|
||||||
|
// author
|
||||||
|
if (newItem.itemType == "magazineArticle") {
|
||||||
|
xpath = ''//p[@class="storybyline"]//a[contains(@href,"author")]'';
|
||||||
|
}
|
||||||
|
else if (newItem.itemType == "blogPost") {
|
||||||
|
xpath = ''//p[@class="storybyline"]//a[contains(@href,"bloggers")]'';
|
||||||
|
}
|
||||||
|
temp = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null).iterateNext();
|
||||||
|
if (temp) {
|
||||||
|
var author = Zotero.Utilities.trimInternal(temp.textContent);
|
||||||
|
if(author.substr(0, 3).toLowerCase() == "by ") {
|
||||||
|
author = author.substr(3);
|
||||||
|
}
|
||||||
|
|
||||||
|
var authors = author.split(",");
|
||||||
|
for each (var author in authors) {
|
||||||
|
newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "author"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// date
|
||||||
|
if (newItem.itemType == "magazineArticle") {
|
||||||
|
xpath = ''//p[@class="storybyline"]//a[contains(@href,"date")]'';
|
||||||
|
temp = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null).iterateNext();
|
||||||
|
var date = Zotero.Utilities.strToDate(temp.textContent);
|
||||||
|
}
|
||||||
|
else if (newItem.itemType == "blogPost") {
|
||||||
|
xpath = ''//p[@class="storybyline"]/b'';
|
||||||
|
temp = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null).iterateNext();
|
||||||
|
var begin = temp.textContent.lastIndexOf(" on ");
|
||||||
|
temp = temp.textContent.substr(begin + 4);
|
||||||
|
var date = Zotero.Utilities.strToDate(temp.substr(0, temp.length - 1));
|
||||||
|
}
|
||||||
|
if (date != null) {
|
||||||
|
var strdate;
|
||||||
|
|
||||||
|
date.month = date.month + 1;
|
||||||
|
|
||||||
|
strdate = date.year + ''-'';
|
||||||
|
if (date.month < 10) {
|
||||||
|
strdate += ''0'' + date.month;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
strdate += date.month;
|
||||||
|
}
|
||||||
|
if (date.day > 10) {
|
||||||
|
strdate += ''-'' + date.day;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
strdate += ''-0'' + date.day;
|
||||||
|
}
|
||||||
|
|
||||||
|
newItem.date = strdate;
|
||||||
|
}
|
||||||
|
|
||||||
|
// abstract
|
||||||
|
xpath = ''//div[@class="teaser"]//div[contains(@class,"teaser")]'';
|
||||||
|
temp = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null).iterateNext();
|
||||||
|
if (temp) {
|
||||||
|
newItem.abstractNote = Zotero.Utilities.trimInternal(temp.textContent);
|
||||||
|
}
|
||||||
|
|
||||||
|
// article snapshot
|
||||||
|
// grabs 5-digit article code from url and uses it to derive printable page url for use in article snapshot
|
||||||
|
var index = url.toString().indexOf(''.org/'') + 5;
|
||||||
|
index += url.toString().substr(index).indexOf(''/'');
|
||||||
|
if (index != -1) {
|
||||||
|
var printurl;
|
||||||
|
// ordinary article
|
||||||
|
var id = url.toString().substr(index + 1, 5);
|
||||||
|
if (Number(id)) {
|
||||||
|
printurl = "http://www.alternet.org/module/printversion/" + id;
|
||||||
|
newItem.attachments.push({url:printurl, title:"AlterNet Article Snapshot", mimeType:"text/html"});
|
||||||
|
}
|
||||||
|
// columnist article
|
||||||
|
else {
|
||||||
|
index += url.toString().substr(index + 1).indexOf(''/'');
|
||||||
|
id = url.toString().substr(index + 2, 5);
|
||||||
|
Zotero.Utilities.cleanString(id);
|
||||||
|
if (Number(id)) {
|
||||||
|
printurl = "http://www.alternet.org/module/printversion/" + id;
|
||||||
|
if (newItem.itemType == "blogPost") {
|
||||||
|
printurl += "/?type=blog";
|
||||||
|
}
|
||||||
|
newItem.attachments.push({url:printurl, title:"AlterNet Article Snapshot", mimeType:"text/html"});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
newItem.complete();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
function doWeb(doc, url) {
|
||||||
|
// ordinary and columnist articles
|
||||||
|
var xpath = ''//p[@class="storyheadline"]'';
|
||||||
|
var title;
|
||||||
|
if (title = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||||||
|
scrape(doc, url, title.textContent);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}');
|
||||||
|
|
||||||
REPLACE INTO translators VALUES ('56ea09bc-57ee-4f50-976e-cf7cb1f6c6d8', '1.0.0b4.r5', '', '2008-04-23 09:45:00', '0', '100', '4', 'Royal Society Publishing', 'Michael Berkowitz', 'http://journals.royalsociety.org/',
|
REPLACE INTO translators VALUES ('56ea09bc-57ee-4f50-976e-cf7cb1f6c6d8', '1.0.0b4.r5', '', '2008-04-23 09:45:00', '0', '100', '4', 'Royal Society Publishing', 'Michael Berkowitz', 'http://journals.royalsociety.org/',
|
||||||
'function detectWeb(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
if (doc.evaluate(''//div[@class="listItemName"]/a'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
|
if (doc.evaluate(''//div[@class="listItemName"]/a'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user