-Adds Alexander Street Press translator with added handling for search results.
This commit is contained in:
parent
86c13dca35
commit
10e1ecedba
188
scrapers.sql
188
scrapers.sql
|
@ -22,7 +22,7 @@
|
|||
|
||||
|
||||
-- Set the following timestamp to the most recent scraper update date
|
||||
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-02-27 15:00:00'));
|
||||
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-02-27 17:00:00'));
|
||||
|
||||
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2007-06-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
|
||||
'function detectWeb(doc, url) {
|
||||
|
@ -1579,6 +1579,192 @@ function doWeb(doc, url) {
|
|||
}
|
||||
}');
|
||||
|
||||
REPLACE INTO translators VALUES ('0a84a653-79ea-4c6a-8a68-da933e3b504a', '1.0.0b4.r5', '', '2008-02-27 17:00:00', '0', '100', '4', 'Alexander Street Press', 'John West and Michael Berkowitz', '^http://(?:www\.)|(?:asp6new\.)alexanderstreet',
|
||||
'function detectWeb(doc, url) {
|
||||
if( url.indexOf("object.details.aspx") != -1 ) {
|
||||
var zitemtype = doc.getElementById("ctl00_ctl00_MasterContentBody_ContentPlaceHolder1_txtZType").value;
|
||||
switch (zitemtype.toLowerCase()) {
|
||||
case "book":
|
||||
return "book";
|
||||
break;
|
||||
case "chapter":
|
||||
return "bookSection";
|
||||
break;
|
||||
case "journal":
|
||||
return "journalArticle";
|
||||
break;
|
||||
case "manuscript":
|
||||
return "manuscript";
|
||||
break;
|
||||
case "audio":
|
||||
return "audioRecording";
|
||||
break;
|
||||
case "video":
|
||||
return "videoRecording";
|
||||
break;
|
||||
case "issue":
|
||||
return "journalArticle";
|
||||
break;
|
||||
case "article":
|
||||
return "journalArticle";
|
||||
break;
|
||||
case "series":
|
||||
return "interview";
|
||||
break;
|
||||
case "session":
|
||||
return "interview";
|
||||
break;
|
||||
default:
|
||||
return "document";
|
||||
}
|
||||
} else if (url.indexOf("results.aspx") != -1) {
|
||||
return "multiple";
|
||||
}
|
||||
}',
|
||||
'function scrape(doc, url) {
|
||||
|
||||
// set prefix for serverside control
|
||||
var p = "ctl00_ctl00_MasterContentBody_ContentPlaceHolder1_txtZ";
|
||||
|
||||
// get values from hidden inputs
|
||||
var ztype = GetItemType(doc.getElementById(p+"Type").value);
|
||||
var ztitle = doc.getElementById(p+"Title").value;
|
||||
var zbooktitle = doc.getElementById(p+"BookTitle").value;
|
||||
var znotes = doc.getElementById(p+"Notes").value;
|
||||
var zurl = doc.getElementById(p+"URL").value;
|
||||
var zrights = doc.getElementById(p+"Rights").value;
|
||||
var zseries = doc.getElementById(p+"Series").value;
|
||||
var zvolume = doc.getElementById(p+"Volume").value;
|
||||
var zissue = doc.getElementById(p+"Issue").value;
|
||||
var zedition = doc.getElementById(p+"Edition").value;
|
||||
var zplace = doc.getElementById(p+"Place").value;
|
||||
var zpublisher = doc.getElementById(p+"Publisher").value;
|
||||
var zpages = doc.getElementById(p+"Pages").value;
|
||||
var zrepository = doc.getElementById(p+"Repository").value;
|
||||
var zlabel = doc.getElementById(p+"Label").value;
|
||||
var zrunningTime = doc.getElementById(p+"RunningTime").value;
|
||||
var zlanguage = doc.getElementById(p+"Language").value;
|
||||
var zauthor = doc.getElementById(p+"Author").value;
|
||||
var zeditor = doc.getElementById(p+"Editor").value;
|
||||
var ztranslator = doc.getElementById(p+"Translator").value;
|
||||
var zinterviewee = doc.getElementById(p+"Interviewee").value;
|
||||
var zinterviewer = doc.getElementById(p+"Interviewer").value;
|
||||
var zrecipient = doc.getElementById(p+"Recipient").value;
|
||||
var zdirector = doc.getElementById(p+"Director").value;
|
||||
var zscriptwriter = doc.getElementById(p+"ScriptWriter").value;
|
||||
var zproducer = doc.getElementById(p+"Producer").value;
|
||||
var zcastMember = doc.getElementById(p+"CastMember").value;
|
||||
var zperformer = doc.getElementById(p+"Performer").value;
|
||||
var zcomposer = doc.getElementById(p+"Composer").value;
|
||||
|
||||
// create Zotero item
|
||||
var newArticle = new Zotero.Item(ztype);
|
||||
|
||||
// populate Zotero item
|
||||
newArticle.title = ztitle;
|
||||
newArticle.bookTitle = zbooktitle;
|
||||
newArticle.notes = znotes;
|
||||
newArticle.url = zurl;
|
||||
newArticle.place = zplace;
|
||||
newArticle.publisher = zpublisher;
|
||||
newArticle.pages = zpages;
|
||||
newArticle.rights = zrights;
|
||||
newArticle.series = zseries;
|
||||
newArticle.volume = zvolume;
|
||||
newArticle.issue = zissue;
|
||||
newArticle.edition = zedition;
|
||||
newArticle.repository = zrepository;
|
||||
newArticle.label = zlabel;
|
||||
newArticle.runningTime = zrunningTime;
|
||||
newArticle.language = zlanguage;
|
||||
newArticle.editor = zeditor;
|
||||
newArticle.translator = ztranslator;
|
||||
newArticle.interviewee = zinterviewee;
|
||||
newArticle.interviewer = zinterviewer;
|
||||
newArticle.recipient = zrecipient;
|
||||
newArticle.director = zdirector;
|
||||
newArticle.scriptwriter = zscriptwriter;
|
||||
newArticle.producer = zproducer;
|
||||
newArticle.castMember = zcastMember;
|
||||
newArticle.performer = zperformer;
|
||||
newArticle.composer = zcomposer;
|
||||
var aus = zauthor.split(";");
|
||||
for (var i=0; i< aus.length ; i++) {
|
||||
newArticle.creators.push(Zotero.Utilities.cleanAuthor(aus[i], "author", true));
|
||||
}
|
||||
|
||||
newArticle.attachments = [{url:doc.location.href, title:"Alexander Street Press Snapshot", mimeType:"text/html"}];
|
||||
if (doc.evaluate(''//a[contains(@href, "get.pdf")]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||||
var pdfurl = doc.evaluate(''//a[contains(@href, "get.pdf")]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext().href;
|
||||
newArticle.attachments.push({url:pdfurl, title:"Alexander Street Press PDF", mimeType:"application/pdf"});
|
||||
} else if (doc.evaluate(''//a[contains(@href, "get.jpg")]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||||
var imgurl = doc.evaluate(''//a[contains(@href, "get.jpg")]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext().href.replace(/.{2}$/, "01");
|
||||
newArticle.attachments.push({url:imgurl, title:"Alexander Street Press Pg 1", mimeType:"image/jpg"});
|
||||
newArticle.notes = [{note:"Further page images can be found by following the URL of the ''Alexander Street Press Pg 1'' attachment and iterating the final digits of the URL"}];
|
||||
}
|
||||
// save Zotero item
|
||||
newArticle.complete();
|
||||
|
||||
}
|
||||
|
||||
function GetItemType(zitemtype) {
|
||||
switch (zitemtype.toLowerCase()) {
|
||||
case "book":
|
||||
return "book";
|
||||
break;
|
||||
case "chapter":
|
||||
return "bookSection";
|
||||
break;
|
||||
case "journal":
|
||||
return "journalArticle";
|
||||
break;
|
||||
case "manuscript":
|
||||
return "manuscript";
|
||||
break;
|
||||
case "audio":
|
||||
return "audioRecording";
|
||||
break;
|
||||
case "video":
|
||||
return "videoRecording";
|
||||
break;
|
||||
case "issue":
|
||||
return "journalArticle";
|
||||
break;
|
||||
case "article":
|
||||
return "journalArticle";
|
||||
break;
|
||||
case "series":
|
||||
return "interview";
|
||||
break;
|
||||
case "session":
|
||||
return "interview";
|
||||
break;
|
||||
default:
|
||||
return "document";
|
||||
}
|
||||
}
|
||||
|
||||
function doWeb(doc, url) {
|
||||
var articles = new Array();
|
||||
if (detectWeb(doc, url) == "multiple") {
|
||||
var items = new Object();
|
||||
var xpath = ''//tbody/tr/td[2][@class="data"]/a[1]'';
|
||||
var titles = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null);
|
||||
var next_title;
|
||||
while (next_title = titles.iterateNext()) {
|
||||
items[next_title.href] = next_title.textContent;
|
||||
}
|
||||
items = Zotero.selectItems(items);
|
||||
for (var i in items) {
|
||||
articles.push(i);
|
||||
}
|
||||
} else {
|
||||
articles = [url];
|
||||
}
|
||||
Zotero.debug(articles);
|
||||
Zotero.Utilities.processDocuments(articles, scrape, function() {Zotero.done;});
|
||||
}');
|
||||
|
||||
REPLACE INTO translators VALUES ('0abd577b-ec45-4e9f-9081-448737e2fd34', '1.0.0b4.r5', '', '2008-02-22 20:30:00', '0', '100', '4', 'DSpace', 'Michael Berkowitz', 'dspace',
|
||||
'function detectWeb(doc, url) {
|
||||
if (doc.evaluate(''//center/table[@class="itemDisplayTable"]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user