Fixes Project MUSE translator. At last!
This commit is contained in:
parent
235144fa34
commit
1c911ff169
139
scrapers.sql
139
scrapers.sql
|
@ -22,7 +22,7 @@
|
||||||
|
|
||||||
|
|
||||||
-- Set the following timestamp to the most recent scraper update date
|
-- Set the following timestamp to the most recent scraper update date
|
||||||
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-10-03 08:00:00'));
|
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2007-10-10 20:00:00'));
|
||||||
|
|
||||||
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2007-06-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
|
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2007-06-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats', '^https?://(?:www\.)?amazon',
|
||||||
'function detectWeb(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
|
@ -7063,9 +7063,9 @@ REPLACE INTO translators VALUES ('0f9fc2fc-306e-5204-1117-25bca009dffc', '1.0.0b
|
||||||
Zotero.wait();
|
Zotero.wait();
|
||||||
}');
|
}');
|
||||||
|
|
||||||
REPLACE INTO translators VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '1.0.0b3.r1', '', '2007-06-12 23:30:00', 1, 100, 4, 'Project MUSE', 'Simon Kornblith', '^https?://muse\.jhu\.edu[^/]*/(?:journals/[^/]+/[^/]+/[^/]+\.html|search/pia.cgi)',
|
REPLACE INTO translators VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '1.0.0b4.r1', '', '2007-10-10 20:00:00', '1', '100', '4', 'Project MUSE', 'Simon Kornblith', '^https?://muse\.jhu\.edu[^/]*/(?:journals/[^/]+/[^/]+/[^/]+\.html|search/results)',
|
||||||
'function detectWeb(doc, url) {
|
'function detectWeb(doc, url) {
|
||||||
var searchRe = new RegExp("^https?://[^/]+/search/pia\.cgi");
|
var searchRe = new RegExp("^https?://[^/]+/search/results");
|
||||||
if(searchRe.test(url)) {
|
if(searchRe.test(url)) {
|
||||||
return "multiple";
|
return "multiple";
|
||||||
} else {
|
} else {
|
||||||
|
@ -7078,7 +7078,7 @@ REPLACE INTO translators VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '1.0.0b
|
||||||
if (prefix == ''x'') return namespace; else return null;
|
if (prefix == ''x'') return namespace; else return null;
|
||||||
} : null;
|
} : null;
|
||||||
|
|
||||||
var searchRe = new RegExp("^https?://[^/]+/search/pia\.cgi");
|
var searchRe = new RegExp("^https?://[^/]+/search/results");
|
||||||
if(searchRe.test(doc.location.href)) {
|
if(searchRe.test(doc.location.href)) {
|
||||||
var items = new Array();
|
var items = new Array();
|
||||||
var attachments = new Array();
|
var attachments = new Array();
|
||||||
|
@ -7090,11 +7090,11 @@ REPLACE INTO translators VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '1.0.0b
|
||||||
var tableRow;
|
var tableRow;
|
||||||
// Go through table rows
|
// Go through table rows
|
||||||
while(tableRow = tableRows.iterateNext()) {
|
while(tableRow = tableRows.iterateNext()) {
|
||||||
// article_id is what we need to get it all as one file
|
// aid (article id) is what we need to get it all as one file
|
||||||
var input = doc.evaluate(''./tbody/tr/td/input[@name="article_id"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
var input = doc.evaluate(''./tbody/tr/td/input[@name="aid"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||||||
var link = doc.evaluate(''.//b/i/text()'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
var title = doc.evaluate(''.//b/i/text()'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||||||
if(input && input.value && link && link.nodeValue) {
|
if(input && input.value && title && title.nodeValue) {
|
||||||
items[input.value] = link.nodeValue;
|
items[input.value] = title.nodeValue;
|
||||||
|
|
||||||
var aTags = tableRow.getElementsByTagName("a");
|
var aTags = tableRow.getElementsByTagName("a");
|
||||||
|
|
||||||
|
@ -7119,100 +7119,63 @@ REPLACE INTO translators VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '1.0.0b
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
|
||||||
var search_id = doc.forms.namedItem("results").elements.namedItem("search_id").value;
|
|
||||||
} catch(e) {
|
|
||||||
var search_id = "";
|
|
||||||
}
|
|
||||||
var articleString = "";
|
var articleString = "";
|
||||||
var newAttachments = new Array();
|
var newAttachments = new Array();
|
||||||
for(var i in items) {
|
for(var i in items) {
|
||||||
articleString += "&article_id="+i;
|
articleString += "&aid="+i;
|
||||||
newAttachments.push(attachments[i]);
|
newAttachments.push(attachments[i]);
|
||||||
}
|
}
|
||||||
var savePostString = "actiontype=save&search_id="+search_id+articleString;
|
|
||||||
|
|
||||||
Zotero.Utilities.HTTP.doGet("http://muse.jhu.edu/search/save.cgi?"+savePostString, function() {
|
Zotero.Utilities.HTTP.doGet("http://muse.jhu.edu/search/export.cgi?exporttype=endnote"+articleString, function(text) {
|
||||||
Zotero.Utilities.HTTP.doGet("http://muse.jhu.edu/search/export.cgi?exporttype=endnote"+articleString, function(text) {
|
// load translator for RIS
|
||||||
Zotero.debug(text);
|
var translator = Zotero.loadTranslator("import");
|
||||||
// load translator for RIS
|
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
|
||||||
var translator = Zotero.loadTranslator("import");
|
translator.setString(text);
|
||||||
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
|
translator.setHandler("itemDone", function(obj, item) {
|
||||||
translator.setString(text);
|
if(item.notes && item.notes[0]) {
|
||||||
translator.setHandler("itemDone", function(obj, item) {
|
item.extra = item.notes[0].note;
|
||||||
if(item.notes && item.notes[0]) {
|
delete item.notes;
|
||||||
Zotero.debug(item.notes);
|
item.notes = undefined;
|
||||||
item.extra = item.notes[0].note;
|
}
|
||||||
|
item.attachments = newAttachments.shift();
|
||||||
delete item.notes;
|
item.complete();
|
||||||
item.notes = undefined;
|
});
|
||||||
}
|
translator.translate();
|
||||||
item.attachments = newAttachments.shift();
|
Zotero.done();
|
||||||
Zotero.debug(item.attachments);
|
|
||||||
item.complete();
|
|
||||||
});
|
|
||||||
translator.translate();
|
|
||||||
Zotero.done();
|
|
||||||
}, function() {});
|
|
||||||
}, function() {});
|
}, function() {});
|
||||||
|
|
||||||
Zotero.wait();
|
Zotero.wait();
|
||||||
} else {
|
} else {
|
||||||
var newItem = new Zotero.Item("journalArticle");
|
var hostRe = new RegExp("^(http://[^/]+)/");
|
||||||
newItem.url = url;
|
var m = hostRe.exec(url);
|
||||||
newItem.attachments.push({document:doc, title:"Project MUSE Snapshot"});
|
var host = m[1];
|
||||||
|
|
||||||
var getPDF = doc.evaluate(''//a[text() = "[Access article in PDF]"]'', doc,
|
var getPDF = doc.evaluate(''//a[text() = "[Access article in PDF]"]'', doc,
|
||||||
nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||||||
if(getPDF) {
|
|
||||||
newItem.attachments.push({title:"Project MUSE Full Text PDF", mimeType:"application/pdf",
|
|
||||||
url:getPDF.href});
|
|
||||||
}
|
|
||||||
|
|
||||||
var elmts = doc.evaluate(''//comment()'', doc, nsResolver,
|
var newUrl = url.replace(host, host+"/metadata/zotero");
|
||||||
XPathResult.ANY_TYPE, null);
|
Zotero.Utilities.HTTP.doGet(newUrl, function(text) {
|
||||||
|
var translator = Zotero.loadTranslator("import");
|
||||||
|
//set RIS translator
|
||||||
|
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
|
||||||
|
translator.setString(text);
|
||||||
|
translator.setHandler("itemDone", function(obj, item) {
|
||||||
|
if(item.notes && item.notes[0]) {
|
||||||
|
item.extra = item.notes[0].note;
|
||||||
|
delete item.notes;
|
||||||
|
item.notes = undefined;
|
||||||
|
}
|
||||||
|
item.attachments.splice(0);
|
||||||
|
item.attachments.push({document:doc, title:"Project MUSE Snapshot"});
|
||||||
|
if(getPDF) {
|
||||||
|
item.attachments.push({title:"Project MUSE Full Text PDF", mimeType:"application/pdf",
|
||||||
|
url:getPDF.href});
|
||||||
|
}
|
||||||
|
|
||||||
var headerRegexp = /HeaderData((?:.|\n)*)\#\#EndHeaders/i
|
item.complete();
|
||||||
while(elmt = elmts.iterateNext()) {
|
});
|
||||||
if(elmt.nodeValue.substr(0, 10) == "HeaderData") {
|
translator.translate();
|
||||||
var m = headerRegexp.exec(elmt.nodeValue);
|
});
|
||||||
var headerData = m[1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use E4X rather than DOM/XPath, because the Mozilla gods have decided not to
|
|
||||||
// expose DOM/XPath to sandboxed scripts
|
|
||||||
var newDOM = new XML(headerData);
|
|
||||||
|
|
||||||
newItem.publicationTitle = newDOM.journal.text();
|
|
||||||
newItem.volume = newDOM.volume.text();
|
|
||||||
newItem.issue = newDOM.issue.text();
|
|
||||||
newItem.date = newDOM.pubdate.text().toString();
|
|
||||||
if(!newItem.date) {
|
|
||||||
newItem.date = newDOM.year.text();
|
|
||||||
}
|
|
||||||
newItem.title = newDOM.doctitle.text();
|
|
||||||
newItem.ISSN = newDOM.issn.text();
|
|
||||||
|
|
||||||
// Do pages
|
|
||||||
var fpage = newDOM.fpage.text();
|
|
||||||
var lpage = newDOM.lpage.text();
|
|
||||||
if(fpage != "") {
|
|
||||||
newItem.pages = fpage;
|
|
||||||
if(lpage) {
|
|
||||||
newItem.pages += "-"+lpage;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Do authors
|
|
||||||
var elmts = newDOM.docauthor;
|
|
||||||
for(var i in elmts) {
|
|
||||||
var fname = elmts[i].fname.text();
|
|
||||||
var surname = elmts[i].surname.text();
|
|
||||||
newItem.creators.push({firstName:fname, lastName:surname, creatorType:"author"});
|
|
||||||
}
|
|
||||||
|
|
||||||
newItem.complete();
|
|
||||||
}
|
}
|
||||||
}');
|
}');
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user