-Adds ARTstor translator. Needs some testing.

This commit is contained in:
Michael Berkowitz 2008-04-30 19:55:32 +00:00
parent 9073f3405e
commit 8baee8e8b3

View File

@ -22,7 +22,7 @@
-- Set the following timestamp to the most recent scraper update date
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-04-30 19:30:00'));
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-04-30 21:00:00'));
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2008-03-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats and Michael Berkowitz', '^https?://(?:www\.)?amazon',
'function detectWeb(doc, url) {
@ -1089,6 +1089,239 @@ REPLACE INTO translators VALUES ('88915634-1af6-c134-0171-56fd198235ed', '1.0.0b
Zotero.wait();
}');
REPLACE INTO translators VALUES ('5278b20c-7c2c-4599-a785-12198ea648bf', '1.0.0b4.r5', '', '2008-04-30 21:00:00', '0', '100', '4', 'ARTstor', 'Ameer Ahmed and Michael Berkowitz', 'http://web2.artstor.org',
'function detectWeb(doc, url) {
if (url.match(''Search'')) return "multiple"
}',
'function doWeb(doc, url) {
if (url.indexOf("|")!=-1){
scrape(doc, url);
}
}
function scrape(doc, url){
var savedItems = new Array();
var saved = 0;
var urlstub = url.substring(url.indexOf(''.org/'')+5,url.length);
urlstub = url.substring(0,url.indexOf(''.org/'')+5) + urlstub.substring(0, urlstub.indexOf(''/'')+1)
var suburl = url.substring(url.indexOf(''|'')+1, url.length);
var groupname = suburl.substring(0, suburl.indexOf("|"));
var searchterm = ''//*[@id="thumbSubTitle"]'';
var stt = doc.evaluate(searchterm, doc, null, XPathResult.ANY_TYPE, null).iterateNext();
var st = stt.firstChild.nodeValue;
var pageNn = ''//*[@id="pageNo"]'';
var stt = doc.evaluate(pageNn, doc, null, XPathResult.ANY_TYPE, null).iterateNext();
var pg;
if (stt.value==1){
pg = 1;
} else if (stt.value==2){
pg = 25;
} else {
pg = ((stt.value-1) * 24) + 1;
}
var groupid;
//check if user is on search page if not construct the query using the 2nd pattern
if (groupname.indexOf("search")!=-1){
groupid = "1/" + pg + "/24/0?tn=1&name=&id=all&kw=" +st + "&type=1";
}else {
groupid = suburl.substring(suburl.indexOf(''|'')+1, suburl.indexOf(''||'')) + "//thumbnails/" + pg + "/24/0";
}
// Initial query to get results from the service - primary purpose is to get objectids. which in turn are required for the 2nd service call, which exposes the actual metadata
Zotero.Utilities.HTTP.doGet(urlstub + "secure/" + groupname + "//" + groupid, function(text) {
json = eval("(" + text + ")");
items = new Object();
for(var i=0; i<json.thumbnails.length; i++) {
child = json.thumbnails[i];
var tmpUrl = urlstub + "secure/metadata/" + child.objectId + "?_method=Infolder";
//here we are saving the url service call to get each objects metadata
savedItems[saved] = tmpUrl;
items[tmpUrl]=child.objectId;
saved++;
}
// GET VALUES FROM THE WEB
var xpath = ''//div[@id="thumbContentWrap"]'';
var elmts = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null).iterateNext();
var webitems = new Object();
var selectedNums = new Array();
var si=0;
var c = elmts.getElementsByTagName(''*'');
var title = "";
for(var i=0; i<c.length; i++) {
var child = c[i];
if (child.id.indexOf("_imageHolder")!=-1){
var csss = child.style;
var glow = csss.getPropertyValue(''border'');
}
if (child.id.indexOf("_thumb1")!=-1){
title = child.title;
}
if (child.id.indexOf("_thumb2")!=-1){
title+= " :: " + child.title;
}
if (child.id.indexOf("_thumb3")!=-1){
var childtitle = child.title;
var dialogTitle = title;
if (childtitle.length>1) {
dialogTitle+=" " + childtitle;
}
var sitem = child.id.substring(6,child.id.indexOf("_"));
webitems[sitem-1] = dialogTitle;
if (glow.indexOf(75)!=-1){
selectedNums[si]=sitem-1;
si++;
}
title = null;
}
}
// GET VALUES FROM THE WEB
var tcount=0;
var newitems = null;
if (selectedNums.length>0){
newitems = new Object();
for (j=0; j<selectedNums.length;j++){
var numnum = selectedNums[j];
for (var x in items){
if (numnum==tcount){
newitems[x] = webitems[tcount];
tcount=0;
break;
} else {
tcount++;
}
}
}
} else {
tcount = 0;
for (var x in items){
items[x] = webitems[tcount];
tcount++;
}
}
if (newitems!=null){
items = newitems;
}
//show dialogbox
var items = Zotero.selectItems(items);
if(!items) {
return true;
}
var urls = new Array();
for(var i in items) {
urls.push(i);
}
//this gets called when an object is selected in the dialog box, fires off a get on the service url
Zotero.Utilities.HTTP.doGet(urls, function(text) {
json = eval("(" + text + ")");
var newArticle = new Zotero.Item(''artwork'');
for (var i=0; i<json.metaData.length; i++) {
child = json.metaData[i];
// MISSING CULTURE!!!
if (child.fieldName.indexOf("Title")!=-1){
if (newArticle.title!=null){
newArticle.title+= ";" + child.fieldValue;
} else {
newArticle.title = child.fieldValue;
}
}
if (child.fieldName.indexOf("Creator")!=-1){
var aut = child.fieldValue.match(/^([^,]+),\s+(.*)$/);
newArticle.notes.push({note:"Artist information: " + aut[2]});
newArticle.creators.push(Zotero.Utilities.cleanAuthor(aut[1], "artist"));
}
if (child.fieldName.indexOf("Culture")!=-1){
newArticle.creators.push(Zotero.Utilities.cleanAuthor(child.fieldValue, "producer", true));
}
if (child.fieldName.indexOf("Rights")!=-1){
if (newArticle.rights!=null){
newArticle.rights+= ";" + child.fieldValue.replace(/<wbr\/>/g, "");
} else {
newArticle.rights = child.fieldValue.replace(/<wbr\/>/g, "");
}
}
if (child.fieldName.indexOf("Subject")!=-1){
newArticle.tags.push(Zotero.Utilities.trimInternal(child.fieldValue));
}
if (child.fieldName.indexOf("Location")!=-1){
newArticle.tags.push(Zotero.Utilities.trimInternal(child.fieldValue));
}
if (child.fieldName.indexOf("Style Period")!=-1){
newArticle.tags.push(Zotero.Utilities.trimInternal(child.fieldValue));
}
if (child.fieldName.indexOf("Work Type")!=-1){
newArticle.tags.push(Zotero.Utilities.trimInternal(child.fieldValue));
}
if (child.fieldName.indexOf("Material")!=-1 || child.fieldName.indexOf("Technique")!=-1 ){
if (newArticle.artworkMedium!=null){
newArticle.artworkMedium+= ";" + Zotero.Utilities.trimInternal(child.fieldValue);
} else {
newArticle.artworkMedium = Zotero.Utilities.trimInternal(child.fieldValue);
}
}
if (child.fieldName.indexOf("Measurements")!=-1){
if (newArticle.artworkSize!=null){
newArticle.artworkSize+= ";" + Zotero.Utilities.trimInternal(child.fieldValue);
} else {
newArticle.artworkSize = Zotero.Utilities.trimInternal(child.fieldValue);
}
}
if (child.fieldName.indexOf("Date")!=-1){
if (newArticle.date!=null){
newArticle.date+= ";" + Zotero.Utilities.trimInternal(child.fieldValue);
} else {
//bug here!! when date parser fails, entire object is not saved in Zotero - works in Scaffold, fails in Zotero! to patch remove all occurrences of B.C
newArticle.date = Zotero.Utilities.trimInternal(child.fieldValue.replace(/B.C./i, ""));
}
}
if (child.fieldName.indexOf("Repository")!=-1){
if (newArticle.repository!=null){
newArticle.repository+= ";" + Zotero.Utilities.trimInternal(child.fieldValue);
} else {
newArticle.repository = Zotero.Utilities.trimInternal(child.fieldValue);
}
}
if (child.fieldName.indexOf("Source")!=-1){
if (newArticle.archiveLocation!=null){
newArticle.archiveLocation+= ";" + Zotero.Utilities.trimInternal(child.fieldValue);
} else {
newArticle.archiveLocation = Zotero.Utilities.trimInternal(child.fieldValue);
}
}
if (child.fieldName.indexOf("Description")!=-1){
if (newArticle.abstractNote!=null){
newArticle.abstractNote+= ";" + Zotero.Utilities.trimInternal(child.fieldValue);
} else {
newArticle.abstractNote = Zotero.Utilities.trimInternal(child.fieldValue);
}
}
if (child.fieldName.indexOf("Collection")!=-1){
if (newArticle.extra!=null){
newArticle.extra+= ";" + Zotero.Utilities.trimInternal(child.fieldValue);
} else {
newArticle.extra = Zotero.Utilities.trimInternal(child.fieldValue);
}
}
}
var objectId = json.objectId;
//this is called to get the url stub for the ARTstor viewer
Zotero.Utilities.HTTP.doGet(urlstub + "secure/metadata/" + objectId + "?_method=FpHtml", function(dom) {
var testurl = dom.substring(dom.lastIndexOf(''<td class="data">'')+21,dom.lastIndexOf(''</td>''));
var t2 = "http://www.artstor.org";
var tmp2 = testurl.replace(/<wbr\/>/g, "");
tmp2 = tmp2.substring(0, tmp2.indexOf("&userId"));
//build ARTstorImageURL
artstorimgurl = t2+tmp2;
newArticle.url = artstorimgurl;
newArticle.callNumber = objectId;
newArticle.complete();
Zotero.done();
});
Zotero.wait();
});
Zotero.wait();
});
Zotero.wait();
}');
REPLACE INTO translators VALUES ('79f6f9ed-537a-4d4f-8270-c4fbaafdf327', '1.0.0b4.r5', '', '2008-04-30 19:30:00', '0', '100', '4', 'Emerald Publishing', 'Michael Berkowitz', 'www.emeraldinsight.com/',
'function detectWeb(doc, url) {
if (url.match(''searchQuickOptions.do'')) {