zotero/translators/OCLC WorldCat FirstSearch.js

207 lines
6.2 KiB
JavaScript

{
"translatorID":"838d8849-4ffb-9f44-3d0d-aa8a0a079afe",
"translatorType":4,
"label":"OCLC WorldCat FirstSearch",
"creator":"Simon Kornblith",
"target":"https?://[^/]*firstsearch\\.oclc\\.org[^/]*/WebZ/",
"minVersion":"1.0.0b3.r1",
"maxVersion":"",
"priority":100,
"inRepository":true,
"lastUpdated":"2011-01-11 04:31:00"
}
/**
 * Decides what kind of FirstSearch page is being shown, based on its title.
 *
 * @param {Object} doc - Page document; only its `title` property is read.
 * @param {String} url - Page URL (not used).
 * @returns {String|undefined} "book" for a detailed record page, "multiple"
 *     for a list-of-records page, otherwise undefined.
 */
function detectWeb(doc, url) {
	// Checked in order: the detailed-record pattern wins if both match.
	var pageKinds = [
		{pattern: /FirstSearch: [\w ]+ Detailed Record/, type: "book"},
		{pattern: /FirstSearch: [\w ]+ List of Records/, type: "multiple"}
	];
	for(var k=0; k<pageKinds.length; k++) {
		if(pageKinds[k].pattern.test(doc.title)) {
			return pageKinds[k].type;
		}
	}
}
/**
 * Groups the lines of a plain-text export into labelled fields.
 *
 * A line of the form "Label: value" starts a new field. Any other line that
 * is not blank and not the literal "SUBJECT(S)" heading is treated as a
 * continuation and appended (whitespace-trimmed, separated by one space) to
 * the value of the most recent field. Lines that appear before the first
 * field are ignored.
 *
 * @param {String} text - Export text, lines separated by LF or CRLF.
 * @returns {Array} Objects of the form {label, value}, in order of appearance.
 */
function parseExportFields(text) {
	var fieldRe = /^([\w() ]+): *(.*)$/;
	var fields = [];
	var current = null;
	var lines = text.split(/\r?\n/);
	for(var i=0; i<lines.length; i++) {
		var m = fieldRe.exec(lines[i]);
		if(m) {
			current = {label:m[1], value:m[2]};
			fields.push(current);
			continue;
		}
		var continuation = lines[i].replace(/^\s+|\s+$/g, "");
		if(current && continuation !== "" && continuation !== "SUBJECT(S)") {
			current.value += " "+continuation;
		}
	}
	return fields;
}

/**
 * Copies one export field onto the item being built.
 *
 * @param {Object} newItem - Item to update; its `creators` and `tags` arrays
 *     and `extra` string must already exist.
 * @param {String} label - Field label as it appears before the colon.
 * @param {String} value - Field value, continuation lines already merged.
 */
function applyExportField(newItem, label, value) {
	var m, j;
	switch(label) {
		case "Title":
			// drop the " /" that separates the title from what follows it
			if(value.substring(value.length-2) === " /") {
				value = value.substring(0, value.length-2);
			}
			newItem.title = Zotero.Utilities.capitalizeTitle(value);
			break;
		case "Series":
			newItem.series = value;
			break;
		case "Description":
			m = /([0-9]+) p\.?/.exec(value);
			if(m) {
				newItem.pages = m[1];
			}
			break;
		case "Author(s)":
		case "Corp Author(s)":
			var yearRegexp = /[0-9]{4}-([0-9]{4})?/;
			var authors = value.split(";");
			newItem.creators.push(Zotero.Utilities.cleanAuthor(authors[0], "author", true));
			// NOTE(review): the stride of 2 and the parenthesis check on the
			// preceding segment are carried over unchanged; presumably names
			// alternate with qualifier/date segments -- confirm against real
			// export data before touching this.
			for(j=1; j<authors.length; j+=2) {
				if(authors[j-1].substring(0, 1) !== "(" && !yearRegexp.test(authors[j])) {
					// ignore places where there are parentheses
					newItem.creators.push({lastName:authors[j], creatorType:"author", fieldMode:true});
				}
			}
			break;
		case "Publication":
			value = Zotero.Utilities.trimInternal(value);
			if(value.substring(value.length-1) === ",") {
				value = value.substring(0, value.length-1);
			}
			// most, but not all, WorldCat publisher/places are
			// colon delimited
			var parts = value.split(/ ?: ?/);
			if(parts.length === 2) {
				newItem.place = parts[0];
				newItem.publisher = parts[1];
			} else {
				newItem.publisher = value;
			}
			break;
		case "Institution":
			newItem.publisher = value;
			break;
		case "Standard No":
			m = /ISBN:\s*([0-9X]+)/.exec(value);
			if(m) {
				newItem.ISBN = m[1];
			}
			break;
		case "Year":
			newItem.date = value;
			break;
		case "Descriptor":
			if(value.charAt(value.length-1) === ".") {
				value = value.substring(0, value.length-1);
			}
			var tags = value.split("--");
			// indexed loop: for...in would also visit any enumerable
			// properties added to Array.prototype
			for(j=0; j<tags.length; j++) {
				newItem.tags.push(Zotero.Utilities.trimInternal(tags[j]));
			}
			break;
		case "Accession No":
			newItem.accessionNumber = Zotero.Utilities.superCleanString(value);
			break;
		case "Degree":
			newItem.itemType = "thesis";
			newItem.thesisType = value;
			break;
		case "DOI":
			newItem.DOI = value;
			break;
		case "Database":
			// anything not coming from a WorldCat database is treated as an article
			if(value.substring(0, 8) !== "WorldCat") {
				newItem.itemType = "journalArticle";
			}
			break;
		case "Availability":
		case "Find Items About":
		case "Document Type":
			// deliberately dropped
			break;
		default:
			// unrecognized fields are preserved verbatim in "extra"
			newItem.extra += label+": "+value+"\n";
	}
}

/**
 * Fetches and imports the export URLs one after another.
 *
 * Removes the first URL from `urls`, POSTs a plain-text export request to
 * it, builds an item (initially of type "book") from the response, completes
 * it and then recurses on the remaining URLs. Calls Zotero.done() once the
 * list is empty.
 *
 * Every field in the response is applied exactly once, including the first
 * and the last one, and text preceding the first field is skipped.
 *
 * @param {Array} urls - Export URLs still to process; consumed in place.
 */
function processURLs(urls) {
	if(!urls.length) { // last url
		Zotero.done();
		return;
	}
	var newUrl = urls.shift();
	Zotero.Utilities.HTTP.doPost(newUrl,
	'exportselect=record&exporttype=plaintext', function(text) {
		var newItem = new Zotero.Item("book");
		newItem.extra = "";
		var fields = parseExportFields(text);
		for(var i=0; i<fields.length; i++) {
			applyExportField(newItem, fields[i].label, fields[i].value);
		}
		if(newItem.extra) {
			// strip the newline left after the last "extra" entry
			newItem.extra = newItem.extra.substring(0, newItem.extra.length-1);
		}
		newItem.complete();
		processURLs(urls);
	}, false, 'iso-8859-1');
}
/**
 * Builds the DirectExport URL for a single record.
 *
 * @param {String} host - Scheme and host, without a trailing slash.
 * @param {String} sessionid - FirstSearch session id taken from the page URL.
 * @param {String} resultset - Result set identifier.
 * @param {String|Number} number - Record number within the result set.
 * @returns {String} The export URL.
 */
function buildExportURL(host, sessionid, resultset, number) {
	return host+'/WebZ/DirectExport?numrecs=10:smartpage=directexport:entityexportnumrecs=10:entityexportresultset=' + resultset + ':entityexportrecno=' + number + ':sessionid=' + sessionid + ':entitypagenum=35:0';
}

/**
 * Collects the export URL(s) for the current page and hands them to
 * processURLs().
 *
 * On a detailed record page a single URL is built from the page URL, falling
 * back to record number 1 and to the "resultset" element of the "main" form
 * when the URL lacks them. On any other page the user picks records from the
 * links found by getItemArray; links lacking either a recno or a resultset
 * are skipped.
 *
 * @param {Object} doc - Page document.
 * @param {String} url - Page URL; must contain a host and a sessionid.
 * @returns {Boolean|undefined} true when the user cancels the selection,
 *     otherwise undefined.
 * @throws {Error} If the host or sessionid cannot be read from `url`.
 */
function doWeb(doc, url) {
	// FirstSearch URLs separate parameters with "?" or ":"
	var sessionRegexp = /(?:\?|\:)sessionid=([^?:]+)(?:\?|\:|$)/;
	var numberRegexp = /(?:\?|\:)recno=([^?:]+)(?:\?|\:|$)/;
	var resultsetRegexp = /(?:\?|\:)resultset=([^?:]+)(?:\?|\:|$)/;
	var hostRegexp = /^(https?:\/\/[^\/]+)\//;
	var detailRe = /FirstSearch: [\w ]+ Detailed Record/;

	var sMatch = sessionRegexp.exec(url);
	var hMatch = hostRegexp.exec(url);
	if(!sMatch || !hMatch) {
		// fail with a readable message instead of a null dereference
		throw new Error("OCLC WorldCat FirstSearch: could not read host and sessionid from the page URL");
	}
	var sessionid = sMatch[1];
	var host = hMatch[1];

	var urls = [];
	var nMatch, rMatch, number, resultset;
	if(detailRe.test(doc.title)) {
		nMatch = numberRegexp.exec(url);
		number = nMatch ? nMatch[1] : 1;
		rMatch = resultsetRegexp.exec(url);
		if(rMatch) {
			resultset = rMatch[1];
		} else {
			// It's in an XPCNativeWrapper, so we have to do this black magic
			resultset = doc.forms.namedItem('main').elements.namedItem('resultset').value;
		}
		urls.push(buildExportURL(host, sessionid, resultset, number));
	} else {
		var items = Zotero.Utilities.getItemArray(doc, doc, '/WebZ/FSFETCH\\?fetchtype=fullrecord', '^(See more details for locating this item|Detailed Record)$');
		items = Zotero.selectItems(items);
		if(!items) {
			return true;
		}
		// items is keyed by record link; the key carries recno and resultset
		for(var link in items) {
			nMatch = numberRegexp.exec(link);
			rMatch = resultsetRegexp.exec(link);
			if(rMatch && nMatch) {
				urls.push(buildExportURL(host, sessionid, rMatch[1], nMatch[1]));
			}
		}
	}
	processURLs(urls);
	Zotero.wait();
}