541 lines
14 KiB
JavaScript
541 lines
14 KiB
JavaScript
{
|
|
"translatorID": "57a00950-f0d1-4b41-b6ba-44ff0fc30289",
|
|
"label": "Google Scholar",
|
|
"creator": "Simon Kornblith, Frank Bennett",
|
|
"target": "^https?://scholar\\.google\\.(?:com|com?\\.[a-z]{2}|[a-z]{2}|co\\.[a-z]{2})/scholar(?:_case)*",
|
|
"minVersion": "2.1.9",
|
|
"maxVersion": "",
|
|
"priority": 100,
|
|
"inRepository": true,
|
|
"translatorType": 4,
|
|
"browserSupport": "gcs",
|
|
"lastUpdated": "2011-07-04 13:18:22"
|
|
}
|
|
|
|
/*
|
|
* Test pages
|
|
*
|
|
* Searches of Google Scholar with the following terms should yield a folder
|
|
* icon that works. Check that unlinked ([CITATION]) items that provide
|
|
* no BibTeX data (there is currently one under "Marbury v. Madison",
|
|
* and "clifford" seems to be a good source of garbage) are
|
|
* dropped from the listings:
|
|
*
|
|
* marbury v madison
|
|
* kelo
|
|
* smith
|
|
* view of the cathedral
|
|
* clifford
|
|
*
|
|
* "How cited" pages should NOT yield a page or folder icon. The
|
|
* URLs to these currently look like this:
|
|
*
|
|
* http://scholar.google.co.jp/scholar_case?about=1101424605047973909&q=kelo&hl=en&as_sdt=2002
|
|
*
|
|
* Case pages should present a document icon that works:
|
|
*
|
|
* http://scholar.google.co.jp/scholar_case?case=18273389148555376997&hl=en&as_sdt=2002&kqfp=13204897074208725174&kql=186&kqpfp=16170611681001262513#kq
|
|
*/
|
|
|
|
|
|
/*
|
|
* ###############################
|
|
* ### detectWeb() and doWeb() ###
|
|
* ###############################
|
|
*/
|
|
|
|
// Running counter used by ItemFactory.prototype.saveItem to give each saved
// case item a temporary itemID, so that parallel citations of the same case
// can list each other in their seeAlso arrays.
var bogusItemID = 1;
|
|
|
|
/**
 * Decide which icon, if any, is offered for a Google Scholar page.
 *
 * @param {Document} doc - The loaded page (not inspected here).
 * @param {string} url - Address of the page.
 * @returns {string|boolean} "case" for a case page, false for a case
 *     "How cited" page (its URL carries an about= parameter), and
 *     "multiple" for everything else.
 */
var detectWeb = function (doc, url) {
	var isCasePage = /scholar_case/.test(url);
	if (!isCasePage) {
		// Anything that is not a case page is treated as a search listing.
		return "multiple";
	}
	// "How cited" pages must not present an icon at all.
	return (/about=/).test(url) ? false : "case";
};
|
|
|
|
/**
 * Translator entry point: dispatch to the case scraper or the listing scraper.
 *
 * Case pages are scraped directly. For a listing, the page is scraped as-is
 * when it already contains "scholar.bib" links; otherwise the preferences
 * URL is requested (English interface, BibTeX links on), its "scisig" token
 * is appended, and the page returned by that second request is scraped.
 *
 * @param {Document} doc - The page being translated.
 * @param {string} url - Address of that page.
 */
function doWeb(doc, url) {
	var nsResolver = doc.createNSResolver(doc.documentElement);

	if (url.match(/scholar_case/)) {
		scrapeCase(doc, url);
		return;
	}

	var bibtexLink = doc.evaluate('//a[contains(@href, "scholar.bib")]',
		doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();

	if (bibtexLink) {
		scrapeListing(doc);
	} else {
		// Drop any interface-language parameter, then point the request at
		// the preferences endpoint, forcing English and BibTeX links.
		var prefsUrl = url
			.replace(/hl\=[^&]*&?/, "")
			.replace("scholar?", "scholar_setprefs?hl=en&scis=yes&scisf=4&submit=Save+Preferences&");

		Zotero.Utilities.processDocuments(prefsUrl, function (scisigDoc) {
			var scisig = scisigDoc.evaluate('//input[@name="scisig"]',
				scisigDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			var signedUrl = prefsUrl + "&scisig=" + scisig.value;

			Zotero.Utilities.processDocuments(signedUrl, function (listingDoc) {
				scrapeListing(listingDoc);
			}, function () {});
		}, function () {});
	}

	// Listing translation finishes asynchronously.
	Zotero.wait();
}
|
|
|
|
|
|
/*
|
|
* #########################
|
|
* ### Scraper Functions ###
|
|
* #########################
|
|
*/
|
|
/**
 * Scrape a search-results page: build one ItemFactory per result, let the
 * user choose among them, and hand the chosen factories to processFactories.
 *
 * Titles, citelets and BibTeX links are read from three document-wide XPath
 * result lists that are walked in lock-step, one entry of each per result.
 *
 * @param {Document} doc - A listing page containing "scholar.bib" links.
 * @returns {boolean} Always true; the selection itself completes through
 *     the Zotero.selectItems callback.
 */
var scrapeListing = function (doc) {
	var nsResolver = doc.createNSResolver(doc.documentElement);

	// XML fragment lists
	var titleFrags = doc.evaluate('//div[@class="gs_r"]//h3', doc, nsResolver, XPathResult.ANY_TYPE, null);
	var citeletFrags = doc.evaluate('//span[@class="gs_a"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
	var bibtexFrags = doc.evaluate('//a[contains(@href, "scholar.bib")]',
		doc, nsResolver, XPathResult.ANY_TYPE, null);

	var labels = [];
	var factories = [];
	var titleFrag, citeletFrag, bibtexFrag, attachmentFrag;
	var titleString, attachmentLinks, factory;

	while (true) {
		titleFrag = titleFrags.iterateNext();
		if (!titleFrag) {
			break;
		}
		titleString = titleFrag.textContent;
		citeletFrag = citeletFrags.iterateNext();
		bibtexFrag = bibtexFrags.iterateNext();
		// The three lists are expected to have one entry per result. If
		// either companion list runs out first, stop here instead of
		// dereferencing null and losing the rows already collected.
		if (!citeletFrag || !bibtexFrag) {
			break;
		}

		// The first link inside the title heading, if any, is the linked document.
		attachmentFrag = doc.evaluate('.//a',
			titleFrag, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		attachmentLinks = attachmentFrag ? [attachmentFrag.href] : [];

		// Instantiate item factory with available data
		factory = new ItemFactory(citeletFrag.textContent, attachmentLinks, titleString, bibtexFrag.href);

		if (!factory.hasUsefulData()) {
			continue;
		}
		// (Feed the array used in the selection list)
		if (factory.hyphenSplit.length) {
			labels.push(titleString + " (" + factory.trailingInfo + ")");
		} else {
			labels.push(titleString);
		}
		factories.push(factory);
	}

	Zotero.selectItems(labels, function (items) {
		if (!items) {
			return false;
		}

		// Keys of the selection are indexes into labels/factories.
		var newFactories = [];
		for (var i in items) {
			if (Object.prototype.hasOwnProperty.call(items, i)) {
				newFactories.push(factories[i]);
			}
		}

		processFactories(newFactories);
	});
	return true;
};
|
|
|
|
/**
 * Save the items described by a queue of factories, one after another.
 *
 * Each call removes the first factory from the array. If its citelet yields a
 * volume/reporter/page citation the item is saved straight from the citelet.
 * Otherwise the BibTeX record is requested: a usable record is passed to the
 * BibTeX import translator, and an empty one causes the item to be saved as a
 * case after an attempt to read a docket number from the linked document.
 * Zotero.done() is called once the queue is empty.
 *
 * @param {ItemFactory[]} factories - Remaining factories; consumed in place.
 */
function processFactories(factories) {
	if (!factories.length) {
		Zotero.done();
		return;
	}

	var factory = factories.shift();
	factory.getCourt();
	factory.getVolRepPag();
	if (factory.hasReporter()) {
		// If we win here, we get by without fetching the BibTeX object at all.
		factory.saveItem();
		processFactories(factories);
		return;
	}

	// Filled in just before translation and attached in the itemDone handler.
	var attachments;

	// The only supplementary translator we use is BibTeX
	var bibtexTranslator = Zotero.loadTranslator("import");
	bibtexTranslator.setTranslator("9cb70025-a888-4a29-a210-93ec52da40d4");
	bibtexTranslator.setHandler("itemDone", function (obj, item) {
		item.attachments = attachments;
		item.complete();
	});

	factory.getBibtexData(function (res) {
		if (res) {
			// Has BibTeX data with title, pass it through to the BibTeX translator
			attachments = factory.getAttachments("Page");
			bibtexTranslator.setString(res);
			bibtexTranslator.setHandler("done", function () {
				processFactories(factories);
			});
			bibtexTranslator.translate();
			return;
		}
		// If BibTeX is empty, this is some kind of case, if anything.
		// Metadata from the citelet, supplemented by the target
		// document for the docket number, if possible. (No reporter was
		// found above, so the docket number lookup always applies here.)
		factory.getDocketNumber(null, function () {
			factory.saveItem();
			processFactories(factories);
		});
	});
}
|
|
|
|
/**
 * Scrape a single case page.
 *
 * The citelet is the text of the element with id="gsl_reference"; it looks
 * kind of like this:
 *   Powell v. McCormack, 395 US 486 - Supreme Court 1969
 * When that element is absent nothing is saved.
 *
 * @param {Document} doc - The case page.
 * @param {string} url - Address of the page; becomes the item's attachment link.
 */
var scrapeCase = function (doc, url) {
	var nsResolver = doc.createNSResolver(doc.documentElement);
	var refFrag = doc.evaluate('//div[@id="gsl_reference"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	if (!refFrag) {
		return;
	}

	// The factory creates the Zotero item(s) itself in saveItem().
	var factory = new ItemFactory(refFrag.textContent, [url]);
	// Parsing steps are order-dependent: each consumes part of the citelet.
	factory.repairCitelet();
	factory.getDate();
	factory.getCourt();
	factory.getVolRepPag();
	if (!factory.hasReporter()) {
		// Look for docket number in the current document
		factory.getDocketNumber(doc);
	}
	factory.getTitle();
	factory.saveItem();
};
|
|
|
|
|
|
/*
|
|
* ####################
|
|
* ### Item Factory ###
|
|
* ####################
|
|
*/
|
|
|
|
/**
 * Holds the raw strings scraped for one result and the fields parsed from them.
 *
 * @constructor
 * @param {string} citeletString - Citation summary text for the result.
 * @param {string[]} attachmentLinks - URLs of linked documents (may be empty).
 * @param {string} [titleString] - Title text, when known up front.
 * @param {string} [bibtexLink] - URL of the result's BibTeX record.
 */
var ItemFactory = function (citeletString, attachmentLinks, titleString, bibtexLink) {
	// Single-valued fields; truthy ones are copied onto the saved item.
	this.v = {
		title: titleString,
		number: false,
		court: false,
		extra: false,
		date: undefined,	// undefined = not parsed yet, false = parsed, none found
		jurisdiction: false,
		docketNumber: false
	};
	// Multi-valued fields: one entry per volume/reporter/pages citation.
	this.vv = { volRepPag: [] };
	// portable array
	this.attachmentLinks = attachmentLinks;
	// working strings
	this.citelet = citeletString;
	this.bibtexLink = bibtexLink;
	this.bibtexData = undefined;
	this.trailingInfo = false;
	// simple arrays of strings, filled in by the parsing steps
	this.hyphenSplit = false;
	this.commaSplit = false;
};
|
|
|
|
|
|
/**
 * Give a citelet that has no " - " separator one, by turning the first
 * ", Word:" sequence (comma, whitespace, capitalised word, colon) into
 * " - Word:". Citelets that already contain a hyphen separator are untouched.
 */
ItemFactory.prototype.repairCitelet = function () {
	var hasHyphenSeparator = /\s+-\s+/.test(this.citelet);
	if (hasHyphenSeparator) {
		return;
	}
	this.citelet = this.citelet.replace(/,\s+([A-Z][a-z]+:)/, " - $1");
};
|
|
|
|
|
|
/**
 * Re-case a title that contains an all-caps word of four or more letters.
 * The title is lower-cased, passed through Zotero.Utilities.capitalizeTitle,
 * and the first stand-alone capital "V" is turned back into "v".
 */
ItemFactory.prototype.repairTitle = function () {
	// All-caps words of four or more characters probably need fixing.
	var shouting = /(?:[^a-z]|^)[A-Z]{4,}(?:[^a-z]|$)/;
	if (!shouting.test(this.v.title)) {
		return;
	}
	var recased = Zotero.Utilities.capitalizeTitle(this.v.title.toLowerCase());
	this.v.title = recased.replace(/([^0-9a-z])V([^0-9a-z])/, "$1v$2");
};
|
|
|
|
|
|
/**
 * Report whether the citelet is worth offering to the user.
 *
 * @returns {boolean} true when a date can be parsed from the citelet or,
 *     failing that, when hasInitials() reports author initials.
 */
ItemFactory.prototype.hasUsefulData = function () {
	// getDate() runs first: it also performs the hyphen split that
	// hasInitials() reads.
	return Boolean(this.getDate()) || Boolean(this.hasInitials());
};
|
|
|
|
|
|
/**
 * Test whether the first hyphen-separated segment of the citelet contains a
 * capital letter followed by a space.
 *
 * @returns {boolean} false when the citelet has not been split yet or the
 *     split is empty.
 */
ItemFactory.prototype.hasInitials = function () {
	var segments = this.hyphenSplit;
	if (!segments.length) {
		return false;
	}
	return /[A-Z] /.test(segments[0]);
};
|
|
|
|
|
|
/**
 * @returns {boolean} true when at least one volume/reporter/pages citation
 *     has been collected in this.vv.volRepPag.
 */
ItemFactory.prototype.hasReporter = function () {
	return this.vv.volRepPag.length > 0;
};
|
|
|
|
|
|
/**
 * Citelet parsing, step (1): split the citelet on " - " and pull a trailing
 * four-digit year out of the last segment that ends in one.
 *
 * On the first call the citelet is split into this.hyphenSplit and the last
 * segment is kept (as a one-element array) in this.trailingInfo. The segment
 * that supplied the year is reduced to the text before the year (minus the
 * one whitespace character separating them, or "" when the year is not
 * preceded by whitespace), and all later segments are dropped. The result is
 * cached in this.v.date, so later calls do not parse again.
 *
 * The year is located with a plain suffix test and string slicing. A single
 * pattern with a repeated "(.*)\s+" group in front of the year backtracks
 * exponentially on long multi-word segments that do not end in a year.
 * NOTE(review): for a segment containing a line break, the whole text before
 * the year is kept here; confirm no caller relies on only the last line.
 *
 * @returns {string|boolean} The year as a string, or false if none was found.
 */
ItemFactory.prototype.getDate = function () {
	var i, segment, head;
	if (!this.hyphenSplit) {
		this.hyphenSplit = this.citelet.split(/\s+-\s+/);
		this.trailingInfo = this.hyphenSplit.slice(-1);
	}
	// undefined means "not parsed yet"; false means "parsed, nothing found".
	if (!this.v.date && this.v.date !== false) {
		this.v.date = false;
		for (i = this.hyphenSplit.length - 1; i > -1; i -= 1) {
			segment = this.hyphenSplit[i];
			if (!/[0-9]{4}$/.test(segment)) {
				continue;
			}
			this.v.date = segment.slice(-4);
			head = segment.slice(0, -4);
			this.hyphenSplit[i] = /\s$/.test(head) ? head.slice(0, -1) : "";
			this.hyphenSplit = this.hyphenSplit.slice(0, i + 1);
			break;
		}
	}
	return this.v.date;
};
|
|
|
|
|
|
/**
 * Citelet parsing, step (2): take the last remaining hyphen segment as the
 * court name.
 *
 * The segment is removed from this.hyphenSplit. A trailing comma is dropped
 * and a trailing ellipsis character is replaced with "Court". A leading
 * "Word:" prefix is split off and recorded in this.v.extra as
 * "{:jurisdiction: Word}"; every underscore in the rest becomes a space.
 * Expects getDate() to have populated this.hyphenSplit already.
 *
 * @returns {string|boolean} The court name stored in this.v.court.
 */
ItemFactory.prototype.getCourt = function () {
	var s, m;
	s = this.hyphenSplit.pop().replace(/,\s*$/, "").replace(/\u2026\s*$/, "Court");
	m = s.match(/(?:([a-zA-Z]+):\s*)*(.*)/);
	if (m) {
		// Use a global regex: the three-argument replace(str, str, flags)
		// form is a non-standard extension and elsewhere replaces only the
		// first underscore.
		this.v.court = m[2].replace(/_/g, " ");
		if (m[1]) {
			this.v.extra = "{:jurisdiction: " + m[1] + "}";
		}
	}
	return this.v.court;
};
|
|
|
|
|
|
/**
 * Citelet parsing, step (3): peel "volume reporter pages" citations off the
 * end of the last remaining hyphen segment.
 *
 * The segment is split on commas into this.commaSplit and read from the end.
 * Each part that looks like "<digits> <reporter> <pages>" is removed from
 * this.commaSplit; reading stops at the first part that does not. A removed
 * part is recorded in this.vv.volRepPag when its pages end in a digit, or
 * when it is the very first part and nothing has been recorded yet.
 */
ItemFactory.prototype.getVolRepPag = function () {
	// Citelet parsing, step (3)
	var parts, idx, hit, cite, found;
	if (!this.hyphenSplit.length) {
		return;
	}
	parts = this.hyphenSplit[this.hyphenSplit.length - 1].split(/\s*,\s+/);
	this.commaSplit = parts;
	found = false;
	idx = parts.length - 1;
	while (idx >= 0) {
		hit = parts[idx].match(/^([0-9]+)\s+(.*)\s+(.*)/);
		if (!hit) {
			break;
		}
		cite = {
			volume: hit[1],
			reporter: hit[2],
			pages: hit[3].replace(/\s*$/, "")
		};
		// The part is consumed whether or not it ends up being recorded.
		parts.pop();
		if (/[0-9]$/.test(cite.pages) || (idx === 0 && !found)) {
			found = true;
			this.vv.volRepPag.push(cite);
		}
		idx -= 1;
	}
};
|
|
|
|
|
|
/**
 * Citelet parsing, step (4) [optional]: whatever is left in this.commaSplit
 * is re-joined with ", " and becomes the title. Does nothing when the comma
 * split has not been made.
 */
ItemFactory.prototype.getTitle = function () {
	var pieces = this.commaSplit;
	if (!pieces) {
		return;
	}
	this.v.title = pieces.join(", ");
};
|
|
|
|
|
|
/**
 * Read the docket number of a case into this.v.docketNumber.
 *
 * The number is the text of the first <center> element that follows a
 * <center> containing <h3 id="gsl_case_name">, with a leading "No." and a
 * trailing period stripped. When no such element exists the field is left
 * unchanged.
 *
 * @param {Document|null} doc - The case document. When falsy, the first
 *     attachment link is fetched and the number is read from that document;
 *     if there is no attachment link, nothing is fetched.
 * @param {Function} [callback] - Called with no arguments once the lookup
 *     has finished (immediately when doc is supplied or nothing can be
 *     fetched).
 */
ItemFactory.prototype.getDocketNumber = function (doc, callback) {
	var me = this;

	// Extract the docket number from a loaded document, then continue.
	function extract(sourceDoc) {
		if (sourceDoc) {
			var nsResolver = sourceDoc.createNSResolver(sourceDoc.documentElement);
			var docNumFrag = sourceDoc.evaluate('//center[preceding-sibling::center//h3[@id="gsl_case_name"]]', sourceDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if (docNumFrag) {
				me.v.docketNumber = docNumFrag.textContent.replace(/^\s*[Nn][Oo](?:.|\s+)\s*/, "").replace(/\.\s*$/, "");
			}
		}
		if (callback) {
			callback();
		}
	}

	if (doc) {
		extract(doc);
		return;
	}

	// Needs doc fetch and xpath
	var link = this.attachmentLinks && this.attachmentLinks[0];
	if (!link) {
		// Nothing to fetch; let the caller carry on without a docket number.
		if (callback) {
			callback();
		}
		return;
	}
	Zotero.Utilities.processDocuments(link,
		function (fetchedDoc) { extract(fetchedDoc); }, function () {});
};
|
|
|
|
|
|
/**
 * Build attachment descriptors for every link held by this factory.
 *
 * @param {string} doctype - Word appended to the attachment title, giving
 *     "Google Scholar Linked <doctype>".
 * @returns {Object[]} One {title, type, url} object per attachment link, with
 *     type always "text/html"; an empty array when there are no links.
 */
ItemFactory.prototype.getAttachments = function (doctype) {
	var label = "Google Scholar Linked " + doctype;
	return this.attachmentLinks.map(function (link) {
		return { title: label, type: "text/html", url: link };
	});
};
|
|
|
|
|
|
/**
 * Replace the attachments of the item currently being built (this.item) with
 * the descriptors returned by getAttachments(doctype).
 *
 * @param {string} doctype - Passed through to getAttachments.
 */
ItemFactory.prototype.pushAttachments = function (doctype) {
	var descriptors = this.getAttachments(doctype);
	this.item.attachments = descriptors;
};
|
|
|
|
|
|
/**
 * Obtain the BibTeX record for this result and pass it to a callback.
 *
 * The record is downloaded from this.bibtexLink at most once per factory. A
 * record containing an empty title ("title={{}}") is treated as unusable and
 * stored as false. The outcome is cached in this.bibtexData, so later calls
 * answer from the cache.
 *
 * @param {Function} callback - Receives the BibTeX text, or false when the
 *     record has no title.
 */
ItemFactory.prototype.getBibtexData = function (callback) {
	// Inside the download callback "this" is not the factory, so keep an
	// explicit reference for storing the result on the instance.
	var me = this;

	// A truthy value or an explicit false both mean "already fetched".
	if (this.bibtexData || this.bibtexData === false) {
		callback(this.bibtexData);
		return;
	}

	Zotero.Utilities.doGet(this.bibtexLink, function (bibtexData) {
		me.bibtexData = /title=\{\{\}\}/.test(bibtexData) ? false : bibtexData;
		callback(me.bibtexData);
	});
};
|
|
|
|
|
|
/**
 * Create and complete the Zotero "case" item(s) for this factory.
 *
 * Nothing is saved when there is no title. With no reporter citations a
 * single item is saved with the "Judgement" attachments. Otherwise one item
 * is saved per volume/reporter/pages citation; each gets a temporary itemID
 * from the bogusItemID counter, lists the IDs of all its siblings in seeAlso,
 * and only the last one carries the attachments.
 */
ItemFactory.prototype.saveItem = function () {
	var i, ilen, j, jlen, key;
	if (!this.v.title) {
		return;
	}
	this.repairTitle();

	if (!this.vv.volRepPag.length) {
		this.item = new Zotero.Item("case");
		this.saveItemCommonVars();
		this.pushAttachments("Judgement");
		this.item.complete();
		return;
	}

	var completed_items = [];
	for (i = 0, ilen = this.vv.volRepPag.length; i < ilen; i += 1) {
		this.item = new Zotero.Item("case");
		// Copy volume / reporter / pages for this citation.
		for (key in this.vv.volRepPag[i]) {
			if (this.vv.volRepPag[i][key]) {
				this.item[key] = this.vv.volRepPag[i][key];
			}
		}
		this.saveItemCommonVars();
		if (i === (this.vv.volRepPag.length - 1)) {
			this.pushAttachments("Judgement");
		}
		this.item.itemID = "" + bogusItemID;
		bogusItemID += 1;
		completed_items.push(this.item);
	}

	// Cross-link the parallel citations, then complete them.
	for (i = 0, ilen = completed_items.length; i < ilen; i += 1) {
		for (j = 0, jlen = completed_items.length; j < jlen; j += 1) {
			if (i === j) {
				continue;
			}
			completed_items[i].seeAlso.push(completed_items[j].itemID);
		}
		completed_items[i].complete();
	}
};
|
|
|
|
|
|
/**
 * Copy every truthy single-valued field from this.v onto the item currently
 * being built (this.item). Fields still set to false or undefined are skipped.
 */
ItemFactory.prototype.saveItemCommonVars = function () {
	// Declared locally so the loop variable does not leak into global scope.
	var key;
	for (key in this.v) {
		if (this.v[key]) {
			this.item[key] = this.v[key];
		}
	}
};
|
|
|
|
|
|
/** BEGIN TEST CASES **/
|
|
var testCases = [
	{
		"type": "web",
		"url": "http://scholar.google.com/scholar?q=marbury&hl=en&btnG=Search&as_sdt=1%2C22&as_sdtp=on",
		"items": "multiple"
	},
	{
		"type": "web",
		"url": "http://scholar.google.com/scholar?hl=en&q=kelo&btnG=Search&as_sdt=0%2C22&as_ylo=&as_vis=0",
		"items": "multiple"
	},
	{
		"type": "web",
		"url": "http://scholar.google.com/scholar?hl=en&q=smith&btnG=Search&as_sdt=0%2C22&as_ylo=&as_vis=0",
		"items": "multiple"
	},
	{
		"type": "web",
		"url": "http://scholar.google.com/scholar?hl=en&q=view+of+the+cathedral&btnG=Search&as_sdt=0%2C22&as_ylo=&as_vis=0",
		"items": "multiple"
	},
	{
		"type": "web",
		"url": "http://scholar.google.com/scholar?hl=en&q=clifford&btnG=Search&as_sdt=0%2C22&as_ylo=&as_vis=0",
		"items": "multiple"
	},
	{
		"type": "web",
		"url": "http://scholar.google.com/scholar_case?case=9834052745083343188&q=marbury+v+madison&hl=en&as_sdt=2,5",
		"items": [
			{
				"itemType": "case",
				"creators": [],
				"notes": [],
				"tags": [],
				"seeAlso": [],
				"attachments": [
					{
						"title": "Google Scholar Linked Judgement",
						"type": "text/html",
						"url": false
					}
				],
				"volume": "5",
				"reporter": "US",
				"pages": "137",
				"title": "Marbury v. Madison",
				"court": "Supreme Court",
				"date": "1803",
				"itemID": "1",
				"libraryCatalog": "Google Scholar"
			}
		]
	}
]
|
|
/** END TEST CASES **/
|