169 lines
5.3 KiB
JavaScript
169 lines
5.3 KiB
JavaScript
{
|
|
"translatorID":"72cb2536-3211-41e0-ae8b-974c0385e085",
|
|
"translatorType":4,
|
|
"label":"ARTFL Encyclopedie",
|
|
"creator":"Sean Takats",
|
|
"target":"/cgi-bin/philologic31/(getobject\\.pl\\?c\\.[0-9]+:[0-9]+\\.encyclopedie|search3t\\?dbname=encyclopedie0507)",
|
|
"minVersion":"1.0.0b4.r1",
|
|
"maxVersion":"",
|
|
"priority":100,
|
|
"inRepository":true,
|
|
"lastUpdated":"2011-01-11 04:31:00"
|
|
}
|
|
|
|
/**
 * Tell Zotero what kind of page this is.
 *
 * @param {Document} doc - The page document (not inspected here).
 * @param {string} url - The page URL.
 * @returns {string} "encyclopediaArticle" when the URL points at a single
 *   article (a getobject.pl request), otherwise "multiple" (doWeb treats
 *   every other matching URL as a search-results page).
 */
function detectWeb(doc, url) {
	// Single articles are served by getobject.pl; everything else that the
	// translator target matches is a search listing.
	if (url.indexOf("getobject.pl") !== -1) {
		return "encyclopediaArticle";
	}
	return "multiple";
}
|
|
|
|
/**
 * Expand an ARTFL contributor label into a "Last, First" name.
 *
 * Known labels are looked up in a fixed table; unknown labels are returned
 * unchanged, except that a single trailing "5" is stripped.
 *
 * @param {string} author - Contributor label as it appears in ARTFL results.
 * @returns {string} The reconciled author name.
 */
function reconcileAuthor(author) {
	var authorMap = {
		"Venel": "Venel, Gabriel-François",
		"d'Aumont": "d'Aumont, Arnulphe",
		"de La Chapelle": "de La Chapelle, Jean-Baptiste",
		"Bourgelat": "Bourgelat, Claude",
		"Dumarsais": "Du Marsais, César Chesneau",
		"Mallet": "Mallet, Edme-François",
		"Toussaint": "Toussaint, François-Vincent",
		"Daubenton": "Daubenton, Louis-Jean-Marie",
		"d'Argenville": "d'Argenville, Antoine-Joseph Desallier",
		"Tarin": "Tarin, Pierre",
		"Vandenesse": "de Vandenesse, Urbain",
		"Blondel": "Blondel, Jacques-François",
		"Le Blond": "Le Blond, Guillaume",
		"Rousseau": "Rousseau, Jean-Jacques",
		"Eidous": "Eidous, Marc-Antoine",
		"d'Alembert": "d'Alembert, Jean le Rond",
		"Louis": "Louis, Antoine",
		"Bellin": "Bellin, Jacques-Nicolas",
		"Diderot": "Diderot, Denis",
		"Diderot1": "Diderot, Denis",
		"Diderot2": "Diderot, Denis",
		"de Jaucourt": "de Jaucourt, Chevalier Louis",
		"Jaucourt": "de Jaucourt, Chevalier Louis",
		"d'Holbach": "d'Holbach, Baron"
		/* not yet mapped
		Yvon
		Forbonnais
		Douchet and Beauzée
		Boucher d'Argis
		Lenglet Du Fresnoy
		Cahusac
		Pestré
		Daubenton, le Subdélégué
		Goussier
		de Villiers
		Barthès
		Morellet
		Malouin
		Ménuret de Chambaud
		Landois
		Le Roy
		*/
	};

	var name = author;

	// Own-property check only: a plain `authorMap[name]` lookup would also
	// find inherited members such as "constructor" or "toString" and replace
	// the name with a function.
	if (Object.prototype.hasOwnProperty.call(authorMap, name)) {
		name = authorMap[name];
	}

	// remove ARTFL's trailing 5 for odd contributors (e.g. Turgot5)
	if (name.charAt(name.length - 1) === "5") {
		name = name.slice(0, -1);
	}

	return name;
}
|
|
|
|
/**
 * Escape regular-expression metacharacters so that `text` can be embedded
 * in a RegExp source and matched literally.
 *
 * @param {string} text - Arbitrary text.
 * @returns {string} The text with regex metacharacters backslash-escaped.
 */
function escapeRegExp(text) {
	return String(text).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Return the textContent of the first node matched by an XPath expression.
 *
 * @param {Document} doc - Document to evaluate against.
 * @param {string} xpath - XPath expression.
 * @param {?Function} nsResolver - Namespace resolver, or null.
 * @returns {?string} The node's text, or null when nothing matches.
 */
function firstNodeText(doc, xpath, nsResolver) {
	var node = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	return node ? node.textContent : null;
}

/**
 * Build a Zotero "encyclopediaArticle" item from a single article page.
 *
 * The title is read from the page (first <font> inside div.text, falling
 * back to the first <b>). Author and classification tags are not read from
 * the page itself: a search request for the title is issued against the
 * same host and the first matching result line is parsed for them. Diderot
 * and d'Alembert are always added as editors, and the item is completed
 * even when the lookup yields no author or tags.
 *
 * @param {Document} doc - The article page document.
 * @throws {Error} When no title node can be found on the page.
 */
function scrape(doc) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function (prefix) {
		return prefix === "x" ? namespace : null;
	} : null;
	var url = doc.location.href;

	var title = firstNodeText(doc, '/html/body/div[@class="text"]/font', nsResolver);
	if (title === null) {
		title = firstNodeText(doc, '/html/body/div[@class="text"]/b', nsResolver);
	}
	if (title === null) {
		// Previously surfaced as an opaque TypeError on a null node.
		throw new Error("ARTFL Encyclopedie: article title not found on page");
	}

	var newItem = new Zotero.Item("encyclopediaArticle");
	newItem.title = title;
	newItem.encyclopediaTitle = "Encyclopédie, ou Dictionnaire raisonné des sciences, des arts et des métiers";
	newItem.shortTitle = "Encyclopédie";
	newItem.date = "1751-1772";
	newItem.publisher = "Briasson";
	newItem.place = "Paris";
	newItem.url = url;

	newItem.attachments.push({title: "ARTFL Snapshot", mimeType: "text/html", url: url, snapshot: true});

	// get author and tags
	var hMatch = /^(https?:\/\/[^\/]+)\//.exec(url);
	var host = hMatch[1];
	var getString1 = "/cgi-bin/philologic31/search3t?dbname=encyclopedie0507&word=&dgdivhead=";
	var getString2 = "&dgdivocauthor=&dgdivocplacename=&dgdivocsalutation=&dgdivocclassification=&dgdivocpartofspeech=&dgdivtype=&CONJUNCT=PHRASE&DISTANCE=3&PROXY=or+fewer&OUTPUT=conc&POLESPAN=5&KWSS=1&KWSSPRLIM=500";

	// Encode the title so characters such as "&", "#" or "+" cannot corrupt
	// the query string.
	var lookupUrl = host + getString1 + encodeURIComponent(title) + getString2;

	Zotero.Utilities.HTTP.doGet(lookupUrl, function (text) {
		// The title is matched literally; titles routinely contain regex
		// metacharacters such as parentheses and periods.
		var anchor = ">" + escapeRegExp(title) + "</a>";

		// Classification: the bracketed list following the result link.
		var tagMatch = new RegExp(anchor + "[^\\[]*\\[([^\\]]*)\\]", "i").exec(text);
		if (tagMatch && tagMatch[1] != "unclassified") {
			// The response is raw HTML, so ampersands arrive as entities.
			var tags = tagMatch[1].replace(/&amp;/g, "&").split(";");
			for (var j = 0; j < tags.length; j++) {
				var tag = Zotero.Utilities.trimInternal(tags[j]);
				if (tag) {
					newItem.tags.push(tag);
				}
			}
		}

		// Author: the comma-delimited field right after the result link.
		var authorMatch = new RegExp(anchor + ",([^,]*),", "i").exec(text);
		if (authorMatch) {
			var author = reconcileAuthor(Zotero.Utilities.trimInternal(authorMatch[1]));
			if (author != "NA") { // ignore unknown authors
				newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "author", true));
			}
		}

		newItem.creators.push({firstName: "Denis", lastName: "Diderot", creatorType: "editor"});
		newItem.creators.push({firstName: "Jean le Rond", lastName: "d'Alembert", creatorType: "editor"});
		newItem.complete();
	}, function () { Zotero.done(); }, null);
	Zotero.wait();
}
|
|
|
|
/**
 * Translator entry point.
 *
 * For a single-article URL (getobject.pl) the page is scraped directly.
 * For any other URL the page is treated as a search listing: every link
 * under div.text/p is offered to the user through Zotero.selectItems, and
 * the chosen article pages are loaded and passed to scrape().
 *
 * @param {Document} doc - The current page document.
 * @param {string} url - The current page URL.
 * @returns {boolean|undefined} true when the user cancels the selection
 *   dialog; otherwise nothing.
 */
function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function (prefix) {
		return prefix === "x" ? namespace : null;
	} : null;

	if (url.indexOf("getobject.pl") !== -1) {
		// single article
		scrape(doc);
		return;
	}

	// search page: collect link -> title for every result
	var candidates = {};
	var xpath = '/html/body/div[@class="text"]/p/a';
	var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
	var elmt = elmts.iterateNext();
	while (elmt) {
		var title = elmt.textContent;
		var link = elmt.href;
		if (title && link) {
			candidates[link] = title;
		}
		elmt = elmts.iterateNext();
	}

	var selected = Zotero.selectItems(candidates);
	if (!selected) {
		return true;
	}

	// The keys of the selection are the article URLs.
	var urls = [];
	for (var i in selected) {
		if (Object.prototype.hasOwnProperty.call(selected, i)) {
			urls.push(i);
		}
	}

	Zotero.Utilities.processDocuments(urls, scrape, function () { Zotero.done(); });
	Zotero.wait();
}