From 313bbd246d9fb4d54567c294bd25623b9765a227 Mon Sep 17 00:00:00 2001
From: Matt Burton
Date: Mon, 30 Mar 2009 01:28:23 +0000
Subject: [PATCH] Adding translators from dev-list

---
 translators/AllAfrica.js |  92 +++++++++++++++++++++++++++
 translators/jmlr.js      | 133 +++++++++++++++++++++++++++++++++++++++
 translators/nips.js      | 109 ++++++++++++++++++++++++++++++++
 3 files changed, 334 insertions(+)
 create mode 100644 translators/AllAfrica.js
 create mode 100644 translators/jmlr.js
 create mode 100644 translators/nips.js

diff --git a/translators/AllAfrica.js b/translators/AllAfrica.js
new file mode 100644
index 000000000..822328db6
--- /dev/null
+++ b/translators/AllAfrica.js
@@ -0,0 +1,92 @@
+{
+	"translatorID":"34B1E0EA-FD02-4069-BAE4-ED4D98674A5E",
+	"translatorType":4,
+	"label":"allAfrica.com",
+	"creator":"Matt Bachtell",
+	"target":"^http://allafrica\\.com/stories/*",
+	"minVersion":"1.0.0b4.r5",
+	"maxVersion":"",
+	"priority":100,
+	"inRepository":false,
+	"lastUpdated":"2009-03-29 12:34:05"
+}
+
+/**
+ * Every URL matched by "target" is treated as a single newspaper article.
+ */
+function detectWeb (doc, url) {
+	return "newspaperArticle";
+}
+
+/**
+ * Translator entry point: saves the article displayed in doc.
+ */
+function doWeb (doc, url){
+	scrape(doc,url);
+}
+
+/**
+ * Returns the textContent of the first node matched by xpath, or null when
+ * the expression matches nothing in doc.
+ */
+function firstText(doc, xpath) {
+	var node = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null).iterateNext();
+	return node ? node.textContent : null;
+}
+
+/**
+ * Splits a byline into individual names. Names may be separated by commas,
+ * "&" or the word "and" in any capitalisation (e.g. "A, B And C").
+ */
+function splitAuthors(byline) {
+	var parts = byline.split(/\s*,\s*(?:(?:&|and)\s+)?|\s+(?:&|and)\s+/i);
+	var names = [];
+	for (var i = 0; i < parts.length; i++) {
+		// Skip blank fragments, e.g. from a trailing comma.
+		if (/\S/.test(parts[i])) {
+			names.push(parts[i]);
+		}
+	}
+	return names;
+}
+
+/**
+ * Builds a newspaperArticle item from the story page and saves it.
+ * The headline is required; date, byline and source are optional.
+ */
+function scrape(doc, url) {
+	var title = firstText(doc, "/html/body/div[3]/div/h1[@class='headline']");
+	if (title === null) {
+		throw new Error("allAfrica.com: headline not found");
+	}
+
+	// zotero entry creation code
+	var newItem = new Zotero.Item('newspaperArticle');
+	newItem.title = title;
+	newItem.url = url;
+
+	var date = firstText(doc, "/html/body/div[3]/div/p[@class='date']");
+	if (date !== null) {
+		newItem.date = date;
+	}
+
+	//AUTHORS
+	var byline = firstText(doc, "/html/body/div[3]/div/p[@class='reporter']");
+	if (byline !== null) {
+		var names = splitAuthors(byline);
+		for (var i = 0; i < names.length; i++) {
+			newItem.creators.push(Zotero.Utilities.cleanAuthor(names[i], "author"));
+		}
+	}
+
+	//SOURCE: prefer the linked image's alt text, else the first paragraph
+	var source = firstText(doc, "/html/body/div[3]/div/p/a/img/@alt");
+	if (source === null) {
+		source = firstText(doc, "/html/body/div[3]/div/p");
+	}
+	if (source !== null) {
+		newItem.publicationTitle = source;
+	}
+
+	newItem.complete();
+} // end scrape
\ No newline at end of file
diff --git a/translators/jmlr.js b/translators/jmlr.js
new file mode 100644
index 000000000..c0c2045da
--- /dev/null
+++ b/translators/jmlr.js
@@ -0,0 +1,133 @@
+{
+	"translatorID":"80bc4fd3-747c-4dc2-86e9-da7b251e1407",
+	"translatorType":4,
+	"label":"Journal of Machine Learning Research",
+	"creator":"Fei Qi",
+	"target":"^http://jmlr\\.csail\\.mit\\.edu/papers",
+	"minVersion":"1.0.0b4.r5",
+	"maxVersion":"",
+	"priority":100,
+	"inRepository":false,
+	"lastUpdated":"2009-03-21 12:34:05"
+}
+
+/**
+ * URLs containing "v" plus digits, "topic" or "special" are listings when
+ * the page title contains "JMLR", otherwise single articles.
+ */
+function detectWeb(doc, url) {
+	var contRe = /(v\d+|topic|special)/;
+	if (!contRe.test( url )) {
+		return false;
+	}
+	return doc.title.match( "JMLR" ) ? "multiple" : "journalArticle";
+}
+
+/**
+ * Scrapes one abstract page into a journalArticle item.
+ * Missing page sections are skipped instead of aborting the import.
+ */
+function scrape( doc, url ) {
+	var n = doc.documentElement.namespaceURI;
+	var ns = n ? function(prefix) {} : null;
+
+	var item = new Zotero.Item( "journalArticle" );
+	item.url = doc.location.href;
+	item.publicationTitle = "Journal of Machine Learning Research";
+
+	var title = doc.evaluate( '//div[@id="content"]/h2', doc, ns,
+			XPathResult.ANY_TYPE, null ).iterateNext();
+	if( title ){
+		item.title = title.textContent.replace( /^\s+|\s+$/g, '' );
+	}
+
+	// First paragraph: comma-separated authors, ";", then publication info.
+	var refline = doc.evaluate( '//div[@id="content"]/p', doc, ns,
+			XPathResult.ANY_TYPE, null ).iterateNext();
+	if( refline ) {
+		var info = refline.textContent.split( ';' );
+		var authors = info[0].split( ',' );
+		for ( var j = 0; j < authors.length; j++ ){
+			item.creators.push( Zotero.Utilities.cleanAuthor( authors[j], "author" ) );
+		}
+		// No trailing "." wildcard: it let the greedy year group give up its
+		// last digit when nothing followed the year.
+		var volissRe = /\s*(\d+)\(\s*(\w+)\s*\):\s*(\d+\s*--\s*\d+),\s*(\d+)/;
+		var voliss = info.length > 1 ? info[1].match( volissRe ) : null;
+		if( voliss ) {
+			item.volume = voliss[1];
+			item.date = voliss[2] + ', ' + voliss[4];
+			item.pages = voliss[3];
+		}
+	}
+
+	// The abstract is the text between "Abstract" and "[abs]".
+	var text = doc.evaluate( '//div[@id="content"]', doc, ns,
+			XPathResult.ANY_TYPE, null ).iterateNext();
+	var full = text ? text.textContent.split( 'Abstract' ) : [];
+	if( full.length > 1 ) {
+		var abs = full[1].split( '[abs]' )[0];
+		item.abstractNote = abs.replace( /^\s+|\s+$/g, '' );
+	}
+
+	// Attach the first link whose text mentions "pdf".
+	var atts = doc.evaluate( '//div[@id="content"]//a', doc, ns,
+			XPathResult.ANY_TYPE, null );
+	var att = atts.iterateNext();
+	while( att ){
+		if( 0 <= att.textContent.search( 'pdf' ) ) {
+			item.attachments = [ {url:att.href,
+				title:item.title,
+				mimeType:"application/pdf"} ];
+			break;
+		}
+		att = atts.iterateNext();
+	}
+	item.complete();
+}
+
+/**
+ * Translator entry point. On listing pages the user picks articles from
+ * the dt titles, each paired with the next dd link whose text contains
+ * "abs"; on article pages the current URL is scraped.
+ */
+function doWeb( doc, url ) {
+	var arts = new Array();
+	if (detectWeb(doc, url) == "multiple") {
+		var n = doc.documentElement.namespaceURI;
+		var ns = n ? function(prefix) {} : null;
+		// Search page
+		var items = new Object();
+		var titles = doc.evaluate( '//div[@id="content"]//dt', doc, ns,
+				XPathResult.ANY_TYPE, null );
+		var links = doc.evaluate( '//div[@id="content"]//dd/a', doc, ns,
+				XPathResult.ANY_TYPE, null );
+		var title = titles.iterateNext();
+		var link = links.iterateNext();
+		while( title ) {
+			// Advance to the next "abs" link; stop if the links run out.
+			while( link && 0 > link.textContent.search( 'abs' ) ) {
+				link = links.iterateNext();
+			}
+			if( !link ) {
+				break;
+			}
+			items[link.href] = title.textContent;
+			title = titles.iterateNext();
+			link = links.iterateNext();
+		}
+		items = Zotero.selectItems(items);
+		if( !items ) {
+			// Nothing was selected, so there is nothing to scrape.
+			return true;
+		}
+		for (var item in items) {
+			arts.push(item);
+		}
+	} else {
+		arts.push(url);
+	}
+
+	Zotero.Utilities.processDocuments( arts, scrape, function() {Zotero.done();});
+	Zotero.wait();
+}
diff --git a/translators/nips.js b/translators/nips.js
new file mode 100644
index 000000000..30827a6c7
--- /dev/null
+++ b/translators/nips.js
@@ -0,0 +1,109 @@
+{
+	"translatorID":"c816f8ad-4c73-4f6d-914e-a6e7212746cf",
+	"translatorType":4,
+	"label":"Neural Information Processing Systems",
+	"creator":"Fei Qi",
+	"target":"http://books\\.nips\\.cc/nips\\d+\\.html",
+	"minVersion":"1.0.0b4.r5",
+	"maxVersion":"",
+	"priority":100,
+	"inRepository":false,
+	"lastUpdated":"2009-03-21 11:23:12"
+}
+
+/**
+ * Every page matched by "target" is reported as a list of papers.
+ */
+function detectWeb(doc, url) {
+	return "multiple";
+}
+
+/**
+ * Fetches paper.bib, runs the import translator set below on the response
+ * and attaches paper.pdf (when present) to every item it produces.
+ *
+ * onDone is optional and is passed to doGet as its completion callback;
+ * when omitted, Zotero.done() is called as before.
+ */
+function grabCitation( paper, onDone ) {
+	Zotero.Utilities.HTTP.doGet( paper.bib, function( text ) {
+		var translator = Zotero.loadTranslator("import");
+		translator.setTranslator("9cb70025-a888-4a29-a210-93ec52da40d4");
+		translator.setString( text );
+		translator.setHandler( "itemDone", function( obj, item ) {
+			if( paper.pdf ) {
+				item.attachments = [{url:paper.pdf, title:paper.title, mimeType:"application/pdf"}];
+			}
+			item.complete();
+		} );
+		translator.translate();
+	}, onDone || function() {Zotero.done();}, null);
+}
+
+/**
+ * Translator entry point: pairs each bold table title with the "pdf" and
+ * "bib" links that follow it, lets the user pick papers and imports them.
+ */
+function doWeb( doc, url ) {
+	var n = doc.documentElement.namespaceURI;
+	var ns = n ? function(prefix) {} : null;
+	var titleRe = '//table//td/b';
+	var urlRe = '//table//td/a';
+	if (detectWeb(doc, url) == "multiple") {
+		// Retrieve items
+		var items = new Object();
+		var arts = new Array();
+		var titles = doc.evaluate( titleRe, doc, ns, XPathResult.ANY_TYPE, null);
+		var links = doc.evaluate( urlRe, doc, ns, XPathResult.ANY_TYPE, null);
+		var title = titles.iterateNext();
+		var link = links.iterateNext();
+		while( title ) {
+			var art = new Object();
+			art.title = title.textContent;
+			var found = 0;
+			while( found < 2 && link ) {
+				if( 0 <= link.textContent.search( 'pdf' ) ) {
+					art.pdf = link.href;
+					found++;
+				}
+				if( 0 <= link.textContent.search( 'bib' ) ) {
+					art.bib = link.href;
+					found++;
+				}
+				link = links.iterateNext();
+			}
+			// Only papers with a citation link can be imported.
+			if( art.bib ) {
+				items[arts.length] = art.title;
+				arts.push( art );
+			}
+			title = titles.iterateNext();
+			// NOTE(review): one more link is skipped per paper, as before;
+			// presumably a third per-paper link - confirm against the site.
+			link = links.iterateNext();
+		}
+		items = Zotero.selectItems( items );
+		if( !items ) {
+			return true;
+		}
+		// Signal completion once, after the last selected citation, rather
+		// than after every single request.
+		var pending = 0;
+		for (var key in items) {
+			pending++;
+		}
+		if( pending == 0 ) {
+			return true;
+		}
+		var finished = function() {
+			pending--;
+			if( pending == 0 ) {
+				Zotero.done();
+			}
+		};
+		for (var key in items) {
+			grabCitation( arts[key], finished );
+		}
+	}
+	Zotero.wait();
+}