-Fixes some stuff in the SciELO translator.

This commit is contained in:
Michael Berkowitz 2008-05-14 14:09:05 +00:00
parent abbc8f0397
commit c5e3d0c359

View File

@ -22,7 +22,7 @@
-- Set the following timestamp to the most recent scraper update date
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-05-14 14:30:00'));
REPLACE INTO version VALUES ('repository', STRFTIME('%s', '2008-05-14 15:00:00'));
REPLACE INTO translators VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '1.0.0b4.r1', '', '2008-03-21 20:00:00', '1', '100', '4', 'Amazon.com', 'Sean Takats and Michael Berkowitz', '^https?://(?:www\.)?amazon',
'function detectWeb(doc, url) {
@ -4987,24 +4987,36 @@ REPLACE INTO translators VALUES ('636c8ea6-2af7-4488-8ccd-ea280e4a7a98', '1.0.0b
Zotero.wait();
}');
REPLACE INTO translators VALUES ('3eabecf9-663a-4774-a3e6-0790d2732eed', '1.0.0b4.r5', '', '2008-04-28 17:50:00', '0', '100', '4', 'SciELO', 'Michael Berkowitz', 'http://www.scielo.(org|br)/',
REPLACE INTO translators VALUES ('3eabecf9-663a-4774-a3e6-0790d2732eed', '1.0.0b4.r5', '', '2008-05-14 15:00:00', '1', '100', '4', 'SciELO', 'Michael Berkowitz', 'http://(www.)?scielo.(org|br)/',
'function detectWeb(doc, url) {
if (url.indexOf("wxis.exe") != -1) {
if (doc.evaluate(''//*[@class="isoref"]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
return "multiple";
}
} else if (url.indexOf("issuetoc") != -1) {
return "multiple"
} else if (url.indexOf("&pid=") != -1) {
return "journalArticle";
} else {
Zotero.debug("ok");
}
}',
'function doWeb(doc, url) {
'function makeURL(host, str) {
return ''http://'' + host + ''/scieloOrg/php/articleXML.php?pid='' + str.match(/pid=([^&]+)/)[1] + ''&lang=en'';
}
function doWeb(doc, url) {
var host = doc.location.host;
var arts = new Array();
if (detectWeb(doc, url) == "multiple") {
var items = new Object();
var titlepath = ''//font[@class="isoref"]/font[@class="negrito"]'';
var linkpath = ''//font[@class="isoref"]/a[@class="isoref"]'';
if (url.indexOf(".exe") != -1) {
var titlepath = ''//font[@class="isoref"]/font[@class="negrito"]'';
var linkpath = ''//font[@class="isoref"]/a[@class="isoref"]'';
} else {
var titlepath = ''//font[@class="normal"]/b/b[1]'';
var linkpath = ''//tr/td/div/a[1]'';
}
var titles = doc.evaluate(titlepath, doc, null, XPathResult.ANY_TYPE, null);
var links = doc.evaluate(linkpath, doc, null, XPathResult.ANY_TYPE, null);
var next_title;
@ -5014,69 +5026,59 @@ REPLACE INTO translators VALUES ('3eabecf9-663a-4774-a3e6-0790d2732eed', '1.0.0b
}
items = Zotero.selectItems(items);
for (var i in items) {
arts.push(i);
arts.push(makeURL(host, i));
}
} else {
arts = [url];
arts = [makeURL(host, url)];
}
for each (var url in arts) {
Zotero.debug(url);
Zotero.Utilities.HTTP.doGet(url, function(text) {
var link = text.match(/\"([^"]+articleXML[^"]+)\"/)[1];
Zotero.debug(link);
Zotero.Utilities.HTTP.doGet(link, function(text) {
var item = new Zotero.Item("journalArticle");
text = text.replace(/<!DOCTYPE[^>]*>/, "").replace(/<\?xml[^>]*\?>/, "").replace(/<self-uri.*\/self\-uri>/g, "");
var journal = text.split("<journal-meta>")[1].split("</journal-meta>")[0];
journal = "<journal>" + journal + "</journal>";
journal = journal.replace(/\-([a-z])/g, "$1");
var xml2 = new XML(journal);
var art = text.split("<article-meta>")[1].split("</article-meta>")[0];
art = "<article>" + art + "</article>";
art = art.replace(/\-([a-z])/g, "$1");
var xml3 = new XML(art);
item.publicationTitle = xml2..journaltitle.text().toString();
item.journalAbbreviation = xml2..abbrevjournaltitle.text().toString();
item.ISSN = xml2..issn.text().toString();
item.publisher = xml2..publisher..publishername.text().toString();
item.title = xml3..titlegroup..articletitle.text().toString();
for (var i = 0 ; i < xml3..contribgroup..contrib.length() ; i++) {
var name = xml3..contribgroup..contrib[i]..name;
item.creators.push({firstName:name..givennames.text().toString(), lastName:name..surname.text().toString(), creatorType:"author"});
}
var date = xml3..pubdate[0];
var day = date..day.text().toString();
var month = date..month.text().toString();
var year = date..year.text().toString();
date = year;
if (month != "00") {
date = month + "/" + date;
}
if (day != "00") {
date = day + "/" + date;
}
item.date = date;
item.volume = xml3..volume.text().toString();
item.pages = xml3..fpage.text().toString() + "-" + xml3..lpage.text().toString();
for (var i = 0 ; i < xml3..kwdgroup..kwd.length() ; i++) {
item.tags.push(xml3..kwdgroup..kwd[i].text().toString());
}
item.attachments = [
{url:url, title:"SciELO Snapshot", mimeType:"text/html"}
];
Zotero.Utilities.HTTP.doGet(arts, function(text) {
var item = new Zotero.Item("journalArticle");
text = text.replace(/<!DOCTYPE[^>]*>/, "").replace(/<\?xml[^>]*\?>/, "").replace(/<self-uri.*\/self\-uri>/g, "");
var journal = text.split("<journal-meta>")[1].split("</journal-meta>")[0];
journal = "<journal>" + journal + "</journal>";
journal = journal.replace(/\-([a-z])/g, "$1");
var xml2 = new XML(journal);
var art = text.split("<article-meta>")[1].split("</article-meta>")[0];
art = "<article>" + art + "</article>";
art = art.replace(/\-([a-z])/g, "$1");
var xml3 = new XML(art);
item.complete();
});
});
}
item.publicationTitle = xml2..journaltitle.text().toString();
item.journalAbbreviation = xml2..abbrevjournaltitle.text().toString();
item.ISSN = xml2..issn.text().toString();
item.publisher = xml2..publisher..publishername.text().toString();
item.title = xml3..titlegroup..articletitle.text().toString();
for (var i = 0 ; i < xml3..contribgroup..contrib.length() ; i++) {
var name = xml3..contribgroup..contrib[i]..name;
item.creators.push({firstName:name..givennames.text().toString(), lastName:name..surname.text().toString(), creatorType:"author"});
}
var date = xml3..pubdate[0];
var day = date..day.text().toString();
var month = date..month.text().toString();
var year = date..year.text().toString();
date = year;
if (month != "00") {
date = month + "/" + date;
}
if (day != "00") {
date = day + "/" + date;
}
item.date = date;
item.volume = xml3..volume.text().toString();
item.pages = xml3..fpage.text().toString() + "-" + xml3..lpage.text().toString();
for (var i = 0 ; i < xml3..kwdgroup..kwd.length() ; i++) {
item.tags.push(xml3..kwdgroup..kwd[i].text().toString());
}
item.attachments = [
{url:url, title:"SciELO Snapshot", mimeType:"text/html"}
];
item.complete();
});
}');
REPLACE INTO translators VALUES ('0a84a653-79ea-4c6a-8a68-da933e3b504a', '1.0.0b4.r5', '', '2008-03-28 16:30:00', '0', '100', '4', 'Alexander Street Press', 'John West and Michael Berkowitz', 'http://(?:www\.)alexanderstreet',