Adding Sylvain's 20feb10 changes.

This commit is contained in:
Matt Burton 2010-03-29 15:01:40 +00:00
parent 7cfa4e8e3a
commit 1caa7ac359

View File

@ -8,7 +8,7 @@
"maxVersion":"", "maxVersion":"",
"priority":100, "priority":100,
"inRepository":true, "inRepository":true,
"lastUpdated":"2009-10-08 17:40:00" "lastUpdated":"2010-02-20 14:40:00"
} }
function detectWeb(doc, url) { function detectWeb(doc, url) {
@ -34,6 +34,10 @@ function detectWeb(doc, url) {
{ {
return "journalArticle"; return "journalArticle";
} }
else if (type.indexOf('audiovisual.gif') > 0)
{
return "film";
}
else if (type.indexOf('book.gif') > 0) else if (type.indexOf('book.gif') > 0)
{ {
return "book"; return "book";
@ -67,7 +71,7 @@ function detectWeb(doc, url) {
} }
} }
function scrape(doc) { function scrape(doc, url) {
var namespace = doc.documentElement.namespaceURI; var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) { var nsResolver = namespace ? function(prefix) {
if (prefix == 'x') return namespace; else return null; if (prefix == 'x') return namespace; else return null;
@ -83,50 +87,25 @@ function scrape(doc) {
newItem.repository = false; // do not save repository newItem.repository = false; // do not save repository
if(Zotero.Utilities.parseContextObject(coins, newItem)) if(Zotero.Utilities.parseContextObject(coins, newItem))
{ {
// ppn is the national identifier, used to make a permalink on the record
var ppn = "";
if (newItem.title) if (newItem.title)
{ {
// We use the same method as in detectWeb to find newItem.itemType = detectWeb(doc, url);
// the real type of document // The number of pages uses COinS field : rft.pages, even if the information
var xpathimage = '/html/body/div[2]/div[4]/span/img'; // concerns the number of pages.
if (elt = doc.evaluate(xpathimage, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) if (newItem.pages != undefined)
{ {
var type = elt.getAttribute('src'); newItem.numPages = newItem.pages;
var ZoteroType = '';
if (type.indexOf('article.gif') > 0) var m = newItem.pages.match(/(\d*) vol\. \((.*) [pf]\./);
if (m)
{ {
zoteroType = 'journalArticle'; newItem.numberOfVolumes = m[1];
newItem.numPages = m[2];
} }
else if (type.indexOf('book.gif') > 0)
{
zoteroType = 'book';
}
else if (type.indexOf('handwriting.gif') > 0)
{
zoteroType = 'manuscript';
}
else if (type.indexOf('sons.gif') > 0)
{
zoteroType = "audioRecording";
}
else if (type.indexOf('sound.gif') > 0)
{
zoteroType = "audioRecording";
}
else if (type.indexOf('thesis.gif') > 0)
{
zoteroType = "thesis";
}
else if (type.indexOf('map.gif') > 0)
{
zoteroType = "map";
}
else
{
zoteroType = "book";
}
newItem.itemType = zoteroType;
} }
// We need to correct some informations where COinS is wrong // We need to correct some informations where COinS is wrong
var rowXpath = '//tr[td[@class="rec_lable"]]'; var rowXpath = '//tr[td[@class="rec_lable"]]';
var tableRows = doc.evaluate(rowXpath, doc, nsResolver, XPathResult.ANY_TYPE, null); var tableRows = doc.evaluate(rowXpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
@ -161,7 +140,7 @@ function scrape(doc) {
{ {
zoteroFunction = 'translator'; zoteroFunction = 'translator';
} }
else if ( (zoteroType == "thesis") && (authorFunction != 'Auteur') ) else if ( (newItem.itemType == "thesis") && (authorFunction != 'Auteur') )
{ {
zoteroFunction = "contributor"; zoteroFunction = "contributor";
} }
@ -170,6 +149,8 @@ function scrape(doc) {
zoteroFunction = 'author'; zoteroFunction = 'author';
} }
// We need to remove the author dates from reference
authorText = authorText.replace(/ \(.{4}\-.{4}\)$/, "")
if (authorFunction == "Université de soutenance") if (authorFunction == "Université de soutenance")
{ {
// If the author function is "université de soutenance" it means that this author has to be in "university" field // If the author function is "université de soutenance" it means that this author has to be in "university" field
@ -250,6 +231,15 @@ function scrape(doc) {
var thesisType = value.split(/ ?:/)[0]; var thesisType = value.split(/ ?:/)[0];
newItem.type = thesisType; newItem.type = thesisType;
} }
else if ( (field == "Numéro\u00A0de\u00A0notice") || (field == "Record\u00A0number") )
{
ppn = value;
}
}
if ( (newItem.url == undefined) && (ppn != "") )
{
newItem.url = 'http://www.sudoc.abes.fr/DB=2.1/SRCH?IKT=12&TRM=' + ppn;
} }
newItem.complete(); newItem.complete();
} }
@ -291,7 +281,6 @@ function doWeb(doc, url) {
var uris = new Array(); var uris = new Array();
for(var i in items) { for(var i in items) {
uris.push(newUrl + links[i]); uris.push(newUrl + links[i]);
Zotero.debug(newUrl + links[i]);
} }
Zotero.Utilities.processDocuments(uris, function(doc) { scrape(doc) }, Zotero.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
function() { Zotero.done(); }, null); function() { Zotero.done(); }, null);
@ -299,7 +288,7 @@ function doWeb(doc, url) {
} }
else if ( (content == "Notice complète") || (content == 'title data') ) else if ( (content == "Notice complète") || (content == 'title data') )
{ {
scrape(doc); scrape(doc, url);
} }
} }
} }