- fixed XML issues with PubMed scraper (although probably not the issue that everyone seems to be experiencing)

- unfinished support for new item types
This commit is contained in:
Simon Kornblith 2006-11-02 00:33:50 +00:00
parent 9172622c72
commit a1269146b7

View File

@ -1,4 +1,4 @@
-- 102 -- 103
-- ***** BEGIN LICENSE BLOCK ***** -- ***** BEGIN LICENSE BLOCK *****
-- --
@ -22,7 +22,7 @@
-- Set the following timestamp to the most recent scraper update date -- Set the following timestamp to the most recent scraper update date
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-10-25 18:40:43')); REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-11-01 16:30:00'));
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-10-02 17:00:00', 1, 100, 4, 'Amazon.com', 'Simon Kornblith', '^http://www\.amazon\.com/', REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-10-02 17:00:00', 1, 100, 4, 'Amazon.com', 'Simon Kornblith', '^http://www\.amazon\.com/',
'function detectWeb(doc, url) { 'function detectWeb(doc, url) {
@ -2580,7 +2580,7 @@ REPLACE INTO "translators" VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '2006
} }
}'); }');
REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006-10-23 00:23:00', 1, 100, 12, 'PubMed', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?.*db=PubMed', REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006-11-01 16:30:00', 1, 100, 12, 'PubMed', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?.*db=PubMed',
'function detectWeb(doc, url) { 'function detectWeb(doc, url) {
var namespace = doc.documentElement.namespaceURI; var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) { var nsResolver = namespace ? function(prefix) {
@ -2653,7 +2653,7 @@ function detectSearch(item) {
} }
if(article.Journal.length()) { if(article.Journal.length()) {
var issn = article.Journal.ISSN.text(); var issn = article.Journal.ISSN.text().toString();
if(issn) { if(issn) {
newItem.ISSN = issn.replace(/[^0-9]/g, ""); newItem.ISSN = issn.replace(/[^0-9]/g, "");
} }
@ -2666,15 +2666,15 @@ function detectSearch(item) {
} }
if(article.Journal.JournalIssue.length()) { if(article.Journal.JournalIssue.length()) {
newItem.volume = article.Journal.JournalIssue.Volume.text(); newItem.volume = article.Journal.JournalIssue.Volume.text().toString();
newItem.issue = article.Journal.JournalIssue.Issue.text(); newItem.issue = article.Journal.JournalIssue.Issue.text().toString();
if(article.Journal.JournalIssue.PubDate.length()) { // try to get the date if(article.Journal.JournalIssue.PubDate.length()) { // try to get the date
if(article.Journal.JournalIssue.PubDate.Day.text().toString() != "") { if(article.Journal.JournalIssue.PubDate.Day.text().toString() != "") {
newItem.date = article.Journal.JournalIssue.PubDate.Month.text()+" "+article.Journal.JournalIssue.PubDate.Day.text()+", "+article.Journal.JournalIssue.PubDate.Year.text(); newItem.date = article.Journal.JournalIssue.PubDate.Month.text().toString()+" "+article.Journal.JournalIssue.PubDate.Day.text().toString()+", "+article.Journal.JournalIssue.PubDate.Year.text().toString();
} else if(article.Journal.JournalIssue.PubDate.Month.text().toString() != "") { } else if(article.Journal.JournalIssue.PubDate.Month.text().toString() != "") {
newItem.date = article.Journal.JournalIssue.PubDate.Month.text()+" "+article.Journal.JournalIssue.PubDate.Year.text(); newItem.date = article.Journal.JournalIssue.PubDate.Month.text().toString()+" "+article.Journal.JournalIssue.PubDate.Year.text().toString();
} else if(article.Journal.JournalIssue.PubDate.Year.text().toString() != "") { } else if(article.Journal.JournalIssue.PubDate.Year.text().toString() != "") {
newItem.date = article.Journal.JournalIssue.PubDate.Year.text(); newItem.date = article.Journal.JournalIssue.PubDate.Year.text().toString();
} }
} }
} }
@ -4836,7 +4836,20 @@ REPLACE INTO "translators" VALUES ('14763d24-8ba0-45df-8f52-b8d1108e7ac9', '2006
Zotero.configure("dataMode", "rdf"); Zotero.configure("dataMode", "rdf");
Zotero.addOption("exportNotes", true); Zotero.addOption("exportNotes", true);
Zotero.addOption("exportFileData", false);', Zotero.addOption("exportFileData", false);',
'function generateSeeAlso(resource, seeAlso) { 'var rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
var n = {
bib:"http://purl.org/net/biblio#",
dc:"http://purl.org/dc/elements/1.1/",
dcterms:"http://purl.org/dc/terms/",
prism:"http://prismstandard.org/namespaces/1.2/basic/",
foaf:"http://xmlns.com/foaf/0.1/",
vcard:"http://nwalsh.com/rdf/vCard#",
link:"http://purl.org/rss/1.0/modules/link/",
fs:"http://www.zotero.org/namespaces/export#"
};
function generateSeeAlso(resource, seeAlso) {
for(var i in seeAlso) { for(var i in seeAlso) {
if(itemResources[seeAlso[i]]) { if(itemResources[seeAlso[i]]) {
Zotero.RDF.addStatement(resource, n.dc+"relation", itemResources[seeAlso[i]], false); Zotero.RDF.addStatement(resource, n.dc+"relation", itemResources[seeAlso[i]], false);
@ -4927,11 +4940,48 @@ function generateItem(item, zoteroType, resource) {
if(item.charset) { if(item.charset) {
Zotero.RDF.addStatement(resource, n.link+"charset", item.charset, true); Zotero.RDF.addStatement(resource, n.link+"charset", item.charset, true);
} }
} else if(zoteroType == "report") {
type = n.bib+"Report";
} else if(zoteroType == "bill") {
type = n.bib+"Legislation";
} else if(zoteroType == "case") {
type = n.bib+"Legislation"; // ??
} else if(zoteroType == "hearing") {
type = n.bib+"Report";
} else if(zoteroType == "patent") {
type = n.bib+"Patent";
} else if(zoteroType == "statute") {
type = n.bib+"Legislation";
} else if(zoteroType == "email") {
type = n.bib+"Letter";
} else if(zoteroType == "map") {
type = n.bib+"Image";
} else if(zoteroType == "blogPost") {
type = n.bib+"Document";
} else if(zoteroType == "instantMessage") {
type = n.bib+"Letter";
} else if(zoteroType == "forumPost") {
type = n.bib+"Document";
} else if(zoteroType == "audioRecording") {
type = n.bib+"Recording";
} else if(zoteroType == "presentation") {
type = n.bib+"ConferenceProceedings";
} else if(zoteroType == "videoRecording") {
type = n.bib+"Recording";
} else if(zoteroType == "tvBroadcast") {
type = n.bib+"Recording";
} else if(zoteroType == "radioBroadcast") {
type = n.bib+"Recording";
} else if(zoteroType == "podcast") {
type = n.bib+"Recording";
} else if(zoteroType == "computerProgram") {
type = n.bib+"Data";
} }
if(type) { if(type) {
Zotero.RDF.addStatement(resource, rdf+"type", type, false); Zotero.RDF.addStatement(resource, rdf+"type", type, false);
} }
Zotero.RDF.addStatement(resource, n.fs+"type", zoteroType, true);
// authors/editors/contributors // authors/editors/contributors
var creatorContainers = new Object(); var creatorContainers = new Object();
@ -5132,10 +5182,15 @@ function generateItem(item, zoteroType, resource) {
} }
// type (not itemType) // type (not itemType)
if(item.type) { var typeTypes = ["reportType", "videoRecordingType", "letterType",
Zotero.RDF.addStatement(resource, n.dc+"type", item.type, true); "manuscriptType", "mapType", "thesisType", "websiteType",
} else if(item.thesisType) { "audioRecordingType", "presentationType", "postType",
Zotero.RDF.addStatement(resource, n.dc+"type", item.thesisType, true); "audioFileType"];
for each(var typeType in typeTypes) {
if(item[typeType]) {
Zotero.RDF.addStatement(resource, n.dc+"type", item[typeType], true);
break;
}
} }
// THIS IS NOT YET IN THE BIBLIO NAMESPACE, BUT BRUCE D''ARCUS HAS SAID // THIS IS NOT YET IN THE BIBLIO NAMESPACE, BUT BRUCE D''ARCUS HAS SAID
@ -5197,19 +5252,6 @@ function generateItem(item, zoteroType, resource) {
} }
function doExport() { function doExport() {
rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
n = {
bib:"http://purl.org/net/biblio#",
dc:"http://purl.org/dc/elements/1.1/",
dcterms:"http://purl.org/dc/terms/",
prism:"http://prismstandard.org/namespaces/1.2/basic/",
foaf:"http://xmlns.com/foaf/0.1/",
vcard:"http://nwalsh.com/rdf/vCard#",
link:"http://purl.org/rss/1.0/modules/link/",
fs:"http://www.zotero.org/namespaces/export#"
};
// add namespaces // add namespaces
for(var i in n) { for(var i in n) {
Zotero.RDF.addNamespace(i, n[i]); Zotero.RDF.addNamespace(i, n[i]);
@ -5334,7 +5376,10 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006
Zotero.RDF.addStatement(resource, dc+"publisher", item.publisher, true); Zotero.RDF.addStatement(resource, dc+"publisher", item.publisher, true);
} else if(item.distributor) { } else if(item.distributor) {
Zotero.RDF.addStatement(resource, dc+"publisher", item.distributor, true); Zotero.RDF.addStatement(resource, dc+"publisher", item.distributor, true);
} else if(item.institution) {
Zotero.RDF.addStatement(resource, dc+"publisher", item.distributor, true);
} }
// date/year // date/year
if(item.date) { if(item.date) {
Zotero.RDF.addStatement(resource, dc+"date", item.date, true); Zotero.RDF.addStatement(resource, dc+"date", item.date, true);
@ -5380,7 +5425,20 @@ function detectImport() {
return true; return true;
} }
}', }',
'// gets the first result set for a property that can be encoded in multiple 'var rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
var n = {
bib:"http://purl.org/net/biblio#",
dc:"http://purl.org/dc/elements/1.1/",
dcterms:"http://purl.org/dc/terms/",
prism:"http://prismstandard.org/namespaces/1.2/basic/",
foaf:"http://xmlns.com/foaf/0.1/",
vcard:"http://nwalsh.com/rdf/vCard#",
link:"http://purl.org/rss/1.0/modules/link/",
fs:"http://www.zotero.org/namespaces/export#"
};
// gets the first result set for a property that can be encoded in multiple
// ontologies // ontologies
function getFirstResults(node, properties, onlyOneString) { function getFirstResults(node, properties, onlyOneString) {
for(var i=0; i<properties.length; i++) { for(var i=0; i<properties.length; i++) {
@ -5587,9 +5645,25 @@ function importItem(newItem, node, type) {
} }
newItem.charset = getFirstResults(node, [n.link+"charset"], true); newItem.charset = getFirstResults(node, [n.link+"charset"], true);
newItem.mimeType = getFirstResults(node, [n.link+"type"], true); newItem.mimeType = getFirstResults(node, [n.link+"type"], true);
} else if(type == "report") {
newItem.itemType = "report";
} else if(type == "legislation") {
newItem.itemType = "statute";
} else if(type == "patent") {
newItem.itemType = "patent";
} else if(type == "image") {
newItem.itemType = "artwork";
} else if(type == "recording") {
newItem.itemType = "audioRecording";
} }
} }
// check to see if we recognize the type in the fs or dc namespaces
var zoteroType = getFirstResults(node, [n.fs+"type", n.dc+"type"], true);
if(Zotero.Utilities.itemTypeExists(zoteroType)) {
newItem.itemType = zoteroType;
}
// title // title
newItem.title = getFirstResults(node, [n.dc+"title"], true); newItem.title = getFirstResults(node, [n.dc+"title"], true);
if(!newItem.itemType && !newItem.title) { // require the title if(!newItem.itemType && !newItem.title) { // require the title
@ -5797,19 +5871,6 @@ function importItem(newItem, node, type) {
} }
function doImport() { function doImport() {
rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
n = {
bib:"http://purl.org/net/biblio#",
dc:"http://purl.org/dc/elements/1.1/",
dcterms:"http://purl.org/dc/terms/",
prism:"http://prismstandard.org/namespaces/1.2/basic/",
foaf:"http://xmlns.com/foaf/0.1/",
vcard:"http://nwalsh.com/rdf/vCard#",
link:"http://purl.org/rss/1.0/modules/link/",
fs:"http://www.zotero.org/namespaces/export#"
};
callNumberTypes = [ callNumberTypes = [
n.dcterms+"LCC", n.dcterms+"DDC", n.dcterms+"UDC" n.dcterms+"LCC", n.dcterms+"DDC", n.dcterms+"UDC"
]; ];
@ -7657,8 +7718,9 @@ REPLACE INTO "csl" VALUES('http://purl.org/net/xbiblio/csl/styles/mla.csl', '200
<et-al min-authors="6" use-first="1" position="subsequent"/> <et-al min-authors="6" use-first="1" position="subsequent"/>
<layout> <layout>
<item> <item>
<author form="short"/> <author form="short">
<title form="short" when-multiple-author-items="true" prefix="" suffix=""/> <name and="text" sort-separator=", " delimiter=", " delimiter-precedes-last="always"/>
</author>
<locator prefix=" "/> <locator prefix=" "/>
</item> </item>
</layout> </layout>