diff --git a/chrome/chromeFiles/content/scholar/xpcom/utilities.js b/chrome/chromeFiles/content/scholar/xpcom/utilities.js index 2ae3b0b71..fcee7444c 100644 --- a/chrome/chromeFiles/content/scholar/xpcom/utilities.js +++ b/chrome/chromeFiles/content/scholar/xpcom/utilities.js @@ -180,6 +180,17 @@ Scholar.Utilities.prototype.lpad = function(string, pad, length) { return string; } +/* + * returns true when Scholar.ItemTypes.getID(type) yields a truthy value for the given type (i.e. the item type exists), false otherwise + */ +Scholar.Utilities.prototype.itemTypeExists = function(type) { + if(Scholar.ItemTypes.getID(type)) { + return true; + } else { + return false; + } +} + /* * END SCHOLAR FOR FIREFOX EXTENSIONS */ diff --git a/scrapers.sql b/scrapers.sql index b59aa0a32..f6793fd80 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -2424,73 +2424,22 @@ REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006 Scholar.wait(); }'); -REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-07-05 23:40:00', 2, 'MODS (XML)', 'Simon Kornblith', 'xml', +REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-07-05 23:40:00', 3, 'MODS (XML)', 'Simon Kornblith', 'xml', 'Scholar.addOption("exportNotes", true); Scholar.addOption("exportFileData", true);', 'var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"]; -var rdf = new Namespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"); -var rdfs = new Namespace("rdfs", "http://www.w3.org/2000/01/rdf-schema#"); - -/* - * handles the generation of RDF describing a single collection and its child - * collections - */ -function generateCollection(collection, rdfDoc) { - var description = ; - // specify collection ID, namespaced - description.@rdf::ID = "collection:"+collection.id; - // collection type is an RDF Bag. honestly, i''m not sure if this is the - // correct way of doing it, but it''s how the Mozilla Foundation did it. 
then - // again, the Mozilla Foundation also uses invalid URN namespaces, so who - // knows. - description.rdf::type.@resource = "http://www.w3.org/1999/02/22-rdf-syntax-ns#Bag"; - description.rdfs::label = collection.name; - - for(var i in collection.children) { - var child = collection.children[i]; - // add child list items - var childID = child.type+":"+child.id; - description.rdf::li += ; - - if(child.type == "collection") { - // do recursive processing of collections - generateCollection(child, rdfDoc); - } - } - rdfDoc.rdf::description += description; -} - -/* - * handles the generation of RDF describing a see also item - */ -function generateSeeAlso(id, seeAlso, rdfDoc) { - var description = ; - description.@rdf::ID = "item:"+id; - for(var i in seeAlso) { - var seeID = "item:"+seeAlso[i]; - description += ; - } - rdfDoc.rdf::description += description; -} function doExport() { - //var rdfDoc = ; - var modsCollection = ; + var modsCollection = ; var item; while(item = Scholar.nextItem()) { - var isPartialItem = false; - if(Scholar.Utilities.inArray(item.itemType, partialItemTypes)) { - isPartialItem = true; - } + var isPartialItem = Scholar.Utilities.inArray(item.itemType, partialItemTypes); var mods = ; /** CORE FIELDS **/ - // add ID - mods.@ID = "item:"+item.itemID; - // XML tag titleInfo; object field title if(item.title) { mods.titleInfo.title = item.title; @@ -2518,7 +2467,7 @@ function doExport() { modsType.@manuscript = "yes"; } else if(item.itemType == "interview") { modsType = "text"; - modsType.@manuscript = "interview"; + marcGenre = "interview"; } else if(item.itemType == "film") { modsType = "moving image"; marcGenre = "motion picture"; @@ -2529,8 +2478,7 @@ function doExport() { modsType = "multimedia"; marcGenre = "web site"; } else if(item.itemType == "note") { - modsType = "text"; - marcGenre = null; + continue; } mods.typeOfResource = modsType; mods.genre += {item.itemType}; @@ -2655,9 +2603,9 @@ function doExport() { originInfo += 
{item.place}; } if(item.publisher) { - originInfo += item.publisher; + originInfo += {item.publisher}; } else if(item.distributor) { - originInfo += item.distributor; + originInfo += {item.distributor}; } if(item.year) { // Assume year is copyright date @@ -2726,19 +2674,6 @@ function doExport() { for(var j in item.notes) { // Add note tag var note = {item.notes[j].note}; - note.@ID = "item:"+item.notes[j].itemID; - mods.note += note; - - // Add see also info to RDF - /*if(item.notes[j].seeAlso) { - rdfDoc.Description += generateSeeAlso(item.notes[j].itemID, item.notes[j].seeAlso); - }*/ - } - - if(item.note) { - // Add note tag - var note = {item.note}; - note.@ID = "item:"+item.itemID; mods.note += note; } @@ -2748,23 +2683,196 @@ function doExport() { mods.subject += {item.tags[j]}; } - /** RDF STRUCTURE **/ - - // Add see also info to RDF - /*if(item.seeAlso) { - generateSeeAlso(item.itemID, item.seeAlso, rdfDoc); - }*/ - modsCollection.mods += mods; } - /*for(var i in collections) { - generateCollection(collections[i], rdfDoc); - } - modsCollection.rdf::RDF = rdfDoc;*/ - Scholar.write(''''+"\n"); Scholar.write(modsCollection.toXMLString()); +} +// reads the whole input via Scholar.read(), parses it with E4X and completes one Scholar.Item per mods element; takes no arguments and returns nothing +function doImport() { + var text = ""; + var read; + + // read in 16384 byte increments + while(read = Scholar.read(16384)) { + text += read; + } + Scholar.Utilities.debugPrint("read in"); + + // eliminate the leading <?xml ...?> declaration so we can parse as XML + text = text.replace(/<\?xml[^?]+\?>/, ""); + + // parse with E4X + var m = new Namespace("http://www.loc.gov/mods/v3"); + // why does this default namespace declaration not work!? 
+ default xml namespace = m; + var xml = new XML(text); + + for each(var mods in xml.m::mods) { + Scholar.Utilities.debugPrint("item is: "); + for(var i in mods) { + Scholar.Utilities.debugPrint(i+" = "+mods[i].toString()); + } + + var newItem = new Scholar.Item(); + + // title + newItem.title = mods.m::titleInfo.m::title; + + // try to get genre from local genre + var localGenre = mods.m::genre.(@authority=="local").text().toString(); + if(localGenre && Scholar.Utilities.itemTypeExists(localGenre)) { + newItem.itemType = localGenre; + } else { + // otherwise, look at the marc genre + var marcGenre = mods.m::genre.(@authority=="marcgt").text().toString(); + if(marcGenre) { + if(marcGenre == "book") { + newItem.itemType = "book"; + } else if(marcGenre == "periodical") { + newItem.itemType = "magazineArticle"; + } else if(marcGenre == "newspaper") { + newItem.itemType = "newspaperArticle"; + } else if(marcGenre == "theses") { + newItem.itemType = "thesis"; + } else if(marcGenre == "letter") { + newItem.itemType = "letter"; + } else if(marcGenre == "interview") { + newItem.itemType = "interview"; + } else if(marcGenre == "motion picture") { + newItem.itemType = "film"; + } else if(marcGenre == "art original") { + newItem.itemType = "artwork"; + } else if(marcGenre == "web site") { + newItem.itemType = "website"; + } + } + + if(!newItem.itemType) { + newItem.itemType = "book"; + } + } + + var isPartialItem = Scholar.Utilities.inArray(newItem.itemType, partialItemTypes); + + // TODO: thesisType, type + + for each(var name in mods.m::name) { + // TODO: institutional authors + var creator = new Array(); + creator.firstName = name.m::namePart.(@type=="given").text().toString(); + creator.lastName = name.m::namePart.(@type=="family").text().toString(); + + // look for roles + var role = name.m::role.m::roleTerm.(@type=="code").(@authority=="marcrelator").text().toString(); + if(role == "edt") { + creator.creatorType = "editor"; + } else if(role == "ctb") { + 
creator.creatorType = "contributor"; + } else { + creator.creatorType = "author"; + } + + newItem.creators.push(creator); + } + + // source + newItem.source = mods.m::recordInfo.m::recordContentSource.text().toString(); + // accessionNumber + newItem.accessionNumber = mods.m::recordInfo.m::recordIdentifier.text().toString(); + // rights + newItem.rights = mods.m::accessCondition.text().toString(); + + /** SUPPLEMENTAL FIELDS **/ + + // series + if(newItem.itemType == "bookSection") { + newItem.series = mods.m::relatedItem.(@type=="host").m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString(); + } else { + newItem.series = mods.m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString(); + } + + // get part + if(isPartialItem) { + var part = mods.m::relatedItem.m::part; + var originInfo = mods.m::relatedItem.m::originInfo; + var identifier = mods.m::relatedItem.m::identifier; + } else { + var part = mods.m::part; + var originInfo = mods.m::originInfo; + var identifier = mods.m::identifier; + } + + // volume + newItem.volume = part.m::detail.(@type=="volume").m::number.text().toString(); + if(!newItem.volume) { + newItem.volume = part.m::detail.(@type=="volume").m::text.text().toString(); + } + + // number + newItem.number = part.m::detail.(@type=="issue").m::number.text().toString(); + if(!newItem.number) { + newItem.number = part.m::detail.(@type=="issue").m::text.text().toString(); + } + + // section + newItem.section = part.m::detail.(@type=="section").m::number.text().toString(); + if(!newItem.section) { + newItem.section = part.m::detail.(@type=="section").m::text.text().toString(); + } + + // pages + var pagesStart = part.m::extent.(@unit=="pages").m::start.text().toString(); + var pagesEnd = part.m::extent.(@unit=="pages").m::end.text().toString(); + if(pagesStart || pagesEnd) { + if(pagesStart && pagesEnd && pagesStart != pagesEnd) { + newItem.pages = pagesStart+"-"+pagesEnd; + } else { + newItem.pages = pagesStart+pagesEnd; + 
} + } + + // edition + newItem.edition = originInfo.m::edition.text().toString(); + // place + newItem.place = originInfo.m::place.m::placeTerm.text().toString(); + // publisher/distributor + newItem.publisher = newItem.distributor = originInfo.m::publisher.text().toString(); + // date: prefer copyrightDate, then dateIssued, then dateCreated (all looked up in the MODS namespace) + newItem.date = originInfo.m::copyrightDate.text().toString(); + if(!newItem.date) { + newItem.date = originInfo.m::dateIssued.text().toString(); + if(!newItem.date) { + newItem.date = originInfo.m::dateCreated.text().toString(); + } + } + + // ISBN + newItem.ISBN = identifier.(@type=="ISBN").text().toString(); + // ISSN + newItem.ISSN = identifier.(@type=="ISSN").text().toString(); + // publication + newItem.publication = mods.m::relatedItem.m::publication.text().toString(); + // call number + newItem.callNumber = mods.m::classification.text().toString(); + // archiveLocation + newItem.archiveLocation = mods.m::location.m::physicalLocation.text().toString(); + // url + newItem.url = mods.m::location.m::url.text().toString(); + + /** NOTES **/ + for each(var note in mods.m::note) { + newItem.notes.push({note:note.text().toString()}); + } + + /** TAGS **/ + for each(var subject in mods.m::subject) { + newItem.tags.push(subject.text().toString()); + } + + newItem.complete(); + } }'); REPLACE INTO "translators" VALUES ('14763d24-8ba0-45df-8f52-b8d1108e7ac9', '2006-07-07 12:44:00', 2, 'Biblio/DC/FOAF/PRISM/VCard (RDF/XML)', 'Simon Kornblith', 'rdf',