diff --git a/chrome/chromeFiles/content/scholar/exportOptions.js b/chrome/chromeFiles/content/scholar/exportOptions.js index cfac3be9f..bcb3dc0d4 100644 --- a/chrome/chromeFiles/content/scholar/exportOptions.js +++ b/chrome/chromeFiles/content/scholar/exportOptions.js @@ -53,14 +53,13 @@ var Scholar_File_Interface_Export = new function() { var element = document.getElementById(option); if(typeof(defValue) == "boolean") { - if(element.checked == "true") { + if(element.checked == true) { _options[option] = true; } else { _options[option] = false; } } } - Scholar.debug(_options); } /* diff --git a/chrome/chromeFiles/content/scholar/xpcom/cite.js b/chrome/chromeFiles/content/scholar/xpcom/cite.js index d49f2233c..799a69a43 100644 --- a/chrome/chromeFiles/content/scholar/xpcom/cite.js +++ b/chrome/chromeFiles/content/scholar/xpcom/cite.js @@ -202,7 +202,7 @@ CSL.prototype.createBibliography = function(items, format) { // add line feeds if(format == "HTML") { var coins = Scholar.OpenURL.createContextObject(item, "1.0"); - string += ''; + string += ''; if(this._class == "note") { output += "
  • "+string+"
  • \r\n"; diff --git a/chrome/chromeFiles/content/scholar/xpcom/translate.js b/chrome/chromeFiles/content/scholar/xpcom/translate.js index ba945d26b..6edacda04 100644 --- a/chrome/chromeFiles/content/scholar/xpcom/translate.js +++ b/chrome/chromeFiles/content/scholar/xpcom/translate.js @@ -916,39 +916,64 @@ Scholar.Translate.prototype._closeStreams = function() { * imports an attachment from the disk */ Scholar.Translate.prototype._itemImportAttachment = function(attachment, sourceID) { - Scholar.debug(attachment); - if(!attachment.path) { // create from URL if(attachment.url) { var attachmentID = Scholar.Attachments.linkFromURL(attachment.url, sourceID, (attachment.mimeType ? attachment.mimeType : undefined), (attachment.title ? attachment.title : undefined)); + var attachmentItem = Scholar.Items.get(attachmentID); } else { Scholar.debug("not adding attachment: no path or url specified"); + return false; } } else { + // generate nsIFile + var IOService = Components.classes["@mozilla.org/network/io-service;1"]. + getService(Components.interfaces.nsIIOService); + var uri = IOService.newURI(attachment.path, "", null); + var file = uri.QueryInterface(Components.interfaces.nsIFileURL).file; + if(attachment.url) { - Scholar.debug("not adding attachment: snapshot import not yet implemented"); + // import from nsIFile + var attachmentID = Scholar.Attachments.importSnapshotFromFile(file, + attachment.url, attachment.title, attachment.mimeType, + (attachment.charset ? attachment.charset : null), sourceID); + var attachmentItem = Scholar.Items.get(attachmentID); } else { - // generate nsIFile - var IOService = Components.classes["@mozilla.org/network/io-service;1"]. - getService(Components.interfaces.nsIIOService); - var uri = IOService.newURI(attachment.path, "", null); - var file = uri.QueryInterface(Components.interfaces.nsIFileURL).file; - // import from nsIFile var attachmentID = Scholar.Attachments.importFromFile(file, sourceID); // get attachment item - var myAttachmentItem = Scholar.Items.get(attachmentID); + var attachmentItem = Scholar.Items.get(attachmentID); if(attachment.title) { // set title - myAttachmentItem.setField("title", attachment.title); + attachmentItem.setField("title", attachment.title); } } } - return attachmentID; + return attachmentItem; +} + +/* + * handles tags and see also data for notes and attachments + */ +Scholar.Translate.prototype._itemTagsAndSeeAlso = function(item, newItem) { + Scholar.debug("handling notes and see also"); + // add to ID map + if(item.itemID) { + this._IDMap[item.itemID] = newItem.getID(); + } + // add see alsos + for each(var seeAlso in item.seeAlso) { + if(this._IDMap[seeAlso]) { + newItem.addSeeAlso(this._IDMap[seeAlso]); + } + } + + for each(var tag in item.tags) { + newItem.addTag(tag); + } } /* @@ -968,9 +993,6 @@ Scholar.Translate.prototype._itemDone = function(item) { return; } - if(!item.title) { - throw("item has no title"); - } var notifierStatus = Scholar.Notifier.isEnabled(); if(notifierStatus) { @@ -985,8 +1007,18 @@ Scholar.Translate.prototype._itemDone = function(item) { // re-retrieve the item var newItem = Scholar.Items.get(myID); } else if(type == "attachment") { - var myID = this._itemImportAttachment(item, null); + if(this.type == "import") { + var newItem = this._itemImportAttachment(item, null); + var myID = newItem.getID(); + } else { + Scholar.debug("discarding standalone attachment"); + return false; + } } else { + if(!item.title) { + throw("item has no title"); + } + // create new item var typeID = Scholar.ItemTypes.getID(type); var newItem = Scholar.Items.getNewItemByType(typeID); @@ -1016,13 +1048,11 @@ Scholar.Translate.prototype._itemDone = function(item) { } } else if(i == "title") { // skip checks for title newItem.setField(i, data); - } else if(i == "tags") { // add tags - for(var j in data) { - newItem.addTag(data[j]); - } } else if(i == "seeAlso") { newItem.translateSeeAlso = data; - } else if(i != "note" && i != "notes" && i != "itemID" && (fieldID = Scholar.ItemFields.getID(i))) { + } else if(i != "note" && i != "notes" && i != "itemID" && + i != "attachments" && i != "tags" && + (fieldID = Scholar.ItemFields.getID(i))) { // if field is in db if(Scholar.ItemFields.isValidForType(fieldID, typeID)) { // if field is valid for this type @@ -1049,15 +1079,8 @@ Scholar.Translate.prototype._itemDone = function(item) { var noteID = Scholar.Notes.add(note.note, myID); // handle see also - if(note.seeAlso) { - var myNote = Scholar.Items.get(noteID); - - for each(var seeAlso in note.seeAlso) { - if(this._IDMap[seeAlso]) { - myNote.addSeeAlso(this._IDMap[seeAlso]); - } - } - } + var myNote = Scholar.Items.get(noteID); + this._itemTagsAndSeeAlso(note, myNote); } } @@ -1071,7 +1094,7 @@ Scholar.Translate.prototype._itemDone = function(item) { if(attachment.downloadable && this._downloadAssociatedFiles) { if(attachment.document) { - var attachmentID = Scholar.Attachments.importFromDocument(attachment.document, myID); + attachmentID = Scholar.Attachments.importFromDocument(attachment.document, myID); // change title, if a different one was specified if(attachment.title && (!attachment.document.title @@ -1086,7 +1109,7 @@ Scholar.Translate.prototype._itemDone = function(item) { } } else { if(attachment.document) { - var attachmentID = Scholar.Attachments.linkFromURL(attachment.document.location.href, myID, + attachmentID = Scholar.Attachments.linkFromURL(attachment.document.location.href, myID, (attachment.mimeType ? attachment.mimeType : attachment.document.contentType), (attachment.title ? attachment.title : attachment.document.title)); } else { @@ -1094,13 +1117,16 @@ Scholar.Translate.prototype._itemDone = function(item) { Scholar.debug("notice: either mimeType or title is missing; attaching file will be slower"); } - var attachmentID = Scholar.Attachments.linkFromURL(attachment.url, myID, + attachmentID = Scholar.Attachments.linkFromURL(attachment.url, myID, (attachment.mimeType ? attachment.mimeType : undefined), (attachment.title ? attachment.title : undefined)); } } } else if(this.type == "import") { - this._itemImportAttachment(attachment, myID); + var attachmentItem = this._itemImportAttachment(attachment, myID); + if(attachmentItem) { + this._itemTagsAndSeeAlso(attachment, attachmentItem); + } } } } @@ -1120,6 +1146,12 @@ Scholar.Translate.prototype._itemDone = function(item) { } } + if(item.tags) { + for each(var tag in item.tags) { + newItem.addTag(tag); + } + } + delete item; // only re-enable if notifier was enabled at the beginning of scraping @@ -1358,7 +1390,7 @@ Scholar.Translate.prototype._export = function() { var extensionMatch = /^(.*)\.[a-zA-Z0-9]+$/ var m = extensionMatch.exec(name); if(m) { - name = m[0]; + name = m[1]; } directory.append(name); @@ -1447,13 +1479,18 @@ Scholar.Translate.prototype._exportGetAttachment = function(attachment) { attachmentArray.title = attachment.getField("title"); // get mime type attachmentArray.mimeType = attachment.getAttachmentMimeType(); + // get charset + attachmentArray.charset = attachment.getAttachmentCharset(); + // get seeAlso + attachmentArray.seeAlso = attachment.getSeeAlso(); + // get tags + attachmentArray.tags = attachment.getTags(); if(linkMode != Scholar.Attachments.LINK_MODE_LINKED_URL && this._displayOptions["exportFileData"]) { // add path and filename if not an internet link - attachmentArray.path = "files/"+attachmentID+"/"; var file = attachment.getFile(); - attachmentArray.filename = file.leafName; + attachmentArray.path = "files/"+attachmentID+"/"+file.leafName; if(linkMode == Scholar.Attachments.LINK_MODE_LINKED_FILE) { // create a new directory @@ -1847,7 +1884,7 @@ Scholar.Translate.RDF.prototype.getSources = function(resource, property) { property = this._getResource(property); resource = this._getResource(resource); - var enumerator = this._dataSource.GetSources(resource, property, true); + var enumerator = this._dataSource.GetSources(property, resource, true); return this._deEnumerate(enumerator); } diff --git a/scrapers.sql b/scrapers.sql index ade56ce81..4d5003f2f 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -1,4 +1,4 @@ --- 53 +-- 54 -- Set the following timestamp to the most recent scraper update date REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-15 15:42:00')); @@ -3721,10 +3721,18 @@ REPLACE INTO "translators" VALUES ('14763d24-8ba0-45df-8f52-b8d1108e7ac9', '2006 'Scholar.configure("getCollections", true); Scholar.configure("dataMode", "rdf"); Scholar.addOption("exportNotes", true); -Scholar.addOption("exportFileData", true);', +Scholar.addOption("exportFileData", false);', 'function generateSeeAlso(resource, seeAlso) { for(var i in seeAlso) { - Scholar.RDF.addStatement(resource, n.dc+"relation", itemResources[seeAlso[i]], false); + if(itemResources[seeAlso[i]]) { + Scholar.RDF.addStatement(resource, n.dc+"relation", itemResources[seeAlso[i]], false); + } + } +} + +function generateTags(resource, tags) { + for(var j in tags) { + Scholar.RDF.addStatement(resource, n.dc+"subject", tags[j], true); } } @@ -3746,7 +3754,11 @@ function generateCollection(collection) { } function handleAttachment(attachmentResource, attachment) { - Scholar.RDF.addStatement(attachmentResource, rdf+"type", n.fs+"File", false); + Scholar.RDF.addStatement(attachmentResource, rdf+"type", n.fs+"Attachment", false); + + if(attachment.path) { + Scholar.RDF.addStatement(attachmentResource, rdf+"resource", attachment.path, false); + } if(attachment.url) { // add url as identifier @@ -3759,17 +3771,17 @@ function handleAttachment(attachmentResource, attachment) { Scholar.RDF.addStatement(attachmentResource, n.dc+"identifier", term, false); } - // add mime type - var term = Scholar.RDF.newResource(); - // set term type - Scholar.RDF.addStatement(term, rdf+"type", n.dcterms+"IMT", false); // set mime type value - Scholar.RDF.addStatement(term, rdf+"value", attachment.mimeType, true); - // add relationship to resource - Scholar.RDF.addStatement(attachmentResource, n.dc+"format", term, false); - + Scholar.RDF.addStatement(attachmentResource, n.link+"type", attachment.mimeType, true); + // set charset value + if(attachment.charset) { + Scholar.RDF.addStatement(attachmentResource, n.link+"charset", attachment.charset, true); + } // add title Scholar.RDF.addStatement(attachmentResource, n.dc+"title", attachment.title, true); + // Add see also info to RDF + generateSeeAlso(attachmentResource, attachment.seeAlso); + generateTags(attachmentResource, attachment.tags); } function doExport() { @@ -3782,6 +3794,7 @@ function doExport() { prism:"http://prismstandard.org/namespaces/1.2/basic/", foaf:"http://xmlns.com/foaf/0.1/", vcard:"http://nwalsh.com/rdf/vCard#", + link:"http://purl.org/rss/1.0/modules/link/", fs:"http://chnm.gmu.edu/firefoxscholar/rdf#" }; @@ -3803,13 +3816,10 @@ function doExport() { while(item = Scholar.nextItem()) { items.push(item); - if(item.itemType == "attachment" && item.path) { - // file is stored locally (paths are always unique) - itemResources[item.itemID] = item.path+item.filename; - } else if(item.ISBN && !usedResources["urn:isbn:"+item.ISBN]) { + if(item.ISBN && !usedResources["urn:isbn:"+item.ISBN]) { itemResources[item.itemID] = "urn:isbn:"+item.ISBN; usedResources[itemResources[item.itemID]] = true; - } else if(item.url && !usedResources[item.url]) { + } else if(item.itemType != "attachment" && item.url && !usedResources[item.url]) { itemResources[item.itemID] = item.url; usedResources[itemResources[item.itemID]] = true; } else { @@ -3822,17 +3832,8 @@ function doExport() { } for each(var attachment in item.attachments) { - if(attachment.path) { - // file is stored locally (paths are always unique) - itemResources[attachment.itemID] = attachment.path+attachment.filename; - } else if(!usedResources[attachment.url]) { - // file is referenced via url, and no other item has this url - itemResources[attachment.itemID] = attachment.url; - usedResources[attachment.url] = true; - } else { - // just specify a node ID - itemResources[attachment.itemID] = "#item:"+attachment.itemID; - } + // just specify a node ID + itemResources[attachment.itemID] = "#item:"+attachment.itemID; } } @@ -4103,7 +4104,8 @@ function doExport() { Scholar.RDF.addStatement(resource, n.dcterms+"isReferencedBy", noteResource, false); // Add see also info to RDF - generateSeeAlso(resource, item.notes[j].seeAlso); + generateSeeAlso(noteResource, item.notes[j].seeAlso); + generateTags(noteResource, item.notes[j].tags); } if(item.note) { @@ -4115,18 +4117,14 @@ function doExport() { for each(var attachment in item.attachments) { var attachmentResource = itemResources[attachment.itemID]; - Scholar.RDF.addStatement(resource, n.dc+"relation", attachmentResource, false); + Scholar.RDF.addStatement(resource, n.link+"link", attachmentResource, false); handleAttachment(attachmentResource, attachment); } - /** TAGS **/ + /** SEE ALSO AND TAGS **/ - for(var j in item.tags) { - Scholar.RDF.addStatement(resource, n.dc+"subject", item.tags[j], true); - } - - // Add see also info to RDF generateSeeAlso(resource, item.seeAlso); + generateTags(resource, item.tags); } /** RDF COLLECTION STRUCTURE **/ @@ -4314,6 +4312,12 @@ function handleAttachment(node, attachment) { } attachment.title = getFirstResults(node, [n.dc+"title"], true); + var path = getFirstResults(node, [rdf+"resource"]); + if(path) { + attachment.path = Scholar.RDF.getResourceURI(path[0]); + } + attachment.charset = getFirstResults(node, [n.link+"charset"], true); + attachment.mimeType = getFirstResults(node, [n.link+"type"], true); var identifiers = getFirstResults(node, [n.dc+"identifier"]); for each(var identifier in identifiers) { @@ -4329,28 +4333,9 @@ function handleAttachment(node, attachment) { } } - var formats = getFirstResults(node, [n.dc+"format"]); - for each(var format in formats) { - if(typeof(format) != "string") { - var formatType = Scholar.RDF.getTargets(format, rdf+"type"); - if(formatType) { - formatType = Scholar.RDF.getResourceURI(formatType[0]); - - if(formatType == n.dcterms+"IMT") { // uri is url - attachment.mimeType = getFirstResults(format, [rdf+"value"], true); - } - } - } - } - - var stringNode = node; - if(typeof(stringNode) != "string") { - stringNode = Scholar.RDF.getResourceURI(stringNode); - } - if(stringNode.substr(0, 8) == "file:///") { - // not a protocol specifier; we have a path name - attachment.path = stringNode; - } + // get seeAlso and tags + processSeeAlso(node, attachment); + processTags(node, attachment); return attachment; } @@ -4384,6 +4369,29 @@ function processCollection(node, collection) { return collection; } +function processSeeAlso(node, newItem) { + var relations; + newItem.itemID = Scholar.RDF.getResourceURI(node); + newItem.seeAlso = new Array(); + if(relations = getFirstResults(node, [n.dc+"relation"])) { + for each(var relation in relations) { + newItem.seeAlso.push(Scholar.RDF.getResourceURI(relation)); + } + } +} + +function processTags(node, newItem) { + var subjects; + newItem.tags = new Array(); + if(subjects = getFirstResults(node, [n.dc+"subject"])) { + for each(var subject in subjects) { + if(typeof(subject) == "string") { // a regular tag + newItem.tags.push(subject); + } + } + } +} + // gets the node with a given type from an array function getNodeByType(nodes, type) { if(!nodes) { @@ -4402,6 +4410,23 @@ function getNodeByType(nodes, type) { return false; } +// returns true if this resource is part of another (related by any arc besides +// dc:relation or dcterms:hasPart) +// +// used to differentiate independent notes and files +function isPart(node) { + var arcs = Scholar.RDF.getArcsIn(node); + var skip = false; + for each(var arc in arcs) { + arc = Scholar.RDF.getResourceURI(arc); + if(arc != n.dc+"relation" && arc != n.dcterms+"hasPart") { + // related to another item by some arc besides see also + skip = true; + } + } + return skip; +} + function doImport() { rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; @@ -4412,6 +4437,7 @@ function doImport() { prism:"http://prismstandard.org/namespaces/1.2/basic/", foaf:"http://xmlns.com/foaf/0.1/", vcard:"http://nwalsh.com/rdf/vCard#", + link:"http://purl.org/rss/1.0/modules/link/", fs:"http://chnm.gmu.edu/firefoxscholar/rdf#" }; @@ -4432,6 +4458,13 @@ function doImport() { newItem.itemID = Scholar.RDF.getResourceURI(node); var container = undefined; + // figure out if this is a part of another resource, or a linked + // attachment + if(Scholar.RDF.getSources(node, n.dcterms+"isPartOf") || + Scholar.RDF.getSources(node, n.link+"link")) { + continue; + } + // type var type = Scholar.RDF.getTargets(node, rdf+"type"); // also deal with type detection based on parts, so we can differentiate @@ -4472,16 +4505,7 @@ function doImport() { newItem.itemType = "website"; } else if(type == n.bib+"Memo") { // check to see if this note is independent - var arcs = Scholar.RDF.getArcsIn(node); - var skip = false; - for each(var arc in arcs) { - arc = Scholar.RDF.getResourceURI(arc); - if(arc != n.dc+"relation" && arc != n.dcterms+"hasPart") { - // related to another item by some arc besides see also - skip = true; - } - } - if(skip) { + if(isPart(node)) { continue; } @@ -4490,10 +4514,9 @@ function doImport() { // skip collections until all the items are done collections.push(node); continue; - } else if(type == n.fs+"File") { + } else if(type == n.fs+"Attachment") { // check to see if file is independent - var arcs = Scholar.RDF.getArcsIn(node); - if(arcs.length) { + if(isPart(node)) { continue; } @@ -4627,12 +4650,7 @@ function doImport() { newItem.journalAbbreviation = getFirstResults((container ? container : node), [n.dcterms+"alternative"], true); // see also - var relations; - if(relations = getFirstResults(node, [n.dc+"relation"])) { - for each(var relation in relations) { - newItem.seeAlso.push(Scholar.RDF.getResourceURI(relation)); - } - } + processSeeAlso(node, newItem); /** NOTES **/ @@ -4645,13 +4663,8 @@ function doImport() { note.note = getFirstResults(referentNode, [rdf+"value", n.dc+"description"], true); if(note.note != undefined) { // handle see also - var relations; - if(relations = getFirstResults(referentNode, [n.dc+"relation"])) { - note.seeAlso = new Array(); - for each(var relation in relations) { - note.seeAlso.push(Scholar.RDF.getResourceURI(relation)); - } - } + processSeeAlso(referentNode, note); + processTags(referentNode, note); // add note newItem.notes.push(note); @@ -4681,18 +4694,16 @@ function doImport() { } } - /* ATTACHMENTS */ - var relations = getFirstResults(node, [n.dc+"relation"]); - for each(var relation in relations) { + /** ATTACHMENTS **/ + var relations = getFirstResults(node, [n.link+"link"]); + for each(var relation in relations) { var type = Scholar.RDF.getTargets(relation, rdf+"type"); - if(type) { - type = Scholar.RDF.getResourceURI(type[0]); - if(type == n.fs+"File") { - newItem.attachments.push(handleAttachment(relation)); - } + if(Scholar.RDF.getResourceURI(type[0]) == n.fs+"Attachment") { + newItem.attachments.push(handleAttachment(relation)); } } + Scholar.Utilities.debug(newItem); newItem.complete(); }