From 43025f971d39ce29bb9762ffb74400554fa545f8 Mon Sep 17 00:00:00 2001 From: Dan Stillman Date: Thu, 17 Dec 2009 09:33:04 +0000 Subject: [PATCH] Pushed National Archives of Australia Deleted NAA RecordSearch (which it replaces) --- deleted.txt | 3 +- repotime.txt | 2 +- translators/NAA RecordSearch.js | 72 ------- translators/National Archives of Australia.js | 204 ++++++++++++++++++ 4 files changed, 207 insertions(+), 74 deletions(-) delete mode 100644 translators/NAA RecordSearch.js create mode 100644 translators/National Archives of Australia.js diff --git a/deleted.txt b/deleted.txt index 40e9c94db..3d160ba90 100644 --- a/deleted.txt +++ b/deleted.txt @@ -1,4 +1,4 @@ -2 # Increment number when modifying file +3 # Increment number when modifying file 96b9f483-c44d-5784-cdad-ce21b984 # Amazon add7c71c-21f3-ee14-d188-caf9da12 # SIRSI 2003+ @@ -13,3 +13,4 @@ e07e9b8c-0e98-4915-bb5a-32a08cb2f365 # Open WorldCat (Search) dd149efc-7f0e-43e4-b3df-b6d15e171717 # Persée 8c1f42d5-02fa-437b-b2b2-73afc768eb07 # PNAS (replaced by HighWire 2.0) 56ea09bc-57ee-4f50-976e-cf7cb1f6c6d8 # Royal Society Publishing (replaced by HighWire 2.0) +83538f48-906f-40ef-bdb3-e94f63676307 # NAA RecordSearch (replaced by National Archives of Australia) diff --git a/repotime.txt b/repotime.txt index 2188969b9..cc6becc65 100644 --- a/repotime.txt +++ b/repotime.txt @@ -1 +1 @@ -2009-12-17 08:10:00 +2009-12-17 09:35:00 diff --git a/translators/NAA RecordSearch.js b/translators/NAA RecordSearch.js deleted file mode 100644 index b6d19a02e..000000000 --- a/translators/NAA RecordSearch.js +++ /dev/null @@ -1,72 +0,0 @@ -{ - "translatorID":"83538f48-906f-40ef-bdb3-e94f63676307", - "translatorType":4, - "label":"NAA RecordSearch", - "creator":"Tim Sherratt", - "target":"http://naa12.naa.gov.au/scripts/", - "minVersion":"1.0.0b4.r5", - "maxVersion":"", - "priority":100, - "inRepository":true, - "lastUpdated":"2009-01-08 08:19:07" -} - -function detectWeb(doc, url) { - if (url.match(/Items_listing.asp/i)) { - return "multiple"; - } else if (url.match(/ItemDetail.asp/i)) { - return "manuscript"; - } -} - -function doWeb(doc, url) { - var namespace = doc.documentElement.namespaceURI; - var nsResolver = namespace ? function(prefix) { - if (prefix == 'x') return namespace; else return null; - } : null; - if (detectWeb(doc, url) == "multiple") { - var records = new Array(); - var items = new Object(); - var titles = doc.evaluate('//form[2]/table/tbody/tr/td[b="Title"]', doc, nsResolver, XPathResult.ANY_TYPE, null); - var links = doc.evaluate('//form[2]/table/tbody/tr/td[b="Control symbol"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null); - var title; - var link; - while ((title = titles.iterateNext()) && (link = links.iterateNext())) { - items[link.href] = Zotero.Utilities.trimInternal(title.lastChild.textContent); - } - items = Zotero.selectItems(items); - for (var i in items) { - records.push(i); - } - } else { - records = [url]; - } - Zotero.Utilities.processDocuments(records, function(doc) { - var title = Zotero.Utilities.cleanString(doc.evaluate('//table/tbody/tr/td[b="Title"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().lastChild.textContent); - var series = Zotero.Utilities.cleanString(doc.evaluate('//table/tbody/tr/td[b="Series number"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().lastChild.textContent); - var control = Zotero.Utilities.cleanString(doc.evaluate('//table/tbody/tr/td[b="Control symbol"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().lastChild.textContent); - var date = Zotero.Utilities.cleanString(doc.evaluate('//table/tbody/tr/td[b="Contents date range"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().lastChild.textContent); - var access = Zotero.Utilities.cleanString(doc.evaluate('//table/tbody/tr/td[b="Access status"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().lastChild.textContent); - var location = Zotero.Utilities.cleanString(doc.evaluate('//table/tbody/tr/td[b="Location"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().lastChild.textContent); - var barcode = Zotero.Utilities.cleanString(doc.evaluate('//table/tbody/tr/td[b="Barcode"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().lastChild.textContent); - if (doc.body.innerHTML.match("View digital copy")) { - var digitised = "yes"; - } else { - var digitised = "no"; - } - var repository ="National Archives of Australia, " + location; - var url = "http://www.aa.gov.au/cgi-bin/Search?O=I&Number=" + barcode; - var ref_number = series + ", " + control; - var type = "file"; - var item = new Zotero.Item("manuscript"); - item.title = title; - item.archiveLocation = ref_number; - item.url = url; - item.date = date; - item.manuscriptType = type; - item.extra = "Access: " + access + "\nDigitised: " + digitised; - item.repository = repository; - item.complete(); - - }, function() {Zotero.done();}); -} \ No newline at end of file diff --git a/translators/National Archives of Australia.js b/translators/National Archives of Australia.js new file mode 100644 index 000000000..f4945a603 --- /dev/null +++ b/translators/National Archives of Australia.js @@ -0,0 +1,204 @@ +{ + "translatorID":"50a4cf3f-92ef-4e9f-ab15-815229159b16", + "translatorType":4, + "label":"National Archives of Australia", + "creator":"Tim Sherratt", + "target":"^http://[^/]*naa.gov.au/", + "minVersion":"1.0", + "maxVersion":"", + "priority":90, + "inRepository":false, + "lastUpdated":"2009-12-17 09:35:00" +} + +function detectWeb(doc, url) { + //RecordSearch - items and series - or Photosearch results + if (url.match(/Series_listing.asp/i) || url.match(/Items_listing.asp/i) || url.match(/PhotoSearchSearchResults.asp/i)) { + return "multiple"; + } else if (url.match(/SeriesDetail.asp/i) || url.match(/ItemDetail.asp/i) || url.match(/PhotoSearchItemDetail.asp/i) || url.match(/imagine.asp/i)) { + return "manuscript"; + } +} +function doWeb(doc, url) { + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == 'x') return namespace; else return null; + } : null; + + // To avoid cross domain errors make sure links match current sub-domain + if (url.match(/naa12/i)) { + baseURL = "http://naa12.naa.gov.au/scripts/"; + } else if (url.match(/recordsearch/i)) { + baseURL = "http://recordsearch.naa.gov.au/scripts/"; + } + var records = new Array(); + var titles, links, title, link; + if (detectWeb(doc, url) == "multiple") { + var items = new Object(); + // Files + if (url.match(/Items_listing.asp/i)) { + titles = doc.evaluate('//td[b="Title"]', doc, nsResolver, XPathResult.ANY_TYPE, null); + links = doc.evaluate('//td[b="Control symbol"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null); + // Photos + } else if (url.match(/PhotoSearchSearchResults.asp/i)) { + titles = doc.evaluate('//td[b="Title :"]/a[1]', doc, nsResolver, XPathResult.ANY_TYPE, null); + links = doc.evaluate('//td[b="Title :"]/a[1]', doc, nsResolver, XPathResult.ANY_TYPE, null); + //Series + } else if (url.match(/Series_listing.asp/i)) { + titles = doc.evaluate('//td[b="Title"]', doc, nsResolver, XPathResult.ANY_TYPE, null); + links = doc.evaluate('//td[b="Series number"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null); + } + while ((title = titles.iterateNext()) && (link = links.iterateNext())) { + items[link.href] = Zotero.Utilities.trimInternal(title.lastChild.textContent); + Zotero.debug(title.lastChild.textContent); + } + items = Zotero.selectItems(items); + for (var i in items) { + records.push(i); + } + } else { + records = [url]; + } + var setupCallback = function () { + if (records.length) { + var item = new Zotero.Item("manuscript"); + item.repository = "National Archives of Australia"; + var record = records.shift(); + Zotero.debug(record); + var postString; + // Scrape digital image - ie a single folio - details + if (record.match(/Imagine.asp/i)) { + // You're using my Greasemonkey script to view images + var b, i, c; + if (doc.body.innerHTML.match(/Digital copy of NAA:/)) { + doc.evaluate('//img[@id="fileimage"]/@src', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent.match(/B=(\d+)&S=(\d+)&/); + b = RegExp.$1; + i = RegExp.$2; + c = Zotero.Utilities.trimInternal(doc.evaluate('//input[@id="printto"]/@value', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + // You're using the original RS interface + } else { + b = Zotero.Utilities.trimInternal(doc.evaluate('//input[@id="Hidden1"]/@value', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + i = Zotero.Utilities.trimInternal(doc.evaluate('//input[@id="Text1"]/@value', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + c = Zotero.Utilities.trimInternal(doc.evaluate('//input[@id="Hidden3"]/@value', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + } + postString = "B=" + b + "&C=" + c + "&F=1&I=" + i + "&L=Y&M=R&MX=Y&S=Y&SE=1&X=N"; + Zotero.Utilities.HTTP.doPost(record, postString, function (text) { + // This is a digital image -- ie a folio + var barcode = text.match(/Digital copy of item with barcode\s+(\d+)/)[1]; + Zotero.debug(barcode); + item.pages = text.match(/NAME="I" VALUE="(\d+)"/)[1]; + item.numPages = text.match(/NAME="C" VALUE="(\d+)"/)[1]; + item.url = "http://naa16.naa.gov.au/rs_images/ShowImage.php?B=" + barcode + "&S=" + item.pages + "&T=P"; + var itemURL = baseURL + "ItemDetail.asp?M=0&B=" + barcode; + item.manuscriptType = 'folio'; + Zotero.Utilities.processDocuments(itemURL, function(itemDoc) { + var series = Zotero.Utilities.trimInternal(itemDoc.evaluate('//td[b="Series number"]', itemDoc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + var control = Zotero.Utilities.trimInternal(itemDoc.evaluate('//td[b="Control symbol"]', itemDoc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + var refNumber = series + ", " + control; + item.archiveLocation = refNumber; + item.title = "Page " + item.pages + " of NAA: "+refNumber; + item.shortTitle = "NAA: " + refNumber; + item.attachments = [{url:item.url, title:"Digital image of NAA: " + refNumber + ", page " + item.pages, mimeType:"image/jpeg" }]; + item.complete(); + setupCallback(); + }); + }); + // Scrape photo details + } else if (record.match(/PhotoSearchItemDetail.asp/)) { + Zotero.Utilities.HTTP.doGet(record, function (text) { + // Clean up unpredictable linebreaks and tabs + text = text.replace(/\n/gm, ""); + text = text.replace(/\r/gm, ""); + text = text.replace(/\t/gm, ""); + item.title = Zotero.Utilities.trimInternal(text.match(/Title :<\/b>(.*?)
Date :<\/b>(.*?)
Image no. :<\/b>(.*?)
Barcode : <\/b>(.*?)
Location : <\/b>(.*?)
Primary subject :<\/b>.*?Not Assigned/)) { var tag1 = text.match(/Primary subject :<\/b>.*?(.*?)<\/a>/)[1]}; + if (!text.match(/Secondary subject :<\/b>.*?Not Assigned/)) { var tag2 = text.match(/Secondary subject :<\/b>.*?(.*?)<\/a>/)[1]}; + if (tag1) { item.tags.push(Zotero.Utilities.trimInternal(tag1).toLowerCase()) }; + if (tag2) { item.tags.push(Zotero.Utilities.trimInternal(tag2).toLowerCase()) }; + var imgURL = "http://naa16.naa.gov.au/rs_images/ShowImage.php?B=" + barcode + "&T=P&S=1"; + item.url = "http://www.naa.gov.au/cgi-bin/Search?O=PSI&Number=" + barcode; + item.manuscriptType = "photograph"; + Zotero.debug(item.tags); + // Save a copy of the photo + item.attachments = [{url:imgURL, title:"Digital image of NAA: "+ item.archiveLocation, mimeType:"image/jpeg" }]; + item.complete(); + setupCallback(); + }); + // Scrape series details + } else if (record.match(/SeriesDetail.asp/i)) { + Zotero.Utilities.processDocuments(record, function (doc) { + item.title = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Title"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + item.archiveLocation = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Series number"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + item.date = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Accumulation dates"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + var location = doc.evaluate('//td[b="Quantity and location"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue; + if (location) { + location = location.textContent.replace(/Quantity and location/i, "").replace(/\s([\w]+)([\d]+\.*\d*)/gi, " $1; $2"); + } + Zotero.debug(location); + var agencies = doc.evaluate('//td[b="Agency / person recording"]/table/tbody/tr/td[2]', doc, nsResolver, XPathResult.ANY_TYPE, null); + while (agency = agencies.iterateNext()) { + item.creators.push({lastName: agency.textContent, creatorType: "creator"}); + } + item.url = "http://www.naa.gov.au/cgi-bin/Search?Number=" + item.archiveLocation; + item.manuscriptType = "series"; + // Find out how many items from this series have been described on RecordSearch + var itemsURL = baseURL + "SearchOF.asp?DP=2&Q=SER_SERIES_NO=QT" + item.archiveLocation + "QT"; + Zotero.Utilities.processDocuments(itemsURL, function(itemDoc) { + var numItems = Zotero.Utilities.trimInternal(itemDoc.evaluate('//tr[2]/td[2]', itemDoc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + Zotero.debug(numItems); + if (numItems == "No records found") { + numItems = "none"; + } + item.extra = "Quantity and location: " + location + "\nNumber of items described: " + numItems; + item.complete(); + setupCallback(); + }); + }); + // Scrape file details + } else if (record.match(/ItemDetail.asp/i)) { + Zotero.Utilities.processDocuments(record, function (doc) { + item.title = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Title"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + var series = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Series number"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + var control = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Control symbol"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + item.date = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Contents date range"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + var access = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Access status"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + var location = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Location"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + var barcode = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Barcode"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + // Has the file been digitised? + if (doc.body.innerHTML.match("View digital copy")) { + var digitised = "yes"; + } else { + var digitised = "no"; + } + item.url = "http://www.naa.gov.au/cgi-bin/Search?O=I&Number=" + barcode; + item.archiveLocation = series + ", " + control; + item.manuscriptType = "file"; + item.extra = "Location: " + location + "\nAccess: " + access + "\nDigitised: " + digitised; + // If it's digitised find out how many pages in the digitised file + itemURL = baseURL + "imagine.asp?B=" + barcode + "&I=1&SE=1"; + if (digitised == "yes") { + Zotero.Utilities.processDocuments(itemURL, function(itemDoc) { + var pages = Zotero.Utilities.trimInternal(itemDoc.evaluate('//input[@id="Hidden3"]/@value', itemDoc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + item.numPages = "1-" + pages; + item.pages = "1-" + pages; + item.complete(); + setupCallback(); + }); + } else { + item.complete(); + setupCallback(); + } + }); + } + } else { + Zotero.done(); + } + } + setupCallback(); + Zotero.wait(); +} +