diff --git a/translators/Google Patents.js b/translators/Google Patents.js index 12dffd5cb..2a6719e96 100644 --- a/translators/Google Patents.js +++ b/translators/Google Patents.js @@ -12,8 +12,12 @@ } function detectWeb(doc, url) { + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == 'x') return namespace; else return null; + } : null; - if (doc.location.href.match("Search")) { + if (doc.evaluate('//font[contains(./text(), "Result")]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { return "multiple"; } else if (doc.location.href.match("id")) { return "patent"; @@ -36,44 +40,28 @@ function scrape(doc, url) { } : null; var dataTags = new Object(); - var headings = new Array(); var newItem = new Zotero.Item("patent"); - //checks format type - if (doc.location.href.match("printsec")) { - - var contents = doc.evaluate('//table[@id="summarytable"]/tbody/tr[1]/td', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent; - var xPathHeadings = doc.evaluate('//b', doc, nsResolver, XPathResult.ANY_TYPE, null); - - var xPathCount = doc.evaluate('count (//b)', doc, nsResolver, XPathResult.ANY_TYPE, null); - - if (doc.evaluate('//span[@class="addmd"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { - - var author = doc.evaluate('//span[@class="addmd"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent; - newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "inventor")); + //Grab the patent_bibdata items and the text node directly next to them + var xPathHeadings = doc.evaluate('//div[@class="patent_bibdata"]//b', doc, nsResolver, XPathResult.ANY_TYPE, null); + var xPathContents = doc.evaluate('//div[@class="patent_bibdata"]//b/following::text()[1]', doc, nsResolver, XPathResult.ANY_TYPE, null); + + // create an associative array of the items and their contents + var heading, content; + while( heading = xPathHeadings.iterateNext(), content = xPathContents.iterateNext()){ + if(heading.textContent == 'Publication number'){ + content = doc.evaluate('//div[@class="patent_bibdata"]//b[text()="Publication number"]/following::nobr[1]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); } - - } else { - - var xPathHeadings = doc.evaluate('//div[@class="patent_bibdata"]/p/b', doc, nsResolver, XPathResult.ANY_TYPE, null); - - var xPathCount = doc.evaluate('count (//div[@class="patent_bibdata"]/p/b)', doc, nsResolver, XPathResult.ANY_TYPE, null); - - - var xPathContents = doc.evaluate('//div[@class="patent_bibdata"]/p', doc, nsResolver, XPathResult.ANY_TYPE, null); - var contentsCount = doc.evaluate('count (//div[@class="patent_bibdata"]/p)', doc, nsResolver, XPathResult.ANY_TYPE, null); - - var contents; - for (i = 0; i < contentsCount.numberValue; i++) { - contents = (contents + xPathContents.iterateNext().textContent + " "); - } - - if (doc.evaluate('//td[3]/p', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { - newItem.abstractNote = (doc.evaluate('//td[3]/p', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.replace("Abstract", '')); - } - + dataTags[heading.textContent] = content.textContent.replace(": ", '');; + //Zotero.debug(dataTags); } + if (doc.evaluate('//td[3]/p', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { + newItem.abstractNote = (doc.evaluate('//td[3]/p', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.replace("Abstract", '')); + } + + + /* for (var i =0; i < xPathCount.numberValue; i++) { headings.push(xPathHeadings.iterateNext().textContent); @@ -83,23 +71,28 @@ function scrape(doc, url) { var splitContent = new Array(); splitContent = contents.split(/xxx/); - + */ //associate headings with contents. - for (var i = 0; i < headings.length; i++) { - fieldTitle = headings[i].replace(/\s+|\W*/g, ''); - - if (fieldTitle == "USClassification" | fieldTitle == "InternationalClassification" | fieldTitle == "Abstract") { + +//extra field\ + newItem.extra = ''; + + for (fieldTitle in dataTags) { + Zotero.debug(fieldTitle); + //fieldTitle = item.replace(/\s+|\W*/g, ''); + /* + if (fieldTitle == "US Classification" | fieldTitle == "International Classification" | fieldTitle == "Abstract") { dataTags[fieldTitle] = splitContent[i+1]; } else { dataTags[fieldTitle] = splitContent[i+1].replace(": ", ''); } - + */ if (dataTags[fieldTitle].match("About this patent")) { dataTags[fieldTitle] = dataTags[fieldTitle].replace("About this patent", ''); } //author(s) - if (fieldTitle == "Inventors") { + if (fieldTitle == "Inventors" | fieldTitle == "Inventor") { var authors = dataTags[fieldTitle].split(", "); for (var j = 0; j < authors.length; j++) { newItem.creators.push(Zotero.Utilities.cleanAuthor(authors[j], "inventor")); @@ -107,26 +100,28 @@ function scrape(doc, url) { } else if (fieldTitle == "Inventor") { newItem.creators.push(Zotero.Utilities.cleanAuthor(dataTags["Inventor"], "inventor")); } + + if (fieldTitle == "U.S. Classification" ) { + newItem.extra += "U.S. Classification: " + dataTags["U.S. Classification"]+"\n"; + } else if (fieldTitle == "International Classification" ) { + newItem.extra += "International Classification: " + dataTags["International Classification"]+"\n"; + } else if (fieldTitle == "Filing date" ) { + newItem.extra += "Filing Date: " + dataTags["Filing date"]+"\n"; + } else if (fieldTitle == "Publication number" ) { + newItem.extra += "Publication number: " +dataTags["Publication number"]+"\n"; + } } - //extra field - if (dataTags["USClassification"] && dataTags["InternationalClassification"]) { - Zotero.debug(doc.title); - newItem.extra = ("U.S. Classification: " + dataTags["USClassification"] + "; International Classification: " + dataTags["InternationalClassification"]); - } else if (dataTags["USClassification"] ) { - newItem.extra = ("U.S. Classification: " + dataTags["USClassification"]); - } else if (dataTags["InternationalClassification"]) { - newItem.extra = ("International Classification: " + dataTags["InternationalClassification"]); - } + - associateData (newItem, dataTags, "Patentnumber", "patentNumber"); - associateData (newItem, dataTags, "Issuedate", "date"); + associateData (newItem, dataTags, "Patent number", "patentNumber"); + associateData (newItem, dataTags, "Issue date", "date"); associateData (newItem, dataTags, "Assignees", "assignee"); associateData (newItem, dataTags, "Assignee", "assignee"); associateData (newItem, dataTags, "Abstract", "abstractNote"); - associateData (newItem, dataTags, "Applicationnumber", "applicationNumber"); + associateData (newItem, dataTags, "Application number", "applicationNumber"); - newItem.title = doc.evaluate('//h2[@class="title"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent; + newItem.title = doc.evaluate('//h1[@class="title"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent; newItem.url = doc.location.href; newItem.complete(); @@ -138,33 +133,27 @@ function doWeb(doc, url) { if (prefix == 'x') return namespace; else return null; } : null; + var host = 'http://' + doc.location.host + "/"; + var articles = new Array(); if (detectWeb(doc, url) == "multiple") { + var iterator = doc.evaluate('//a[@class = "big"]', doc, nsResolver, XPathResult.ANY_TYPE, null); + var links = []; + var element = iterator.iterateNext(); var items = new Object(); - - var xPathFirstTitle = doc.evaluate('//div[@id="results_container"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null); - var firstTitle = xPathFirstTitle.iterateNext(); - - var titles = doc.evaluate('//p/a', doc, nsResolver, XPathResult.ANY_TYPE, null); - - items[firstTitle.href] = firstTitle.textContent; - - var next_title; - while (next_title = titles.iterateNext()) { - if (next_title.textContent.match("RSS feed")) { - - } else { - items[next_title.href] = next_title.textContent; - } + while(element) { + items[element.href] = element.textContent; + element = iterator.iterateNext(); } items = Zotero.selectItems(items); + if(!items) return true; for (var i in items) { articles.push(i); } - } else { - articles = [url]; + } Zotero.Utilities.processDocuments(articles, scrape, function() {Zotero.done();}); Zotero.wait(); + }