diff --git a/scrapers.sql b/scrapers.sql
index f304df8d4..fe2949637 100644
--- a/scrapers.sql
+++ b/scrapers.sql
@@ -866,7 +866,7 @@ REPLACE INTO translators VALUES ('a2363670-7040-4cb9-8c48-6b96584e92ee', '1.0.0b
 	Zotero.wait();
 }');
 
-REPLACE INTO translators VALUES ('da440efe-646c-4a18-9958-abe1f7d55cde', '1.0.0b4r1', '', '2007-09-13 12:00:00', '0', '100', '4', 'NCSU Library (Endeca 2)', 'Sean Takats', '^https?://[^\.]+.lib.ncsu.edu/(?:web2/tramp2\.exe|catalog/\?)',
+REPLACE INTO translators VALUES ('da440efe-646c-4a18-9958-abe1f7d55cde', '1.0.0b4.r1', '', '2008-03-25 16:30:00', '0', '100', '4', 'NCSU Library (Endeca 2)', 'Sean Takats', '^https?://[^\.]+.lib.ncsu.edu/(?:web2/tramp2\.exe|catalog/\?)',
 'function detectWeb(doc, url) {
 	var namespace = doc.documentElement.namespaceURI;
 	var nsResolver = namespace ? function(prefix) {
@@ -2918,7 +2918,149 @@ REPLACE INTO translators VALUES ('5dd22e9a-5124-4942-9b9e-6ee779f1023e', '1.0.0b
 	Zotero.wait();
 }');
 
-REPLACE INTO translators VALUES ('d3b1d34c-f8a1-43bb-9dd6-27aa6403b217', '1.0.0b4.r5', '', '2008-03-25 00:50:00', '1', '100', '4', 'YouTube', 'Sean Takats and Michael Berkowitz', 'https?://[^/]*youtube\.com\/',
+-- YouTube (legacy): this row keeps the existing translator id and is renamed, while the row that follows
+-- (id 360da304-...) takes over the plain YouTube label. NOTE(review): the third value here (1.0.0rc3) and the
+-- second value of the following row (1.0.0rc4) look like a max/min client version split; confirm against the translators schema.
+REPLACE INTO translators VALUES ('d3b1d34c-f8a1-43bb-9dd6-27aa6403b217', '1.0.0b4.r5', '1.0.0rc3', '2008-03-25 16:30:00', '1', '100', '4', 'YouTube (legacy)', 'Sean Takats and Michael Berkowitz', 'https?://[^/]*youtube\.com\/',
+'function detectWeb(doc, url){
+	// Returns "videoRecording" for a page with a hidden video_id input, "multiple" for listing pages.
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == ''x'') return namespace; else return null;
+	} : null;
+
+	var xpath = ''//input[@type="hidden" and @name="video_id"]'';
+	if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
+		return "videoRecording";
+	}
+	if (doc.evaluate(''//div[@class="vtitle"]/a[@class="vtitlelink" and contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()){
+		return "multiple";
+	}
+	if (doc.evaluate(''//div[starts-with(@class, "vtitle")]/a[contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()){
+		return "multiple";
+	}
+	if (doc.evaluate(''//div[@class="vltitle"]/div[@class="vlshortTitle"]/a[contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()){
+		return "multiple";
+	}
+}
+
+',
+'function doWeb(doc, url){
+	// Collects the video id(s) found on the page (listing pages go through Zotero.selectItems) and hands them to getData.
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+		if (prefix == ''x'') return namespace; else return null;
+	} : null;
+	var host = doc.location.host;
+	var video_ids = new Array();
+	var xpath = ''//input[@type="hidden" and @name="video_id"]'';
+	var elmts;
+	var elmt;
+	elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
+	elmt = elmts.iterateNext();
+	if(elmt) {
+		//single video
+		var video_id = elmt.value;
+		video_ids.push(video_id);
+	} else {
+		// multiple videos
+		var items = new Object();
+		var videoRe = /\/watch\?v=([a-zA-Z0-9-_]+)/;
+// search results
+		if (elmt = doc.evaluate(''//div[@class="vtitle"]/a[@class="vtitlelink" and contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()){
+			elmts = doc.evaluate(''//div[@class="vtitle"]/a[@class="vtitlelink" and contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+// categories and community pages and user pages and browse pages
+		} else if (doc.evaluate(''//div[starts-with(@class, "vtitle")]/a[contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()){
+			elmts = doc.evaluate(''//div[starts-with(@class, "vtitle")]/a[contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+		} else if (doc.evaluate(''//div[@class="vltitle"]/div[@class="vlshortTitle"]/a[contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()){
+			elmts = doc.evaluate(''//div[@class="vltitle"]/div[@class="vlshortTitle"]/a[contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
+		}
+		while (elmt = elmts.iterateNext()){
+			var title = elmt.textContent;
+			title = Zotero.Utilities.cleanString(title);
+			var link = elmt.href;
+			var m = videoRe.exec(link);
+			var video_id = m[1];
+			items[video_id] = title;
+		}
+		items = Zotero.selectItems(items);
+		if(!items) return true;
+		for(var i in items) {
+			video_ids.push(i);
+		}
+	}
+	getData(video_ids, host);
+}
+
+function getData(ids, host){
+	// Fetches one gdata feed entry per id and builds a videoRecording item from each response.
+	var uris = new Array();
+	var url = "http://gdata.youtube.com/feeds/videos/";
+	for each(var id in ids){
+		uris.push(url+id);
+	}
+	Zotero.Utilities.HTTP.doGet(uris, function(text) {
+		// clean up header
+		text = text.replace(/<\?xml[^>]*\?>/, "");
+		// NOTE(review): the next two replacements and the <zotero> wrapper were reconstructed from the comments here and
+		// from the media_* / yt_* names read below; verify them against the upstream translator before merging
+		text = text.replace(/<entry[^>]*>/, "<entry>");
+		// replace colons in XML tags
+		text = text.replace(/<[^>]+>/g, function(tag) {
+			return tag.replace(/(<\/?|\s)(\w+):(?=\w)/g, "$1$2_");
+		});
+		// pad xml
+		text = "<zotero>"+text+"</zotero>";
+		var xml = new XML(text);
+		var newItem = new Zotero.Item("videoRecording");
+		var title = "";
+		// media_title is only dereferenced once its presence is known
+		if (xml..media_title.length()){
+			title = Zotero.Utilities.cleanString(xml..media_title[0].text().toString());
+			if (title == ""){
+				title = " ";
+			}
+			newItem.title = title;
+		}
+		if (xml..media_keywords.length()){
+			var keywords = xml..media_keywords[0].text().toString();
+			keywords = keywords.split(",");
+			for each(var tag in keywords){
+				newItem.tags.push(Zotero.Utilities.cleanString(tag));
+			}
+		}
+		if (xml..published.length()){
+			var date = xml..published[0].text().toString();
+			newItem.date = date.substr(0, 10);
+		}
+		if (xml..author.name.length()){
+			var author = xml..author.name[0].text().toString();
+			newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "contributor", true));
+		}
+		if (xml..media_player.length()){
+			var url = xml..media_player[0].@url.toString();
+			newItem.url = url;
+			newItem.attachments.push({title:"YouTube Link", snapshot:false, mimeType:"text/html", url:url});
+		}
+		if (xml..yt_duration.length()){
+			var runningTime = xml..yt_duration[0].@seconds.toString();
+			newItem.runningTime = runningTime + " seconds";
+		}
+		if (xml..media_description.length()){
+			newItem.abstractNote = xml..media_description[0].text().toString();
+		}
+
+		var next_url = newItem.url.replace(/\/\/([^/]+)/, "//" + host).replace("watch?v=", "v/") + ''&rel=1'';
+		Zotero.Utilities.loadDocument(next_url, function(newDoc) {
+			var new_url = newDoc.location.href.replace("swf/l.swf", "get_video");
+			newItem.attachments.push({url:new_url, title:"YouTube Video Recording", mimeType:"video/x-flv"});
+			newItem.complete();
+		}, function() {Zotero.done();});
+	});
+	Zotero.wait();
+}');
+
+REPLACE INTO translators VALUES ('360da304-5a4c-44ea-b655-28dcb5ebfa25', '1.0.0rc4', '', '2008-03-25 16:30:00', '1', '100', '4', 'YouTube', 'Sean Takats and Michael Berkowitz', 'https?://[^/]*youtube\.com\/',
 'function detectWeb(doc, url){
 	var namespace = doc.documentElement.namespaceURI;
 	var nsResolver = namespace ? function(prefix) {