diff --git a/scrapers.sql b/scrapers.sql index 7e62b8fd9..657b8c5c2 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -3051,143 +3051,7 @@ REPLACE INTO translators VALUES ('5dd22e9a-5124-4942-9b9e-6ee779f1023e', '1.0.0b Zotero.wait(); }'); -REPLACE INTO translators VALUES ('d3b1d34c-f8a1-43bb-9dd6-27aa6403b217', '1.0.0b4.r5', '1.0.0rc3', '2008-03-26 03:00:00', '1', '100', '4', 'YouTube (legacy)', 'Sean Takats and Michael Berkowitz', 'https?://[^/]*youtube\.com\/', -'function detectWeb(doc, url){ - var namespace = doc.documentElement.namespaceURI; - var nsResolver = namespace ? function(prefix) { - if (prefix == ''x'') return namespace; else return null; - } : null; - - var xpath = ''//input[@type="hidden" and @name="video_id"]''; - if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) { - return "videoRecording"; - } - if (doc.evaluate(''//div[@class="vtitle"]/a[@class="vtitlelink" and contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()){ - return "multiple"; - } - if (doc.evaluate(''//div[starts-with(@class, "vtitle")]/a[contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()){ - return "multiple"; - } - if (doc.evaluate(''//div[@class="vltitle"]/div[@class="vlshortTitle"]/a[contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()){ - return "multiple"; - } -} - -', -'function doWeb(doc, url){ - var namespace = doc.documentElement.namespaceURI; - var nsResolver = namespace ? function(prefix) { - if (prefix == ''x'') return namespace; else return null; - } : null; - var host = doc.location.host; - var video_ids = new Array(); - var xpath = ''//input[@type="hidden" and @name="video_id"]''; - var elmts; - var elmt; - elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null); - elmt = elmts.iterateNext(); - if(elmt) { - //single video - var video_id = elmt.value; - video_ids.push(video_id); - } else { - // multiple videos - var items = new Object(); - var videoRe = /\/watch\?v=([a-zA-Z0-9-_]+)/; -// search results - if (elmt = doc.evaluate(''//div[@class="vtitle"]/a[@class="vtitlelink" and contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()){ - elmts = doc.evaluate(''//div[@class="vtitle"]/a[@class="vtitlelink" and contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null); -// categories and community pages and user pages and browse pages - } else if (doc.evaluate(''//div[starts-with(@class, "vtitle")]/a[contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()){ - elmts = doc.evaluate(''//div[starts-with(@class, "vtitle")]/a[contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null); - } else if (doc.evaluate(''//div[@class="vltitle"]/div[@class="vlshortTitle"]/a[contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()){ - elmts = doc.evaluate(''//div[@class="vltitle"]/div[@class="vlshortTitle"]/a[contains(@href, "/watch?v=")]'', doc, nsResolver, XPathResult.ANY_TYPE, null); - } - while (elmt = elmts.iterateNext()){ - var title = elmt.textContent; - title = Zotero.Utilities.cleanString(title); - var link = elmt.href; - var m = videoRe(link); - var video_id = m[1]; - items[video_id] = title; - } - items = Zotero.selectItems(items); - if(!items) return true; - for(var i in items) { - video_ids.push(i); - } - } - getData(video_ids, host); -} - -function getData(ids, host){ - var uris = new Array(); - var url = "http://gdata.youtube.com/feeds/videos/"; - for each(var id in ids){ - uris.push(url+id); - } - Zotero.Utilities.HTTP.doGet(uris, function(text) { - // clean up header - text = text.replace(/<\?xml[^>]*\?>/, ""); - text = text.replace(/]*>/, ""); - // replace colons in XML tags - text = text.replace(/]+>/g, ""); - // pad xml - text = ""+text+""; - var xml = new XML(text); - var newItem = new Zotero.Item("videoRecording"); - var title = ""; - var title = xml..media_title[0].text().toString(); - if (xml..media_title.length()){ - var title = Zotero.Utilities.cleanString(xml..media_title[0].text().toString()); - if (title == ""){ - title = " "; - } - newItem.title = title; - } - if (xml..media_keywords.length()){ - var keywords = xml..media_keywords[0].text().toString(); - keywords = keywords.split(","); - for each(var tag in keywords){ - newItem.tags.push(Zotero.Utilities.cleanString(tag)); - } - } - if (xml..published.length()){ - var date = xml..published[0].text().toString(); - newItem.date = date.substr(0, 10); - } - if (xml..author.name.length()){ - var author = xml..author.name[0].text().toString(); - newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "contributor", true)); - } - if (xml..media_player.length()){ - var url = xml..media_player[0].@url.toString(); - newItem.url = url; - newItem.attachments.push({title:"YouTube Link", snapshot:false, mimeType:"text/html", url:url}); - } - if (xml..yt_duration.length()){ - var runningTime = xml..yt_duration[0].@seconds.toString(); - newItem.runningTime = runningTime + " seconds"; - } - if (xml..media_description.length()){ - newItem.abstractNote = xml..media_description[0].text().toString(); - } - - var next_url = newItem.url.replace(/\/\/([^/]+)/, "//" + host).replace("watch?v=", "v/") + ''&rel=1''; - Zotero.Utilities.loadDocument(next_url, function(newDoc) { - var new_url = newDoc.location.href.replace("swf/l.swf", "get_video"); - newItem.attachments.push({url:new_url, title:"YouTube Video Recording", mimeType:"video/x-flv"}); - newItem.complete(); - }, function() {Zotero.done;}); - }); - Zotero.wait(); -}'); - -REPLACE INTO translators VALUES ('360da304-5a4c-44ea-b655-28dcb5ebfa25', '1.0.0rc4', '', '2008-03-26 03:00:00', '1', '100', '4', 'YouTube', 'Sean Takats and Michael Berkowitz', 'https?://[^/]*youtube\.com\/', +REPLACE INTO translators VALUES ('d3b1d34c-f8a1-43bb-9dd6-27aa6403b217', '1.0.0rc4', '', '2008-03-30 08:30:00', '1', '100', '4', 'YouTube', 'Sean Takats and Michael Berkowitz', 'https?://[^/]*youtube\.com\/', 'function detectWeb(doc, url){ var namespace = doc.documentElement.namespaceURI; var nsResolver = namespace ? function(prefix) {