diff --git a/src/Tools/offlinedoc/buildwikiindex.py b/src/Tools/offlinedoc/buildwikiindex.py
index 581ef0d3e..f7cee86c8 100755
--- a/src/Tools/offlinedoc/buildwikiindex.py
+++ b/src/Tools/offlinedoc/buildwikiindex.py
@@ -42,6 +42,7 @@ NORETRIEVE = ['Manual','Developer_hub','Power_users_hub','Users_hub','Source_doc
 GETTRANSLATIONS = False # Set true if you want to get the translations too.
 MAXFAIL = 3 # max number of retries if download fails
 VERBOSE = True # to display what's going on. Otherwise, runs totally silent.
+WRITETHROUGH = True # if True, the page lists are written to disk after every fetch, so progress survives a failure.
 
 # END CONFIGURATION ##############################################
 
@@ -52,8 +53,24 @@ def crawl():
     todolist = []
     processed = []
     count = 1
-    indexpages,imgs = get(INDEX)
-    todolist.extend(indexpages)
+    if os.path.exists("wikifiles.txt"): # resume support: reload pages already fetched in a previous run
+        f = open("wikifiles.txt","r")
+        if VERBOSE: print "Reading existing list..."
+        for l in f.readlines():
+            if l.strip() != "":
+                if VERBOSE: print "Adding ",l.strip()
+                processed.append(l.strip())
+        f.close()
+    if os.path.exists("todolist.txt"): # resume support: reload the pending queue
+        f = open("todolist.txt","r")
+        if VERBOSE: print "Reading existing todo list..."
+        for l in f.readlines():
+            if l.strip() != "":
+                todolist.append(l.strip())
+        f.close()
+    else: # no saved state: start crawling from the index page
+        indexpages,imgs = get(INDEX)
+        todolist.extend(indexpages)
     while todolist:
         targetpage = todolist.pop()
         if not targetpage in NORETRIEVE:
@@ -66,8 +83,12 @@ def crawl():
                 for p in pages:
                     if (not (p in todolist)) and (not (p in processed)):
                         todolist.append(p)
+            if WRITETHROUGH: # persist progress after every page
+                writeList(processed)
+                writeList(todolist,"todolist.txt")
     if VERBOSE: print "Fetched ", count, " pages"
-    writeList(processed)
+    if not WRITETHROUGH:
+        writeList(processed)
     return 0
 
 def get(page):
@@ -136,12 +157,22 @@ def fetchpage(page):
             failcount += 1
     print 'Error: unable to fetch page ' + page
 
-def writeList(pages):
-    f = open("wikifiles.txt","wb")
+def cleanList(pagelist):
+    "removes duplicate and redlink entries from the given list"
+    npages = []
+    for p in pagelist:
+        if not p in npages:
+            if not "redlink" in p:
+                npages.append(p)
+    return npages
+
+def writeList(pages,filename="wikifiles.txt"):
+    pages = cleanList(pages)
+    f = open(filename,"wb")
     for p in pages:
         f.write(p+"\n")
     f.close()
-    if VERBOSE: print "written wikifiles.txt"
+    if VERBOSE: print "written ",filename
 
 if __name__ == "__main__":
     crawl()
diff --git a/src/Tools/offlinedoc/downloadwiki.py b/src/Tools/offlinedoc/downloadwiki.py
index 758910766..50f88a2eb 100755
--- a/src/Tools/offlinedoc/downloadwiki.py
+++ b/src/Tools/offlinedoc/downloadwiki.py
@@ -236,6 +236,9 @@ def fetchpage(page):
 
 def fetchimage(imagelink):
     "retrieves given image from the wiki and saves it"
+    if imagelink[0:5] == "File:": # "File:" links point to wiki description pages, not to the images themselves
+        print "Skipping file page link"
+        return
     filename = re.findall('.*/(.*)',imagelink)[0]
     print "saving",filename
     if not exists(filename,image=True):
@@ -263,7 +266,7 @@ def local(page,image=False):
 
 def exists(page,image=False):
     "checks if given page/image already exists"
-    path = local(page,image)
+    path = local(page.replace("/","-"),image) # match saved filenames, where subpage slashes become dashes
     if os.path.exists(path):
         return True
     return False