Compare commits

...

15 Commits

Author SHA1 Message Date
Suzanne Soy
119b100a16 re-enabled update.py, needs debugging 2021-01-31 20:34:11 +00:00
Suzanne Soy
c9d627351e Do not ignore error from Qt tools 2021-01-31 20:30:30 +00:00
Suzanne Soy
6a88c9fe78 fixup! Nightly builds 2021-01-31 20:26:53 +00:00
Suzanne Soy
15e98ff2cf Do not ignore error from calling Inkscape 2021-01-31 20:24:40 +00:00
Suzanne Soy
57521f129a Use os.path.join 2021-01-31 20:21:38 +00:00
Suzanne Soy
a762491d78 inkscape dependency for SVG to PDF 2021-01-31 20:21:06 +00:00
Suzanne Soy
e5f5213d65 Do not lose exit code of crawl() when an error occurs 2021-01-31 20:15:33 +00:00
Suzanne Soy
77d08bb399 Do not ask about copying files in the source directory in the github action; Added --non-interactive 2021-01-31 20:14:26 +00:00
Suzanne Soy
d0803db4fe fixup! Fix qhelpgenerator: could not find a Qt installation of '' 2021-01-31 20:05:49 +00:00
Suzanne Soy
3093572100 fixup! Don't store compiled .pyc files in version control 2021-01-31 20:01:49 +00:00
Suzanne Soy
d1a7903efb Don't store compiled .pyc files in version control 2021-01-31 20:01:49 +00:00
Suzanne Soy
6a37ab405b Fix qhelpgenerator: could not find a Qt installation of '' 2021-01-31 20:01:24 +00:00
Suzanne Soy
ff957af676 Nightly builds 2021-01-31 19:53:13 +00:00
Yorik van Havre
8015a63b27
Merge pull request #1 from cclauss/modernize-Python-2-codes
Use print() function in both Python 2 and Python 3
2019-06-10 15:24:34 -03:00
cclauss
244f63da64 Use print() function in both Python 2 and Python 3
Legacy __print__ statements are syntax errors in Python 3 but __print()__ function works as expected in both Python 2 and Python 3.
2019-01-09 18:00:43 +01:00
7 changed files with 218 additions and 86 deletions

.github/workflows/main.yml (new file, 108 lines added)

@@ -0,0 +1,108 @@
name: Build-Documentation
on:
  # Trigger the workflow every night
  schedule:
    - cron: '0 1 * * *'
  # Trigger on push to master and for pull requests targetting master
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]
  # Trigger manually from the Actions tab
  workflow_dispatch:
jobs:
  build-documentation:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest
    steps:
      - name: Check-out the repository under $GITHUB_WORKSPACE
        uses: actions/checkout@v2
        with:
          fetch-depth: 0
      - name: Install prerequisites from the README
        run: sudo apt install python qttools5-dev-tools wkhtmltopdf python-pypdf2 inkscape
      - name: Print versions
        run: |
          python2 --version || true
          wkhtmltopdf --version --version || true
          qhelpgenerator --qt=qt4 --version || true
      # GitHub Actions currently uses 18.04 as of 02021-01-31, but will likely update in the future
      - name: Try to install prerequisites for Ubuntu 20.04 from the README
        run: sudo apt install qhelpgenerator-qt5 || true
      - name: Install prerequisites for testing
        run: sudo apt install xvfb
      # - name: Remove existing downloads (for now, because the update.py script seems to not work well)
      #   run: |
      #     rm -rf localwiki revisions.txt updates.txt wikifiles.txt
      #     mkdir localwiki
      #     touch revisions.txt updates.txt wikifiles.txt
      - name: Build an index file containing a list of all the files to download
        run: python2 ./buildwikiindex.py
      - name: Download wiki pages
        # Redirect log because it seems we exceed the amount of data GitHub is willing
        # to show in the interacive logs and there seems to be no UI to access the raw
        # logs at this time
        #run: python2 ./downloadwiki.py > ./downloadwiki.py.log
        run: python2 ./downloadwiki.py
      - name: Create a list of revision IDs for each page
        run: python2 ./update.py
      - name: Get a list of pages that have changed
        run: python2 ./update.py
      - name: Download the changed pages (and all their dependencies) again
        run: python2 ./update.py
      - name: Generate freecad.qhc and freecad.qch files
        run: python2 ./buildqhelp.py --non-interactive
      - name: Generate freecad.pdf
        run: python2 ./buildpdf.py
      - name: Split the generated freecad.qch into parts that are smaller than 50Mb (github limit)
        run: split -d --byte=49M localwiki/freecad.qch localwiki/freecad.qch.part
      - name: Reassemble the previously-split freecad.qch
        run: cat localwiki/freecad.qch.part* >> test.qch
      - name: Check that the reassembled test.qch is identical to localwiki/freecad.qch
        run: diff -q test.qch localwiki/freecad.qch
      - name: Try to open the .qch file
        run: |
          xvfb-run assistant -collectionFile localwiki/freecad.qhc &
          sleep 5
          killall assistant
      - name: Upload freecad.qch
        uses: actions/upload-artifact@v2
        with:
          name: freecad.qch
          path: localwiki/freecad.qch
      - name: Upload freecad.qhc
        uses: actions/upload-artifact@v2
        with:
          name: freecad.qhc
          path: localwiki/freecad.qhc
      - name: Upload freecad.pdf
        uses: actions/upload-artifact@v2
        with:
          name: freecad.pdf
          path: localwiki/freecad.pdf
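
A note on the split/reassemble/diff steps above: split -d names the pieces freecad.qch.part00, freecad.qch.part01, and so on, and concatenating them in sorted order must reproduce the original file byte for byte. Below is a minimal Python sketch of the same round-trip check, useful for verifying downloaded artifact parts locally; the paths are assumed to match the workflow, and the sketch is not part of this changeset.

from __future__ import print_function
import glob, hashlib

def sha256(path):
    h = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            h.update(chunk)
    return h.hexdigest()

# Reassemble the parts in sorted order (split -d numbers them 00, 01, ...)
with open('test.qch', 'wb') as out:
    for part in sorted(glob.glob('localwiki/freecad.qch.part*')):
        with open(part, 'rb') as f:
            out.write(f.read())

# Equivalent of the diff -q step: the reassembled file must match the original
print('OK' if sha256('test.qch') == sha256('localwiki/freecad.qch') else 'MISMATCH')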

.gitignore (new file, 2 lines added)

@@ -0,0 +1,2 @@
/*.pyc
/__pycache__

buildpdf.py (modified)

@@ -22,6 +22,7 @@
#* *
#***************************************************************************
from __future__ import print_function
__title__="buildpdf"
__author__ = "Yorik van Havre <yorik@uncreated.net>"
__url__ = "http://www.freecadweb.org"
@@ -363,19 +364,19 @@ def crawl():
return 1
elif PDFCONVERTOR == 'htmldoc':
if os.system('htmldoc --version'):
print "Error: Htmldoc not found, exiting."
print("Error: Htmldoc not found, exiting.")
return 1
try:
from PyPDF2 import PdfFileReader,PdfFileWriter
except:
print "Error: Python-pypdf2 not installed, exiting."
print("Error: Python-pypdf2 not installed, exiting.")
# run ########################################################
buildpdffiles()
joinpdf()
if VERBOSE: print "All done!"
if VERBOSE: print("All done!")
return 0
@@ -389,10 +390,10 @@ def buildpdffiles():
for i in templist:
if i[-5:] == '.html':
fileslist.append(i)
print "converting ",len(fileslist)," pages"
print("converting ",len(fileslist)," pages")
i = 1
for f in fileslist:
print i," : ",f
print(i," : ",f)
if PDFCONVERTOR == 'pisa':
createpdf_pisa(f[:-5])
elif PDFCONVERTOR == 'wkhtmltopdf':
@@ -421,7 +422,7 @@ def createpdf_pisa(pagename):
if (not exists(pagename+".pdf",image=True)) or OVERWRTIE:
infile = open(FOLDER + os.sep + pagename+'.html','ro')
outfile = open(FOLDER + os.sep + pagename+'.pdf','wb')
if VERBOSE: print "Converting " + pagename + " to pdf..."
if VERBOSE: print("Converting " + pagename + " to pdf...")
pdf = pisa.CreatePDF(infile,outfile,FOLDER,link_callback=fetch_resources)
outfile.close()
if pdf.err:
@@ -441,7 +442,7 @@ def createpdf_firefox(pagename):
if os.path.exists(FIREFOXPDFFOLDER + os.sep + pagename + ".pdf"):
shutil.move(FIREFOXPDFFOLDER+os.sep+pagename+".pdf",outfile)
else:
print "-----------------------------------------> Couldn't find print output!"
print("-----------------------------------------> Couldn't find print output!")
def createpdf_htmldoc(pagename):
@@ -458,20 +459,20 @@ def createpdf_wkhtmltopdf(pagename):
infile = FOLDER + os.sep + pagename+'.html'
outfile = FOLDER + os.sep + pagename+'.pdf'
cmd = 'wkhtmltopdf -L 5mm --user-style-sheet '+FOLDER+os.sep+'wkhtmltopdf.css '+infile+' '+outfile
print cmd
print(cmd)
#return os.system(cmd)
else:
print "skipping"
print("skipping")
def joinpdf():
"creates one pdf file from several others, following order from the cover"
from PyPDF2 import PdfFileReader,PdfFileWriter
if VERBOSE: print "Building table of contents..."
if VERBOSE: print("Building table of contents...")
result = PdfFileWriter()
createCover()
inputfile = PdfFileReader(open(FOLDER+os.sep+'Cover.pdf','rb'))
inputfile = PdfFileReader(open(os.path.join(FOLDER,'Cover.pdf'),'rb'))
result.addPage(inputfile.getPage(0))
count = 1
@@ -488,7 +489,7 @@ def joinpdf():
if page == "end":
parent = False
continue
if VERBOSE: print 'Appending',page, "at position",count
if VERBOSE: print('Appending',page, "at position",count)
title = page.replace("_"," ")
pdffile = page + ".pdf"
if exists(pdffile,True):
@@ -504,16 +505,16 @@ def joinpdf():
result.addBookmark(title,count,parent)
count += numpages
else:
print "page",pdffile,"not found, aborting."
print("page",pdffile,"not found, aborting.")
sys.exit()
if VERBOSE: print "Writing..."
if VERBOSE: print("Writing...")
outputfile = open(FOLDER+os.sep+"freecad.pdf",'wb')
result.write(outputfile)
outputfile.close()
if VERBOSE:
print ' '
print 'Successfully created '+FOLDER+os.sep+'freecad.pdf'
print(' ')
print('Successfully created '+FOLDER+os.sep+'freecad.pdf')
def local(page,image=False):
@@ -544,13 +545,16 @@ def makeStyleSheet():
def createCover():
"downloads and creates a cover page"
if VERBOSE: print "fetching " + COVER
if VERBOSE: print("fetching " + COVER)
data = (urlopen(COVER).read())
path = FOLDER + os.sep + "Cover.svg"
path = os.path.join(FOLDER, "Cover.svg")
fil = open(path,'wb')
fil.write(data)
fil.close()
os.system('inkscape --export-pdf='+FOLDER+os.sep+'Cover.pdf'+' '+FOLDER+os.sep+'Cover.svg')
if os.system('inkscape --export-pdf='+os.path.join(FOLDER,'Cover.pdf')+' '+os.path.join(FOLDER,'Cover.svg')) == 0:
return
else:
raise Exception('Conversion of Cover.svg to Cover.pdf failed. Is Inkscape installed?')
if __name__ == "__main__":
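
The createCover() change above stops ignoring the exit status of the Inkscape call ("Do not ignore error from calling Inkscape"). Below is a minimal sketch of the same error-propagation idea using subprocess instead of os.system, which also avoids building the command line by string concatenation; the helper name is hypothetical and not part of the diff.

from __future__ import print_function
import os, subprocess

def svg_to_pdf(folder, name='Cover'):
    svg = os.path.join(folder, name + '.svg')
    pdf = os.path.join(folder, name + '.pdf')
    # check_call raises CalledProcessError on a non-zero exit status,
    # so a failed conversion is never silently ignored
    subprocess.check_call(['inkscape', '--export-pdf=' + pdf, svg])
    return pdf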

buildqhelp.py (modified)

@@ -22,6 +22,7 @@
#* *
#***************************************************************************
from __future__ import print_function
__title__="wiki2qhelp"
__author__ = "Yorik van Havre <yorik@uncreated.net>"
__url__ = "http://www.freecadweb.org"
@@ -38,8 +39,8 @@ from urllib2 import urlopen, HTTPError
FOLDER = "./localwiki"
INDEX = "Online_Help_Toc" # the start page from where to crawl the wiki
VERBOSE = True # to display what's going on. Otherwise, runs totally silent.
QHELPCOMPILER = 'qhelpgenerator'
QCOLLECTIOMGENERATOR = 'qcollectiongenerator'
QHELPCOMPILER = 'qhelpgenerator -qt=qt5'
QCOLLECTIOMGENERATOR = 'qcollectiongenerator -qt=qt5'
RELEASE = '0.17'
# END CONFIGURATION ##############################################
@@ -52,10 +53,10 @@ def crawl():
# tests ###############################################
if os.system(QHELPCOMPILER +' -v'):
print "Error: QAssistant not fully installed, exiting."
print("Error: QAssistant not fully installed, exiting.")
return 1
if os.system(QCOLLECTIOMGENERATOR +' -v'):
print "Error: QAssistant not fully installed, exiting."
print("Error: QAssistant not fully installed, exiting.")
return 1
# run ########################################################
@@ -64,23 +65,34 @@ def crawl():
qhcp = createCollProjectFile()
shutil.copy("freecad-icon-64.png","localwiki/freecad-icon-64.png")
if generate(qhcp) or compile(qhp):
print "Error at compiling"
print("Error at compiling")
return 1
if VERBOSE: print "All done!"
i=raw_input("Copy the files to their correct location in the source tree? y/n (default=no) ")
if VERBOSE: print("All done!")
if "--yes-copy" in sys.argv:
i="yes"
elif "--no-copy" in sys.argv:
i="no"
else:
try:
i=raw_input("Copy the files to their correct location in the source tree? y/n (default=no) ")
except:
i="no"
if i.upper() in ["Y","YES"]:
shutil.copy("localwiki/freecad.qch","../../Doc/freecad.qch")
shutil.copy("localwiki/freecad.qhc","../../Doc/freecad.qhc")
else:
print 'Files are in localwiki. Test with "assistant -collectionFile localwiki/freecad.qhc"'
print('Files are in localwiki. Test with "assistant -collectionFile localwiki/freecad.qhc"')
return 0
def compile(qhpfile):
"compiles the whole html doc with qassistant"
qchfile = FOLDER + os.sep + "freecad.qch"
if not os.system(QHELPCOMPILER + ' '+qhpfile+' -o '+qchfile):
if VERBOSE: print "Successfully created",qchfile
if VERBOSE: print("Successfully created",qchfile)
return 0
else:
os.system('cat -v ' + qhpfile)
raise "Error during generation of freecad.qch"
def generate(qhcpfile):
"generates qassistant-specific settings like icon, title, ..."
@@ -93,8 +105,11 @@ def generate(qhcpfile):
about.close()
qhcfile = FOLDER + os.sep + "freecad.qhc"
if not os.system(QCOLLECTIOMGENERATOR+' '+qhcpfile+' -o '+qhcfile):
if VERBOSE: print "Successfully created ",qhcfile
if VERBOSE: print("Successfully created ",qhcfile)
return 0
else:
os.system('cat -v ' + qhcpfile)
raise "Error during generation of freecad.qhc"
def createCollProjectFile():
qprojectfile = '''<?xml version="1.0" encoding="UTF-8"?>
@@ -131,12 +146,12 @@ def createCollProjectFile():
</docFiles>
</QHelpCollectionProject>
'''
if VERBOSE: print "Building project file..."
if VERBOSE: print("Building project file...")
qfilename = FOLDER + os.sep + "freecad.qhcp"
f = open(qfilename,'w')
f.write(qprojectfile)
f.close()
if VERBOSE: print "Done writing qhcp file",qfilename
if VERBOSE: print("Done writing qhcp file",qfilename)
return qfilename
def buildtoc():
@@ -182,7 +197,7 @@ def buildtoc():
if not link: link = 'default.html'
return title,link
if VERBOSE: print "Building table of contents..."
if VERBOSE: print("Building table of contents...")
f = open(FOLDER+os.sep+INDEX+'.html')
html = ''
for line in f: html += line
@@ -229,9 +244,9 @@ def buildtoc():
f = open(qfilename,'wb')
f.write(qhelpfile)
f.close()
if VERBOSE: print "Done writing qhp file",qfilename
if VERBOSE: print("Done writing qhp file",qfilename)
return qfilename
if __name__ == "__main__":
crawl()
exit(crawl())
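
Two details in this file are worth spelling out. First, exit(crawl()) passes crawl()'s return value to the interpreter as the process exit status, which is what lets a failing build actually fail the GitHub action step. Second, the raise "Error during generation of ..." lines raise plain strings, which Python 2.6+ and Python 3 reject with a TypeError, so a real exception type would be needed for that branch to behave as intended. A minimal sketch of the same guard with a proper exception (names are hypothetical, not part of the diff):

from __future__ import print_function
import os, sys

class QtHelpError(Exception):
    pass

def compile_qch(compiler, qhpfile, qchfile):
    # os.system returns the command's exit status; non-zero means the tool failed
    if os.system(compiler + ' ' + qhpfile + ' -o ' + qchfile):
        os.system('cat -v ' + qhpfile)  # dump the project file to help debugging
        raise QtHelpError('Error during generation of ' + qchfile)
    print('Successfully created', qchfile)

def main():
    try:
        compile_qch('qhelpgenerator -qt=qt5', 'localwiki/freecad.qhp', 'localwiki/freecad.qch')
    except QtHelpError as e:
        print(e)
        return 1
    return 0

if __name__ == '__main__':
    sys.exit(main())  # same idea as exit(crawl()): propagate failure to the caller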

buildwikiindex.py (modified)

@@ -22,6 +22,7 @@
#* *
#***************************************************************************
from __future__ import print_function
__title__="buildwikiindex.py"
__author__ = "Yorik van Havre <yorik@uncreated.net>"
__url__ = "http://www.freecadweb.org"
@@ -60,15 +61,15 @@ def crawl(pagename=[]):
else:
if os.path.exists("wikifiles.txt"):
f = open("wikifiles.txt","r")
if VERBOSE: print "Reading existing list..."
if VERBOSE: print("Reading existing list...")
for l in f.readlines():
if l.strip() != "":
if VERBOSE: print "Adding ",l
if VERBOSE: print("Adding ",l)
processed.append(l.strip())
f.close()
if os.path.exists("todolist.txt"):
f = open("todolist.txt","r")
if VERBOSE: print "Reading existing todo list..."
if VERBOSE: print("Reading existing todo list...")
for l in f.readlines():
if l.strip() != "":
todolist.append(l.strip())
@@ -79,19 +80,19 @@ def crawl(pagename=[]):
while todolist:
targetpage = todolist.pop()
if (not targetpage in NORETRIEVE):
if VERBOSE: print count, ": Scanning ", targetpage
if VERBOSE: print(count, ": Scanning ", targetpage)
pages,images = get(targetpage)
count += 1
processed.append(targetpage)
processed.extend(images)
if VERBOSE: print "got",len(pages),"links"
if VERBOSE: print("got",len(pages),"links")
for p in pages:
if (not (p in todolist)) and (not (p in processed)):
todolist.append(p)
if WRITETHROUGH:
writeList(processed)
writeList(todolist,"todolist.txt")
if VERBOSE: print "Fetched ", count, " pages"
if VERBOSE: print("Fetched ", count, " pages")
if not WRITETHROUGH:
writeList(processed)
if pagename:
@@ -156,7 +157,7 @@ def getlinks(html):
NORETRIEVE.append(rg)
if not rg in NORETRIEVE:
pages.append(rg)
print "got link: ",rg
print("got link: ",rg)
return pages
def getimagelinks(html):
@@ -167,7 +168,7 @@ def getimagelinks(html):
def fetchpage(page):
"retrieves given page from the wiki"
print "fetching: ",page
print("fetching: ",page)
failcount = 0
while failcount < MAXFAIL:
try:
@@ -175,7 +176,7 @@ def fetchpage(page):
return html
except HTTPError:
failcount += 1
print 'Error: unable to fetch page ' + page
print('Error: unable to fetch page ' + page)
sys.exit()
def cleanList(pagelist):
@@ -193,7 +194,7 @@ def writeList(pages,filename="wikifiles.txt"):
for p in pages:
f.write(p+"\n")
f.close()
if VERBOSE: print "written ",filename
if VERBOSE: print("written ",filename)
if __name__ == "__main__":
crawl(sys.argv[1:])
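
The crawl() loop above is a plain worklist traversal: pop a page from todolist, record it as processed, and queue any newly discovered links that are in neither list; wikifiles.txt and todolist.txt simply persist those two lists so a later run can pick up existing state. A minimal self-contained sketch of the pattern, where get_links stands in for the wiki-specific get():

from __future__ import print_function

def crawl(start, get_links):
    todolist = [start]
    processed = []
    while todolist:
        page = todolist.pop()
        processed.append(page)
        for link in get_links(page):
            if link not in todolist and link not in processed:
                todolist.append(link)
    return processed

# Tiny in-memory "wiki" to exercise the traversal
links = {'Online_Help_Toc': ['PageA', 'PageB'], 'PageA': ['PageB'], 'PageB': []}
print(crawl('Online_Help_Toc', lambda p: links.get(p, [])))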

downloadwiki.py (modified)

@@ -22,6 +22,7 @@
#* *
#***************************************************************************
from __future__ import print_function
__title__="downloadwiki"
__author__ = "Yorik van Havre <yorik@uncreated.net>"
__url__ = "http://www.freecadweb.org"
@@ -137,7 +138,7 @@ def crawl():
"downloads an entire wiki site"
global processed
processed = []
if VERBOSE: print "crawling ", URL, ", saving in ", FOLDER
if VERBOSE: print("crawling ", URL, ", saving in ", FOLDER)
if not os.path.isdir(FOLDER): os.mkdir(FOLDER)
file = open(FOLDER + os.sep + "wiki.css",'wb')
file.write(css)
@@ -151,16 +152,16 @@ def crawl():
for l in lfile: locallist.append(l.replace("\n",""))
lfile.close()
todolist = locallist[:]
print "getting",len(todolist),"files..."
print("getting",len(todolist),"files...")
count = 1
indexpages = get(INDEX)
while todolist:
targetpage = todolist.pop()
if VERBOSE: print count, ": Fetching ", targetpage
if VERBOSE: print(count, ": Fetching ", targetpage)
get(targetpage)
count += 1
if VERBOSE: print "Fetched ", count, " pages"
if VERBOSE: print "All done!"
if VERBOSE: print("Fetched ", count, " pages")
if VERBOSE: print("All done!")
return 0
def get(page):
@@ -180,7 +181,7 @@ def get(page):
html = cleanimagelinks(html)
output(html,page)
else:
if VERBOSE: print " skipping",page
if VERBOSE: print(" skipping",page)
def getlinks(html):
"returns a list of wikipage links in html file"
@@ -268,7 +269,7 @@ def cleanimagelinks(html,links=None):
def fetchpage(page):
"retrieves given page from the wiki"
print " fetching: ",page
print(" fetching: ",page)
failcount = 0
while failcount < MAXFAIL:
try:
@@ -276,19 +277,19 @@ def fetchpage(page):
return html
except HTTPError:
failcount += 1
print 'Error: unable to fetch page ' + page
print('Error: unable to fetch page ' + page)
def fetchimage(imagelink):
"retrieves given image from the wiki and saves it"
if imagelink[0:5] == "File:":
print "Skipping file page link"
print("Skipping file page link")
return
filename = re.findall('.*/(.*)',imagelink)[0]
if not exists(filename,image=True):
failcount = 0
while failcount < MAXFAIL:
try:
if VERBOSE: print " fetching " + filename
if VERBOSE: print(" fetching " + filename)
data = (urlopen(URL + imagelink).read())
path = local(filename,image=True)
file = open(path,'wb')
@@ -298,11 +299,11 @@ def fetchimage(imagelink):
failcount += 1
else:
processed.append(filename)
if VERBOSE: print " saving",local(filename,image=True)
if VERBOSE: print(" saving",local(filename,image=True))
return
print 'Error: unable to fetch file ' + filename
print('Error: unable to fetch file ' + filename)
else:
if VERBOSE: print " skipping",filename
if VERBOSE: print(" skipping",filename)
def local(page,image=False):
"returns a local path for a given page/image"
@@ -337,7 +338,7 @@ def output(html,page):
filename = filename.replace("&pagefrom=","+")
filename = filename.replace("#mw-pages","")
filename = filename.replace(".html.html",".html")
print " saving",filename
print(" saving",filename)
file = open(filename,'wb')
file.write(html)
file.close()
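
fetchpage() and fetchimage() above share one small pattern: retry the download up to MAXFAIL times, counting HTTPError failures, and report an error once the budget is exhausted. A minimal standalone sketch of that retry loop (the MAXFAIL value and return convention are illustrative, not taken from the repository):

from __future__ import print_function
try:
    from urllib2 import urlopen, HTTPError   # Python 2, as used by these scripts
except ImportError:
    from urllib.request import urlopen       # Python 3 fallback
    from urllib.error import HTTPError

MAXFAIL = 3  # same idea as the MAXFAIL constant in the scripts

def fetch(url):
    failcount = 0
    while failcount < MAXFAIL:
        try:
            return urlopen(url).read()
        except HTTPError:
            failcount += 1
    print('Error: unable to fetch ' + url)
    return None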

update.py (modified)

@@ -22,6 +22,7 @@
#* *
#***************************************************************************
from __future__ import print_function
__title__="update.py"
__author__ = "Yorik van Havre <yorik@uncreated.net>"
__url__ = "http://www.freecadweb.org"
@@ -58,70 +59,70 @@ def update(pagename=None):
if not os.path.exists("revisions.txt"): # case 1)
if not os.path.exists("wikifiles.txt"):
print "No wikifiles.txt found. Aborting"
print("No wikifiles.txt found. Aborting")
sys.exit()
pages = []
f = open("wikifiles.txt","r")
if VERBOSE: print "Reading existing list..."
if VERBOSE: print("Reading existing list...")
for l in f.readlines():
if l.strip() != "":
if not "/wiki/" in l:
if VERBOSE: print "Adding ",l.strip()
if VERBOSE: print("Adding ",l.strip())
pages.append(l.strip())
f.close()
if VERBOSE: print "Added ",str(len(pages))," entries"
if VERBOSE: print("Added ",str(len(pages))," entries")
i = 1
revs = []
for page in pages:
rev = getRevision(page)
if VERBOSE: print str(i)," revision: ",rev
if VERBOSE: print(str(i)," revision: ",rev)
revs.append(page+":"+rev)
i += 1
writeList(revs,"revisions.txt")
print "All done. Successfully written revisions.txt with ",len(revs)," entries."
print("All done. Successfully written revisions.txt with ",len(revs)," entries.")
elif os.path.exists("revisions.txt") and (not os.path.exists("updates.txt")): # case 2)
f = open("revisions.txt","r")
if VERBOSE: print "Reading revisions list..."
if VERBOSE: print("Reading revisions list...")
revisions = {}
for l in f.readlines():
if l.strip() != "":
r = l.strip().split(":")
p = ":".join(r[:-1])
if VERBOSE: print "Adding ",p
if VERBOSE: print("Adding ",p)
revisions[p] = r[1]
f.close()
if VERBOSE: print "Added ",str(len(revisions.keys()))," entries"
if VERBOSE: print("Added ",str(len(revisions.keys()))," entries")
updates = []
i = 1
for page in revisions.keys():
rev = getRevision(page)
if rev != revisions[page]:
if VERBOSE: print str(i),page," has a new revision: ",rev
if VERBOSE: print(str(i),page," has a new revision: ",rev)
updates.append(page)
else:
if VERBOSE: print str(i),page," is up to date "
if VERBOSE: print(str(i),page," is up to date ")
i += 1
if updates:
writeList(updates,"updates.txt")
print "All done. Successfully written updates.txt with ",len(updates)," entries."
print("All done. Successfully written updates.txt with ",len(updates)," entries.")
else:
print "Everything up to date. Nothing to be done."
print("Everything up to date. Nothing to be done.")
elif os.path.exists("revisions.txt") and os.path.exists("updates.txt"): # case 3)
if not os.path.exists("wikifiles.txt"):
print "No wikifiles.txt found. Aborting"
print("No wikifiles.txt found. Aborting")
sys.exit()
wikifiles = []
f = open("wikifiles.txt","r")
if VERBOSE: print "Reading wikifiles list..."
if VERBOSE: print("Reading wikifiles list...")
for l in f.readlines():
if l.strip() != "":
wikifiles.append(l.strip())
f.close()
if VERBOSE: print "Read ",str(len(wikifiles))," entries"
if VERBOSE: print("Read ",str(len(wikifiles))," entries")
f = open("revisions.txt","r")
if VERBOSE: print "Reading revisions list..."
if VERBOSE: print("Reading revisions list...")
revisions = {}
for l in f.readlines():
if l.strip() != "":
@@ -131,25 +132,25 @@ def update(pagename=None):
f.close()
todo = []
f = open("updates.txt","r")
if VERBOSE: print "Reading updates list..."
if VERBOSE: print("Reading updates list...")
for l in f.readlines():
if l.strip() != "":
todo.append(l.strip())
f.close()
if VERBOSE: print str(len(todo))," pages to scan..."
if VERBOSE: print(str(len(todo))," pages to scan...")
import buildwikiindex
buildwikiindex.WRITETHROUGH = False
buildwikiindex.VERBOSE = VERBOSE
updates = []
for t in todo:
if VERBOSE: print "Scanning ",t
if VERBOSE: print("Scanning ",t)
updates.extend(buildwikiindex.crawl(t))
updates = [u for u in updates if not u in wikifiles]
if VERBOSE: print str(len(updates))," files to download..."
if VERBOSE: print(str(len(updates))," files to download...")
import downloadwiki
i = 1
for u in updates:
if VERBOSE: print i, ": Fetching ", u
if VERBOSE: print(i, ": Fetching ", u)
downloadwiki.get(u)
if not "/wiki/" in u:
rev = getRevision(u)
@@ -157,26 +158,26 @@ def update(pagename=None):
if not u in wikifiles:
wikifiles.append(u)
i += 1
if VERBOSE: print "Updating wikifiles and revisions..."
if VERBOSE: print("Updating wikifiles and revisions...")
writeList(wikifiles,"wikifiles.txt")
updatedrevs = []
for k in revisions.keys():
updatedrevs.append(k+":"+revisions[k])
writeList(updatedrevs,"revisions.txt")
os.remove("updates.txt")
if VERBOSE: print "All done!"
if VERBOSE: print("All done!")
def getRevision(page):
html = fetchPage(page)
revs = re.findall("wgCurRevisionId\"\:(.*?),",html)
if len(revs) == 1:
return revs[0]
print 'Error: unable to get revision ID of ' + page
print('Error: unable to get revision ID of ' + page)
sys.exit()
def fetchPage(page):
"retrieves given page from the wiki"
print "fetching: ",page
print("fetching: ",page)
failcount = 0
while failcount < MAXFAIL:
try:
@@ -184,7 +185,7 @@ def fetchPage(page):
return html
except HTTPError:
failcount += 1
print 'Error: unable to fetch page ' + page
print('Error: unable to fetch page ' + page)
sys.exit()
def writeList(pages,filename):
@@ -192,7 +193,7 @@ def writeList(pages,filename):
for p in pages:
f.write(p+"\n")
f.close()
if VERBOSE: print "written ",filename
if VERBOSE: print("written ",filename)
if __name__ == "__main__":
update(sys.argv[1:])
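
update() dispatches on which state files exist, which is why the workflow above runs it three times in a row: the first run builds revisions.txt from wikifiles.txt, the second compares the stored revision IDs against the wiki and writes updates.txt, and the third re-downloads the changed pages (and their dependencies) and refreshes both lists. The revision ID itself is scraped from the wgCurRevisionId value embedded in each page; a minimal sketch of that extraction (the HTML snippet is illustrative):

from __future__ import print_function
import re

def get_revision(html):
    # MediaWiki pages embed the current revision as: "wgCurRevisionId":12345,
    revs = re.findall(r'wgCurRevisionId":(.*?),', html)
    return revs[0] if len(revs) == 1 else None

print(get_revision('..."wgCurRevisionId":67890,...'))  # prints: 67890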