Compare commits

...

15 Commits

Author SHA1 Message Date
Suzanne Soy
119b100a16 re-enabled update.py, needs debugging 2021-01-31 20:34:11 +00:00
Suzanne Soy
c9d627351e Do not ignore error from Qt tools 2021-01-31 20:30:30 +00:00
Suzanne Soy
6a88c9fe78 fixup! Nightly builds 2021-01-31 20:26:53 +00:00
Suzanne Soy
15e98ff2cf Do not ignore error from calling Inkscape 2021-01-31 20:24:40 +00:00
Suzanne Soy
57521f129a Use os.path.join 2021-01-31 20:21:38 +00:00
Suzanne Soy
a762491d78 inkscape dependency for SVG to PDF 2021-01-31 20:21:06 +00:00
Suzanne Soy
e5f5213d65 Do not lose exit code of crawl() when an error occurs 2021-01-31 20:15:33 +00:00
Suzanne Soy
77d08bb399 Do not ask about copying files in the source directory in the github action; Added --non-interactive 2021-01-31 20:14:26 +00:00
Suzanne Soy
d0803db4fe fixup! Fix qhelpgenerator: could not find a Qt installation of '' 2021-01-31 20:05:49 +00:00
Suzanne Soy
3093572100 fixup! Don't store compiled .pyc files in version control 2021-01-31 20:01:49 +00:00
Suzanne Soy
d1a7903efb Don't store compiled .pyc files in version control 2021-01-31 20:01:49 +00:00
Suzanne Soy
6a37ab405b Fix qhelpgenerator: could not find a Qt installation of '' 2021-01-31 20:01:24 +00:00
Suzanne Soy
ff957af676 Nightly builds 2021-01-31 19:53:13 +00:00
Yorik van Havre
8015a63b27
Merge pull request #1 from cclauss/modernize-Python-2-codes
Use print() function in both Python 2 and Python 3
2019-06-10 15:24:34 -03:00
cclauss
244f63da64 Use print() function in both Python 2 and Python 3
Legacy __print__ statements are syntax errors in Python 3 but __print()__ function works as expected in both Python 2 and Python 3.
2019-01-09 18:00:43 +01:00
7 changed files with 218 additions and 86 deletions

108
.github/workflows/main.yml vendored Normal file
View File

@ -0,0 +1,108 @@
name: Build-Documentation
on:
# Trigger the workflow every night
schedule:
- cron: '0 1 * * *'
# Trigger on push to master and for pull requests targeting master
push:
branches: [ master ]
pull_request:
branches: [ master ]
# Trigger manually from the Actions tab
workflow_dispatch:
jobs:
build-documentation:
# The type of runner that the job will run on
runs-on: ubuntu-latest
steps:
- name: Check-out the repository under $GITHUB_WORKSPACE
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Install prerequisites from the README
run: sudo apt install python qttools5-dev-tools wkhtmltopdf python-pypdf2 inkscape
- name: Print versions
run: |
python2 --version || true
wkhtmltopdf --version --version || true
qhelpgenerator --qt=qt4 --version || true
# GitHub Actions currently uses 18.04 as of 02021-01-31, but will likely update in the future
- name: Try to install prerequisites for Ubuntu 20.04 from the README
run: sudo apt install qhelpgenerator-qt5 || true
- name: Install prerequisites for testing
run: sudo apt install xvfb
# - name: Remove existing downloads (for now, because the update.py script seems to not work well)
# run: |
# rm -rf localwiki revisions.txt updates.txt wikifiles.txt
# mkdir localwiki
# touch revisions.txt updates.txt wikifiles.txt
- name: Build an index file containing a list of all the files to download
run: python2 ./buildwikiindex.py
- name: Download wiki pages
# Redirect log because it seems we exceed the amount of data GitHub is willing
# to show in the interactive logs and there seems to be no UI to access the raw
# logs at this time
#run: python2 ./downloadwiki.py > ./downloadwiki.py.log
run: python2 ./downloadwiki.py
- name: Create a list of revision IDs for each page
run: python2 ./update.py
- name: Get a list of pages that have changed
run: python2 ./update.py
- name: Download the changed pages (and all their dependencies) again
run: python2 ./update.py
- name: Generate freecad.qhc and freecad.qch files
run: python2 ./buildqhelp.py --non-interactive
- name: Generate freecad.pdf
run: python2 ./buildpdf.py
- name: Split the generated freecad.qch into parts that are smaller than 50Mb (github limit)
run: split -d --byte=49M localwiki/freecad.qch localwiki/freecad.qch.part
- name: Reassemble the previously-split freecad.qch
run: cat localwiki/freecad.qch.part* >> test.qch
- name: Check that the reassembled test.qch is identical to localwiki/freecad.qch
run: diff -q test.qch localwiki/freecad.qch
- name: Try to open the .qch file
run: |
xvfb-run assistant -collectionFile localwiki/freecad.qhc &
sleep 5
killall assistant
- name: Upload freecad.qch
uses: actions/upload-artifact@v2
with:
name: freecad.qch
path: localwiki/freecad.qch
- name: Upload freecad.qhc
uses: actions/upload-artifact@v2
with:
name: freecad.qhc
path: localwiki/freecad.qhc
- name: Upload freecad.pdf
uses: actions/upload-artifact@v2
with:
name: freecad.pdf
path: localwiki/freecad.pdf

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/*.pyc
/__pycache__

View File

@ -22,6 +22,7 @@
#* * #* *
#*************************************************************************** #***************************************************************************
from __future__ import print_function
__title__="buildpdf" __title__="buildpdf"
__author__ = "Yorik van Havre <yorik@uncreated.net>" __author__ = "Yorik van Havre <yorik@uncreated.net>"
__url__ = "http://www.freecadweb.org" __url__ = "http://www.freecadweb.org"
@ -363,19 +364,19 @@ def crawl():
return 1 return 1
elif PDFCONVERTOR == 'htmldoc': elif PDFCONVERTOR == 'htmldoc':
if os.system('htmldoc --version'): if os.system('htmldoc --version'):
print "Error: Htmldoc not found, exiting." print("Error: Htmldoc not found, exiting.")
return 1 return 1
try: try:
from PyPDF2 import PdfFileReader,PdfFileWriter from PyPDF2 import PdfFileReader,PdfFileWriter
except: except:
print "Error: Python-pypdf2 not installed, exiting." print("Error: Python-pypdf2 not installed, exiting.")
# run ######################################################## # run ########################################################
buildpdffiles() buildpdffiles()
joinpdf() joinpdf()
if VERBOSE: print "All done!" if VERBOSE: print("All done!")
return 0 return 0
@ -389,10 +390,10 @@ def buildpdffiles():
for i in templist: for i in templist:
if i[-5:] == '.html': if i[-5:] == '.html':
fileslist.append(i) fileslist.append(i)
print "converting ",len(fileslist)," pages" print("converting ",len(fileslist)," pages")
i = 1 i = 1
for f in fileslist: for f in fileslist:
print i," : ",f print(i," : ",f)
if PDFCONVERTOR == 'pisa': if PDFCONVERTOR == 'pisa':
createpdf_pisa(f[:-5]) createpdf_pisa(f[:-5])
elif PDFCONVERTOR == 'wkhtmltopdf': elif PDFCONVERTOR == 'wkhtmltopdf':
@ -421,7 +422,7 @@ def createpdf_pisa(pagename):
if (not exists(pagename+".pdf",image=True)) or OVERWRTIE: if (not exists(pagename+".pdf",image=True)) or OVERWRTIE:
infile = open(FOLDER + os.sep + pagename+'.html','ro') infile = open(FOLDER + os.sep + pagename+'.html','ro')
outfile = open(FOLDER + os.sep + pagename+'.pdf','wb') outfile = open(FOLDER + os.sep + pagename+'.pdf','wb')
if VERBOSE: print "Converting " + pagename + " to pdf..." if VERBOSE: print("Converting " + pagename + " to pdf...")
pdf = pisa.CreatePDF(infile,outfile,FOLDER,link_callback=fetch_resources) pdf = pisa.CreatePDF(infile,outfile,FOLDER,link_callback=fetch_resources)
outfile.close() outfile.close()
if pdf.err: if pdf.err:
@ -441,7 +442,7 @@ def createpdf_firefox(pagename):
if os.path.exists(FIREFOXPDFFOLDER + os.sep + pagename + ".pdf"): if os.path.exists(FIREFOXPDFFOLDER + os.sep + pagename + ".pdf"):
shutil.move(FIREFOXPDFFOLDER+os.sep+pagename+".pdf",outfile) shutil.move(FIREFOXPDFFOLDER+os.sep+pagename+".pdf",outfile)
else: else:
print "-----------------------------------------> Couldn't find print output!" print("-----------------------------------------> Couldn't find print output!")
def createpdf_htmldoc(pagename): def createpdf_htmldoc(pagename):
@ -458,20 +459,20 @@ def createpdf_wkhtmltopdf(pagename):
infile = FOLDER + os.sep + pagename+'.html' infile = FOLDER + os.sep + pagename+'.html'
outfile = FOLDER + os.sep + pagename+'.pdf' outfile = FOLDER + os.sep + pagename+'.pdf'
cmd = 'wkhtmltopdf -L 5mm --user-style-sheet '+FOLDER+os.sep+'wkhtmltopdf.css '+infile+' '+outfile cmd = 'wkhtmltopdf -L 5mm --user-style-sheet '+FOLDER+os.sep+'wkhtmltopdf.css '+infile+' '+outfile
print cmd print(cmd)
#return os.system(cmd) #return os.system(cmd)
else: else:
print "skipping" print("skipping")
def joinpdf(): def joinpdf():
"creates one pdf file from several others, following order from the cover" "creates one pdf file from several others, following order from the cover"
from PyPDF2 import PdfFileReader,PdfFileWriter from PyPDF2 import PdfFileReader,PdfFileWriter
if VERBOSE: print "Building table of contents..." if VERBOSE: print("Building table of contents...")
result = PdfFileWriter() result = PdfFileWriter()
createCover() createCover()
inputfile = PdfFileReader(open(FOLDER+os.sep+'Cover.pdf','rb')) inputfile = PdfFileReader(open(os.path.join(FOLDER,'Cover.pdf'),'rb'))
result.addPage(inputfile.getPage(0)) result.addPage(inputfile.getPage(0))
count = 1 count = 1
@ -488,7 +489,7 @@ def joinpdf():
if page == "end": if page == "end":
parent = False parent = False
continue continue
if VERBOSE: print 'Appending',page, "at position",count if VERBOSE: print('Appending',page, "at position",count)
title = page.replace("_"," ") title = page.replace("_"," ")
pdffile = page + ".pdf" pdffile = page + ".pdf"
if exists(pdffile,True): if exists(pdffile,True):
@ -504,16 +505,16 @@ def joinpdf():
result.addBookmark(title,count,parent) result.addBookmark(title,count,parent)
count += numpages count += numpages
else: else:
print "page",pdffile,"not found, aborting." print("page",pdffile,"not found, aborting.")
sys.exit() sys.exit()
if VERBOSE: print "Writing..." if VERBOSE: print("Writing...")
outputfile = open(FOLDER+os.sep+"freecad.pdf",'wb') outputfile = open(FOLDER+os.sep+"freecad.pdf",'wb')
result.write(outputfile) result.write(outputfile)
outputfile.close() outputfile.close()
if VERBOSE: if VERBOSE:
print ' ' print(' ')
print 'Successfully created '+FOLDER+os.sep+'freecad.pdf' print('Successfully created '+FOLDER+os.sep+'freecad.pdf')
def local(page,image=False): def local(page,image=False):
@ -544,13 +545,16 @@ def makeStyleSheet():
def createCover(): def createCover():
"downloads and creates a cover page" "downloads and creates a cover page"
if VERBOSE: print "fetching " + COVER if VERBOSE: print("fetching " + COVER)
data = (urlopen(COVER).read()) data = (urlopen(COVER).read())
path = FOLDER + os.sep + "Cover.svg" path = os.path.join(FOLDER, "Cover.svg")
fil = open(path,'wb') fil = open(path,'wb')
fil.write(data) fil.write(data)
fil.close() fil.close()
os.system('inkscape --export-pdf='+FOLDER+os.sep+'Cover.pdf'+' '+FOLDER+os.sep+'Cover.svg') if os.system('inkscape --export-pdf='+os.path.join(FOLDER,'Cover.pdf')+' '+os.path.join(FOLDER,'Cover.svg')) == 0:
return
else:
raise Exception('Conversion of Cover.svg to Cover.pdf failed. Is Inkscape installed?')
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -22,6 +22,7 @@
#* * #* *
#*************************************************************************** #***************************************************************************
from __future__ import print_function
__title__="wiki2qhelp" __title__="wiki2qhelp"
__author__ = "Yorik van Havre <yorik@uncreated.net>" __author__ = "Yorik van Havre <yorik@uncreated.net>"
__url__ = "http://www.freecadweb.org" __url__ = "http://www.freecadweb.org"
@ -38,8 +39,8 @@ from urllib2 import urlopen, HTTPError
FOLDER = "./localwiki" FOLDER = "./localwiki"
INDEX = "Online_Help_Toc" # the start page from where to crawl the wiki INDEX = "Online_Help_Toc" # the start page from where to crawl the wiki
VERBOSE = True # to display what's going on. Otherwise, runs totally silent. VERBOSE = True # to display what's going on. Otherwise, runs totally silent.
QHELPCOMPILER = 'qhelpgenerator' QHELPCOMPILER = 'qhelpgenerator -qt=qt5'
QCOLLECTIOMGENERATOR = 'qcollectiongenerator' QCOLLECTIOMGENERATOR = 'qcollectiongenerator -qt=qt5'
RELEASE = '0.17' RELEASE = '0.17'
# END CONFIGURATION ############################################## # END CONFIGURATION ##############################################
@ -52,10 +53,10 @@ def crawl():
# tests ############################################### # tests ###############################################
if os.system(QHELPCOMPILER +' -v'): if os.system(QHELPCOMPILER +' -v'):
print "Error: QAssistant not fully installed, exiting." print("Error: QAssistant not fully installed, exiting.")
return 1 return 1
if os.system(QCOLLECTIOMGENERATOR +' -v'): if os.system(QCOLLECTIOMGENERATOR +' -v'):
print "Error: QAssistant not fully installed, exiting." print("Error: QAssistant not fully installed, exiting.")
return 1 return 1
# run ######################################################## # run ########################################################
@ -64,23 +65,34 @@ def crawl():
qhcp = createCollProjectFile() qhcp = createCollProjectFile()
shutil.copy("freecad-icon-64.png","localwiki/freecad-icon-64.png") shutil.copy("freecad-icon-64.png","localwiki/freecad-icon-64.png")
if generate(qhcp) or compile(qhp): if generate(qhcp) or compile(qhp):
print "Error at compiling" print("Error at compiling")
return 1 return 1
if VERBOSE: print "All done!" if VERBOSE: print("All done!")
i=raw_input("Copy the files to their correct location in the source tree? y/n (default=no) ") if "--yes-copy" in sys.argv:
i="yes"
elif "--no-copy" in sys.argv:
i="no"
else:
try:
i=raw_input("Copy the files to their correct location in the source tree? y/n (default=no) ")
except:
i="no"
if i.upper() in ["Y","YES"]: if i.upper() in ["Y","YES"]:
shutil.copy("localwiki/freecad.qch","../../Doc/freecad.qch") shutil.copy("localwiki/freecad.qch","../../Doc/freecad.qch")
shutil.copy("localwiki/freecad.qhc","../../Doc/freecad.qhc") shutil.copy("localwiki/freecad.qhc","../../Doc/freecad.qhc")
else: else:
print 'Files are in localwiki. Test with "assistant -collectionFile localwiki/freecad.qhc"' print('Files are in localwiki. Test with "assistant -collectionFile localwiki/freecad.qhc"')
return 0 return 0
def compile(qhpfile): def compile(qhpfile):
"compiles the whole html doc with qassistant" "compiles the whole html doc with qassistant"
qchfile = FOLDER + os.sep + "freecad.qch" qchfile = FOLDER + os.sep + "freecad.qch"
if not os.system(QHELPCOMPILER + ' '+qhpfile+' -o '+qchfile): if not os.system(QHELPCOMPILER + ' '+qhpfile+' -o '+qchfile):
if VERBOSE: print "Successfully created",qchfile if VERBOSE: print("Successfully created",qchfile)
return 0 return 0
else:
os.system('cat -v ' + qhpfile)
raise "Error during generation of freecad.qch"
def generate(qhcpfile): def generate(qhcpfile):
"generates qassistant-specific settings like icon, title, ..." "generates qassistant-specific settings like icon, title, ..."
@ -93,8 +105,11 @@ def generate(qhcpfile):
about.close() about.close()
qhcfile = FOLDER + os.sep + "freecad.qhc" qhcfile = FOLDER + os.sep + "freecad.qhc"
if not os.system(QCOLLECTIOMGENERATOR+' '+qhcpfile+' -o '+qhcfile): if not os.system(QCOLLECTIOMGENERATOR+' '+qhcpfile+' -o '+qhcfile):
if VERBOSE: print "Successfully created ",qhcfile if VERBOSE: print("Successfully created ",qhcfile)
return 0 return 0
else:
os.system('cat -v ' + qhcpfile)
raise "Error during generation of freecad.qhc"
def createCollProjectFile(): def createCollProjectFile():
qprojectfile = '''<?xml version="1.0" encoding="UTF-8"?> qprojectfile = '''<?xml version="1.0" encoding="UTF-8"?>
@ -131,12 +146,12 @@ def createCollProjectFile():
</docFiles> </docFiles>
</QHelpCollectionProject> </QHelpCollectionProject>
''' '''
if VERBOSE: print "Building project file..." if VERBOSE: print("Building project file...")
qfilename = FOLDER + os.sep + "freecad.qhcp" qfilename = FOLDER + os.sep + "freecad.qhcp"
f = open(qfilename,'w') f = open(qfilename,'w')
f.write(qprojectfile) f.write(qprojectfile)
f.close() f.close()
if VERBOSE: print "Done writing qhcp file",qfilename if VERBOSE: print("Done writing qhcp file",qfilename)
return qfilename return qfilename
def buildtoc(): def buildtoc():
@ -182,7 +197,7 @@ def buildtoc():
if not link: link = 'default.html' if not link: link = 'default.html'
return title,link return title,link
if VERBOSE: print "Building table of contents..." if VERBOSE: print("Building table of contents...")
f = open(FOLDER+os.sep+INDEX+'.html') f = open(FOLDER+os.sep+INDEX+'.html')
html = '' html = ''
for line in f: html += line for line in f: html += line
@ -229,9 +244,9 @@ def buildtoc():
f = open(qfilename,'wb') f = open(qfilename,'wb')
f.write(qhelpfile) f.write(qhelpfile)
f.close() f.close()
if VERBOSE: print "Done writing qhp file",qfilename if VERBOSE: print("Done writing qhp file",qfilename)
return qfilename return qfilename
if __name__ == "__main__": if __name__ == "__main__":
crawl() exit(crawl())

View File

@ -22,6 +22,7 @@
#* * #* *
#*************************************************************************** #***************************************************************************
from __future__ import print_function
__title__="buildwikiindex.py" __title__="buildwikiindex.py"
__author__ = "Yorik van Havre <yorik@uncreated.net>" __author__ = "Yorik van Havre <yorik@uncreated.net>"
__url__ = "http://www.freecadweb.org" __url__ = "http://www.freecadweb.org"
@ -60,15 +61,15 @@ def crawl(pagename=[]):
else: else:
if os.path.exists("wikifiles.txt"): if os.path.exists("wikifiles.txt"):
f = open("wikifiles.txt","r") f = open("wikifiles.txt","r")
if VERBOSE: print "Reading existing list..." if VERBOSE: print("Reading existing list...")
for l in f.readlines(): for l in f.readlines():
if l.strip() != "": if l.strip() != "":
if VERBOSE: print "Adding ",l if VERBOSE: print("Adding ",l)
processed.append(l.strip()) processed.append(l.strip())
f.close() f.close()
if os.path.exists("todolist.txt"): if os.path.exists("todolist.txt"):
f = open("todolist.txt","r") f = open("todolist.txt","r")
if VERBOSE: print "Reading existing todo list..." if VERBOSE: print("Reading existing todo list...")
for l in f.readlines(): for l in f.readlines():
if l.strip() != "": if l.strip() != "":
todolist.append(l.strip()) todolist.append(l.strip())
@ -79,19 +80,19 @@ def crawl(pagename=[]):
while todolist: while todolist:
targetpage = todolist.pop() targetpage = todolist.pop()
if (not targetpage in NORETRIEVE): if (not targetpage in NORETRIEVE):
if VERBOSE: print count, ": Scanning ", targetpage if VERBOSE: print(count, ": Scanning ", targetpage)
pages,images = get(targetpage) pages,images = get(targetpage)
count += 1 count += 1
processed.append(targetpage) processed.append(targetpage)
processed.extend(images) processed.extend(images)
if VERBOSE: print "got",len(pages),"links" if VERBOSE: print("got",len(pages),"links")
for p in pages: for p in pages:
if (not (p in todolist)) and (not (p in processed)): if (not (p in todolist)) and (not (p in processed)):
todolist.append(p) todolist.append(p)
if WRITETHROUGH: if WRITETHROUGH:
writeList(processed) writeList(processed)
writeList(todolist,"todolist.txt") writeList(todolist,"todolist.txt")
if VERBOSE: print "Fetched ", count, " pages" if VERBOSE: print("Fetched ", count, " pages")
if not WRITETHROUGH: if not WRITETHROUGH:
writeList(processed) writeList(processed)
if pagename: if pagename:
@ -156,7 +157,7 @@ def getlinks(html):
NORETRIEVE.append(rg) NORETRIEVE.append(rg)
if not rg in NORETRIEVE: if not rg in NORETRIEVE:
pages.append(rg) pages.append(rg)
print "got link: ",rg print("got link: ",rg)
return pages return pages
def getimagelinks(html): def getimagelinks(html):
@ -167,7 +168,7 @@ def getimagelinks(html):
def fetchpage(page): def fetchpage(page):
"retrieves given page from the wiki" "retrieves given page from the wiki"
print "fetching: ",page print("fetching: ",page)
failcount = 0 failcount = 0
while failcount < MAXFAIL: while failcount < MAXFAIL:
try: try:
@ -175,7 +176,7 @@ def fetchpage(page):
return html return html
except HTTPError: except HTTPError:
failcount += 1 failcount += 1
print 'Error: unable to fetch page ' + page print('Error: unable to fetch page ' + page)
sys.exit() sys.exit()
def cleanList(pagelist): def cleanList(pagelist):
@ -193,7 +194,7 @@ def writeList(pages,filename="wikifiles.txt"):
for p in pages: for p in pages:
f.write(p+"\n") f.write(p+"\n")
f.close() f.close()
if VERBOSE: print "written ",filename if VERBOSE: print("written ",filename)
if __name__ == "__main__": if __name__ == "__main__":
crawl(sys.argv[1:]) crawl(sys.argv[1:])

View File

@ -22,6 +22,7 @@
#* * #* *
#*************************************************************************** #***************************************************************************
from __future__ import print_function
__title__="downloadwiki" __title__="downloadwiki"
__author__ = "Yorik van Havre <yorik@uncreated.net>" __author__ = "Yorik van Havre <yorik@uncreated.net>"
__url__ = "http://www.freecadweb.org" __url__ = "http://www.freecadweb.org"
@ -137,7 +138,7 @@ def crawl():
"downloads an entire wiki site" "downloads an entire wiki site"
global processed global processed
processed = [] processed = []
if VERBOSE: print "crawling ", URL, ", saving in ", FOLDER if VERBOSE: print("crawling ", URL, ", saving in ", FOLDER)
if not os.path.isdir(FOLDER): os.mkdir(FOLDER) if not os.path.isdir(FOLDER): os.mkdir(FOLDER)
file = open(FOLDER + os.sep + "wiki.css",'wb') file = open(FOLDER + os.sep + "wiki.css",'wb')
file.write(css) file.write(css)
@ -151,16 +152,16 @@ def crawl():
for l in lfile: locallist.append(l.replace("\n","")) for l in lfile: locallist.append(l.replace("\n",""))
lfile.close() lfile.close()
todolist = locallist[:] todolist = locallist[:]
print "getting",len(todolist),"files..." print("getting",len(todolist),"files...")
count = 1 count = 1
indexpages = get(INDEX) indexpages = get(INDEX)
while todolist: while todolist:
targetpage = todolist.pop() targetpage = todolist.pop()
if VERBOSE: print count, ": Fetching ", targetpage if VERBOSE: print(count, ": Fetching ", targetpage)
get(targetpage) get(targetpage)
count += 1 count += 1
if VERBOSE: print "Fetched ", count, " pages" if VERBOSE: print("Fetched ", count, " pages")
if VERBOSE: print "All done!" if VERBOSE: print("All done!")
return 0 return 0
def get(page): def get(page):
@ -180,7 +181,7 @@ def get(page):
html = cleanimagelinks(html) html = cleanimagelinks(html)
output(html,page) output(html,page)
else: else:
if VERBOSE: print " skipping",page if VERBOSE: print(" skipping",page)
def getlinks(html): def getlinks(html):
"returns a list of wikipage links in html file" "returns a list of wikipage links in html file"
@ -268,7 +269,7 @@ def cleanimagelinks(html,links=None):
def fetchpage(page): def fetchpage(page):
"retrieves given page from the wiki" "retrieves given page from the wiki"
print " fetching: ",page print(" fetching: ",page)
failcount = 0 failcount = 0
while failcount < MAXFAIL: while failcount < MAXFAIL:
try: try:
@ -276,19 +277,19 @@ def fetchpage(page):
return html return html
except HTTPError: except HTTPError:
failcount += 1 failcount += 1
print 'Error: unable to fetch page ' + page print('Error: unable to fetch page ' + page)
def fetchimage(imagelink): def fetchimage(imagelink):
"retrieves given image from the wiki and saves it" "retrieves given image from the wiki and saves it"
if imagelink[0:5] == "File:": if imagelink[0:5] == "File:":
print "Skipping file page link" print("Skipping file page link")
return return
filename = re.findall('.*/(.*)',imagelink)[0] filename = re.findall('.*/(.*)',imagelink)[0]
if not exists(filename,image=True): if not exists(filename,image=True):
failcount = 0 failcount = 0
while failcount < MAXFAIL: while failcount < MAXFAIL:
try: try:
if VERBOSE: print " fetching " + filename if VERBOSE: print(" fetching " + filename)
data = (urlopen(URL + imagelink).read()) data = (urlopen(URL + imagelink).read())
path = local(filename,image=True) path = local(filename,image=True)
file = open(path,'wb') file = open(path,'wb')
@ -298,11 +299,11 @@ def fetchimage(imagelink):
failcount += 1 failcount += 1
else: else:
processed.append(filename) processed.append(filename)
if VERBOSE: print " saving",local(filename,image=True) if VERBOSE: print(" saving",local(filename,image=True))
return return
print 'Error: unable to fetch file ' + filename print('Error: unable to fetch file ' + filename)
else: else:
if VERBOSE: print " skipping",filename if VERBOSE: print(" skipping",filename)
def local(page,image=False): def local(page,image=False):
"returns a local path for a given page/image" "returns a local path for a given page/image"
@ -337,7 +338,7 @@ def output(html,page):
filename = filename.replace("&pagefrom=","+") filename = filename.replace("&pagefrom=","+")
filename = filename.replace("#mw-pages","") filename = filename.replace("#mw-pages","")
filename = filename.replace(".html.html",".html") filename = filename.replace(".html.html",".html")
print " saving",filename print(" saving",filename)
file = open(filename,'wb') file = open(filename,'wb')
file.write(html) file.write(html)
file.close() file.close()

View File

@ -22,6 +22,7 @@
#* * #* *
#*************************************************************************** #***************************************************************************
from __future__ import print_function
__title__="update.py" __title__="update.py"
__author__ = "Yorik van Havre <yorik@uncreated.net>" __author__ = "Yorik van Havre <yorik@uncreated.net>"
__url__ = "http://www.freecadweb.org" __url__ = "http://www.freecadweb.org"
@ -58,70 +59,70 @@ def update(pagename=None):
if not os.path.exists("revisions.txt"): # case 1) if not os.path.exists("revisions.txt"): # case 1)
if not os.path.exists("wikifiles.txt"): if not os.path.exists("wikifiles.txt"):
print "No wikifiles.txt found. Aborting" print("No wikifiles.txt found. Aborting")
sys.exit() sys.exit()
pages = [] pages = []
f = open("wikifiles.txt","r") f = open("wikifiles.txt","r")
if VERBOSE: print "Reading existing list..." if VERBOSE: print("Reading existing list...")
for l in f.readlines(): for l in f.readlines():
if l.strip() != "": if l.strip() != "":
if not "/wiki/" in l: if not "/wiki/" in l:
if VERBOSE: print "Adding ",l.strip() if VERBOSE: print("Adding ",l.strip())
pages.append(l.strip()) pages.append(l.strip())
f.close() f.close()
if VERBOSE: print "Added ",str(len(pages))," entries" if VERBOSE: print("Added ",str(len(pages))," entries")
i = 1 i = 1
revs = [] revs = []
for page in pages: for page in pages:
rev = getRevision(page) rev = getRevision(page)
if VERBOSE: print str(i)," revision: ",rev if VERBOSE: print(str(i)," revision: ",rev)
revs.append(page+":"+rev) revs.append(page+":"+rev)
i += 1 i += 1
writeList(revs,"revisions.txt") writeList(revs,"revisions.txt")
print "All done. Successfully written revisions.txt with ",len(revs)," entries." print("All done. Successfully written revisions.txt with ",len(revs)," entries.")
elif os.path.exists("revisions.txt") and (not os.path.exists("updates.txt")): # case 2) elif os.path.exists("revisions.txt") and (not os.path.exists("updates.txt")): # case 2)
f = open("revisions.txt","r") f = open("revisions.txt","r")
if VERBOSE: print "Reading revisions list..." if VERBOSE: print("Reading revisions list...")
revisions = {} revisions = {}
for l in f.readlines(): for l in f.readlines():
if l.strip() != "": if l.strip() != "":
r = l.strip().split(":") r = l.strip().split(":")
p = ":".join(r[:-1]) p = ":".join(r[:-1])
if VERBOSE: print "Adding ",p if VERBOSE: print("Adding ",p)
revisions[p] = r[1] revisions[p] = r[1]
f.close() f.close()
if VERBOSE: print "Added ",str(len(revisions.keys()))," entries" if VERBOSE: print("Added ",str(len(revisions.keys()))," entries")
updates = [] updates = []
i = 1 i = 1
for page in revisions.keys(): for page in revisions.keys():
rev = getRevision(page) rev = getRevision(page)
if rev != revisions[page]: if rev != revisions[page]:
if VERBOSE: print str(i),page," has a new revision: ",rev if VERBOSE: print(str(i),page," has a new revision: ",rev)
updates.append(page) updates.append(page)
else: else:
if VERBOSE: print str(i),page," is up to date " if VERBOSE: print(str(i),page," is up to date ")
i += 1 i += 1
if updates: if updates:
writeList(updates,"updates.txt") writeList(updates,"updates.txt")
print "All done. Successfully written updates.txt with ",len(updates)," entries." print("All done. Successfully written updates.txt with ",len(updates)," entries.")
else: else:
print "Everything up to date. Nothing to be done." print("Everything up to date. Nothing to be done.")
elif os.path.exists("revisions.txt") and os.path.exists("updates.txt"): # case 3) elif os.path.exists("revisions.txt") and os.path.exists("updates.txt"): # case 3)
if not os.path.exists("wikifiles.txt"): if not os.path.exists("wikifiles.txt"):
print "No wikifiles.txt found. Aborting" print("No wikifiles.txt found. Aborting")
sys.exit() sys.exit()
wikifiles = [] wikifiles = []
f = open("wikifiles.txt","r") f = open("wikifiles.txt","r")
if VERBOSE: print "Reading wikifiles list..." if VERBOSE: print("Reading wikifiles list...")
for l in f.readlines(): for l in f.readlines():
if l.strip() != "": if l.strip() != "":
wikifiles.append(l.strip()) wikifiles.append(l.strip())
f.close() f.close()
if VERBOSE: print "Read ",str(len(wikifiles))," entries" if VERBOSE: print("Read ",str(len(wikifiles))," entries")
f = open("revisions.txt","r") f = open("revisions.txt","r")
if VERBOSE: print "Reading revisions list..." if VERBOSE: print("Reading revisions list...")
revisions = {} revisions = {}
for l in f.readlines(): for l in f.readlines():
if l.strip() != "": if l.strip() != "":
@ -131,25 +132,25 @@ def update(pagename=None):
f.close() f.close()
todo = [] todo = []
f = open("updates.txt","r") f = open("updates.txt","r")
if VERBOSE: print "Reading updates list..." if VERBOSE: print("Reading updates list...")
for l in f.readlines(): for l in f.readlines():
if l.strip() != "": if l.strip() != "":
todo.append(l.strip()) todo.append(l.strip())
f.close() f.close()
if VERBOSE: print str(len(todo))," pages to scan..." if VERBOSE: print(str(len(todo))," pages to scan...")
import buildwikiindex import buildwikiindex
buildwikiindex.WRITETHROUGH = False buildwikiindex.WRITETHROUGH = False
buildwikiindex.VERBOSE = VERBOSE buildwikiindex.VERBOSE = VERBOSE
updates = [] updates = []
for t in todo: for t in todo:
if VERBOSE: print "Scanning ",t if VERBOSE: print("Scanning ",t)
updates.extend(buildwikiindex.crawl(t)) updates.extend(buildwikiindex.crawl(t))
updates = [u for u in updates if not u in wikifiles] updates = [u for u in updates if not u in wikifiles]
if VERBOSE: print str(len(updates))," files to download..." if VERBOSE: print(str(len(updates))," files to download...")
import downloadwiki import downloadwiki
i = 1 i = 1
for u in updates: for u in updates:
if VERBOSE: print i, ": Fetching ", u if VERBOSE: print(i, ": Fetching ", u)
downloadwiki.get(u) downloadwiki.get(u)
if not "/wiki/" in u: if not "/wiki/" in u:
rev = getRevision(u) rev = getRevision(u)
@ -157,26 +158,26 @@ def update(pagename=None):
if not u in wikifiles: if not u in wikifiles:
wikifiles.append(u) wikifiles.append(u)
i += 1 i += 1
if VERBOSE: print "Updating wikifiles and revisions..." if VERBOSE: print("Updating wikifiles and revisions...")
writeList(wikifiles,"wikifiles.txt") writeList(wikifiles,"wikifiles.txt")
updatedrevs = [] updatedrevs = []
for k in revisions.keys(): for k in revisions.keys():
updatedrevs.append(k+":"+revisions[k]) updatedrevs.append(k+":"+revisions[k])
writeList(updatedrevs,"revisions.txt") writeList(updatedrevs,"revisions.txt")
os.remove("updates.txt") os.remove("updates.txt")
if VERBOSE: print "All done!" if VERBOSE: print("All done!")
def getRevision(page): def getRevision(page):
html = fetchPage(page) html = fetchPage(page)
revs = re.findall("wgCurRevisionId\"\:(.*?),",html) revs = re.findall("wgCurRevisionId\"\:(.*?),",html)
if len(revs) == 1: if len(revs) == 1:
return revs[0] return revs[0]
print 'Error: unable to get revision ID of ' + page print('Error: unable to get revision ID of ' + page)
sys.exit() sys.exit()
def fetchPage(page): def fetchPage(page):
"retrieves given page from the wiki" "retrieves given page from the wiki"
print "fetching: ",page print("fetching: ",page)
failcount = 0 failcount = 0
while failcount < MAXFAIL: while failcount < MAXFAIL:
try: try:
@ -184,7 +185,7 @@ def fetchPage(page):
return html return html
except HTTPError: except HTTPError:
failcount += 1 failcount += 1
print 'Error: unable to fetch page ' + page print('Error: unable to fetch page ' + page)
sys.exit() sys.exit()
def writeList(pages,filename): def writeList(pages,filename):
@ -192,7 +193,7 @@ def writeList(pages,filename):
for p in pages: for p in pages:
f.write(p+"\n") f.write(p+"\n")
f.close() f.close()
if VERBOSE: print "written ",filename if VERBOSE: print("written ",filename)
if __name__ == "__main__": if __name__ == "__main__":
update(sys.argv[1:]) update(sys.argv[1:])