Further adjustments in offline doc tools
parent 56935813e5
commit 1b08c704ea
@@ -30,15 +30,17 @@ __url__ = "http://www.freecadweb.org"
This script builds a pdf file from a local copy of the wiki
"""

import sys, os, re, tempfile, getopt
import sys, os, re, tempfile, getopt, shutil, time
from urllib2 import urlopen, HTTPError

# CONFIGURATION #################################################

INDEX = "Online_Help_Toc" # the start page from where to crawl the wiki
REMOVE = True # if true, the temp html files are removed after successful operation
PDFCONVERTOR = 'wkhtmltopdf' # can be 'pisa', 'htmldoc' or 'wkhtmltopdf'
VERBOSE = True
PDFCONVERTOR = 'firefox' # can be 'pisa', 'htmldoc', 'wkhtmltopdf' or 'firefox'
VERBOSE = True # set true to get output messages
INCLUDECOMMANDS = True # if true, the command pages of each workbench are included after each WB page
OVERWRITE = True # if true, pdf files are recreated even if already existing
FIREFOXPDFFOLDER = os.path.expanduser("~")+os.sep+"PDF" # if firefox is used, set this to where it places its pdf files by default

# END CONFIGURATION ##############################################

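The new configuration block offers four converters and a few extra switches (OVERWRITE, INCLUDECOMMANDS, FIREFOXPDFFOLDER). A small sanity check along these lines could catch a misspelled PDFCONVERTOR or a missing external tool before the conversion loop starts; the helper below is only a sketch and is not part of the commit.

from distutils.spawn import find_executable  # assumption: not imported by the script itself

def check_converter(name):
    "hypothetical helper: verify that the configured converter can actually run"
    if name not in ('pisa', 'htmldoc', 'wkhtmltopdf', 'firefox'):
        raise ValueError("unknown PDFCONVERTOR: " + name)
    if name == 'pisa':
        import ho.pisa  # the pisa backend is a python module, imported lazily by createpdf_pisa
    elif find_executable(name) is None:
        raise RuntimeError(name + " was not found on the PATH")

check_converter(PDFCONVERTOR)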
@@ -78,14 +80,26 @@ def crawl():
def buildpdffiles():
    "scans a folder for html files and converts them all to pdf"
    templist = os.listdir(FOLDER)
    if PDFCONVERTOR == 'wkhtmltopdf':
        makeStyleSheet()
    global fileslist
    fileslist = []
    for i in templist:
        if i[-5:] == '.html':
            fileslist.append(i)
    print "converting ",len(fileslist)," pages"
    i = 1
    for f in fileslist:
        if PDFCONVERTOR == 'pisa': createpdf_pisa(f[:-5])
        elif PDFCONVERTOR == 'wkhtmltopdf': createpdf_wkhtmltopdf(f[:-5])
        else: createpdf_htmldoc(f[:-5])
        print i," : ",f
        if PDFCONVERTOR == 'pisa':
            createpdf_pisa(f[:-5])
        elif PDFCONVERTOR == 'wkhtmltopdf':
            createpdf_wkhtmltopdf(f[:-5])
        elif PDFCONVERTOR == 'firefox':
            createpdf_firefox(f[:-5])
        else:
            createpdf_htmldoc(f[:-5])
        i += 1

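buildpdffiles now dispatches on PDFCONVERTOR through an if/elif chain that grows with every backend. A table-driven alternative is sketched below, assuming the four createpdf_* functions are already defined; this is an illustration, not code from the commit.

CONVERTERS = {
    'pisa': createpdf_pisa,
    'wkhtmltopdf': createpdf_wkhtmltopdf,
    'firefox': createpdf_firefox,
    'htmldoc': createpdf_htmldoc,
}

def convert_one(basename):
    "look up the configured converter and run it on a single page"
    converter = CONVERTERS.get(PDFCONVERTOR, createpdf_htmldoc)
    return converter(basename)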
def fetch_resources(uri, rel):
    """
@@ -101,28 +115,45 @@ def fetch_resources(uri, rel):
def createpdf_pisa(pagename):
    "creates a pdf file from a saved page using pisa (python module)"
    import ho.pisa as pisa
    if not exists(pagename+".pdf",image=True):
    if (not exists(pagename+".pdf",image=True)) or OVERWRITE:
        infile = file(FOLDER + os.sep + pagename+'.html','ro')
        outfile = file(FOLDER + os.sep + pagename+'.pdf','wb')
        if VERBOSE: print "Converting " + pagename + " to pdf..."
        pdf = pisa.CreatePDF(infile,outfile,FOLDER,link_callback=fetch_resources)
        outfile.close()
        if pdf.err: return pdf.err
        if pdf.err:
            return pdf.err
    return 0

def createpdf_firefox(pagename):
    "creates a pdf file from a saved page using firefox (needs command line printing extension)"
    # the default printer will be used, so make sure it is set to pdf
    # command line printing extension http://forums.mozillazine.org/viewtopic.php?f=38&t=2729795
    if (not exists(pagename+".pdf",image=True)) or OVERWRITE:
        infile = FOLDER + os.sep + pagename+'.html'
        outfile = FOLDER + os.sep + pagename+'.pdf'
        os.system('firefox -print ' + infile)
        time.sleep(6)
        if os.path.exists(FIREFOXPDFFOLDER + os.sep + pagename + ".pdf"):
            shutil.move(FIREFOXPDFFOLDER+os.sep+pagename+".pdf",outfile)
        else:
            print "-----------------------------------------> Couldn't find print output!"

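The fixed time.sleep(6) above assumes Firefox finishes printing within six seconds. A more robust variant could poll FIREFOXPDFFOLDER for the output file instead; the helper below is a sketch of that idea, not part of the commit.

def wait_for_pdf(path, timeout=60, interval=0.5):
    "hypothetical helper: wait until Firefox has written the PDF, or give up after timeout seconds"
    waited = 0
    while waited < timeout:
        if os.path.exists(path):
            return True
        time.sleep(interval)
        waited += interval
    return False

# possible use inside createpdf_firefox, replacing the fixed sleep:
# if wait_for_pdf(FIREFOXPDFFOLDER + os.sep + pagename + ".pdf"):
#     shutil.move(FIREFOXPDFFOLDER + os.sep + pagename + ".pdf", outfile)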
def createpdf_htmldoc(pagename):
    "creates a pdf file from a saved page using htmldoc (external app, but supports images)"
    if not exists(pagename+".pdf",image=True):
    if (not exists(pagename+".pdf",image=True)) or OVERWRITE:
        infile = FOLDER + os.sep + pagename+'.html'
        outfile = FOLDER + os.sep + pagename+'.pdf'
        return os.system('htmldoc --webpage --textfont sans --browserwidth 840 -f '+outfile+' '+infile)

def createpdf_wkhtmltopdf(pagename):
    "creates a pdf file from a saved page using wkhtmltopdf (external app, but supports images)"
    if not exists(pagename+".pdf",image=True):
    if (not exists(pagename+".pdf",image=True)) or OVERWRITE:
        infile = FOLDER + os.sep + pagename+'.html'
        outfile = FOLDER + os.sep + pagename+'.pdf'
        return os.system('wkhtmltopdf '+infile+' '+outfile)
        return os.system('wkhtmltopdf --user-style-sheet '+FOLDER+os.sep+'wkhtmltopdf.css '+infile+' '+outfile)
    else:
        print "skipping"

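Both external converters build their shell commands by string concatenation, which breaks as soon as FOLDER or a page name contains spaces. The same wkhtmltopdf call through subprocess would look roughly like this sketch (an assumption, not part of the commit):

import subprocess  # not used by the script itself

def run_wkhtmltopdf(infile, outfile, stylesheet=None):
    "hypothetical wrapper: pass arguments as a list instead of a concatenated shell string"
    cmd = ['wkhtmltopdf']
    if stylesheet:
        cmd += ['--user-style-sheet', stylesheet]
    cmd += [infile, outfile]
    return subprocess.call(cmd)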
def joinpdf():
    "creates one pdf file from several others, following order from startpage"
@@ -140,16 +171,38 @@ def joinpdf():
    result = PdfFileWriter()
    for p in pages:
        if exists(p[:-5]):
            if VERBOSE: print 'Appending',p
            try: inputfile = PdfFileReader(file(FOLDER+os.sep+p[:-5]+'.pdf','rb'))
            except: print 'Unable to append',p
            if VERBOSE: print 'Appending',p[:-5]+'.pdf'
            try:
                inputfile = PdfFileReader(file(FOLDER+os.sep+p[:-5]+'.pdf','rb'))
            except:
                print 'Unable to append',p
            else:
                for i in range(inputfile.getNumPages()):
                    result.addPage(inputfile.getPage(i))
    outputfile = file("freecad.pdf",'wb')
            if INCLUDECOMMANDS:
                if ("_Workbench.html" in p) or ("_Module.html" in p):
                    mod = [p.split("_")[0]]
                    if mod[0] == "PartDesign":
                        mod.append("Constraint")
                    for m in mod:
                        for f in fileslist:
                            if f[:len(m)+1] == m+"_":
                                if (not("Module" in f)) and (not("Workbench" in f)) and (not("Scripting" in f)) and (not("API" in f)):
                                    if VERBOSE: print ' Appending',f[:-5]+'.pdf'
                                    try:
                                        inputfile = PdfFileReader(file(FOLDER+os.sep+f[:-5]+'.pdf','rb'))
                                    except:
                                        print 'Unable to append',f
                                    else:
                                        for i in range(inputfile.getNumPages()):
                                            result.addPage(inputfile.getPage(i))
    if VERBOSE: print "Writing..."
    outputfile = file(FOLDER+os.sep+"freecad.pdf",'wb')
    result.write(outputfile)
    outputfile.close()
    if VERBOSE: print 'Successfully created freecad.pdf'
    if VERBOSE:
        print ' '
        print 'Successfully created '+FOLDER+os.sep+'freecad.pdf'

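joinpdf drives the low-level PdfFileReader/PdfFileWriter pair page by page, which is what lets it interleave the command pages when INCLUDECOMMANDS is set. Where that interleaving is not needed, PyPDF2's PdfFileMerger does the plain concatenation in fewer steps; the sketch below assumes PyPDF2 is installed and is not what the script uses.

from PyPDF2 import PdfFileMerger

def merge_pdfs(paths, outpath):
    "append whole PDF files in order and write the combined document"
    merger = PdfFileMerger()
    for path in paths:
        merger.append(path)
    merger.write(outpath)
    merger.close()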
def local(page,image=False):
    "returns a local path for a given page/image"
@@ -163,6 +216,16 @@ def exists(page,image=False):
    path = local(page,image)
    if os.path.exists(path): return True
    return False

def makeStyleSheet():
    "Creates a stylesheet for wkhtmltopdf"
    outputfile = file(FOLDER+os.sep+"wkhtmltopdf.css",'wb')
    outputfile.write("""
html {
    margin: 20px 0;
}
""")
    outputfile.close()

if __name__ == "__main__":
    crawl()
@@ -40,7 +40,7 @@ INDEX = "Online_Help_Toc" # the start page from where to crawl the wiki
VERBOSE = True # to display what's going on. Otherwise, runs totally silent.
QHELPCOMPILER = 'qhelpgenerator'
QCOLLECTIOMGENERATOR = 'qcollectiongenerator'
RELEASE = '0.13'
RELEASE = '0.14'

# END CONFIGURATION ##############################################

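This hunk belongs to the Qt help build script: it bumps RELEASE from 0.13 to 0.14 and keeps relying on the qhelpgenerator and qcollectiongenerator tools configured above. For orientation, those tools are normally driven along these lines; the .qhp/.qhcp file names are placeholders, not paths taken from the script.

import os

def build_qhelp(folder):
    "rough sketch: compile a .qch from a help project, then a .qhc collection around it"
    ret = os.system(QHELPCOMPILER + ' ' + folder + os.sep + 'freecad.qhp -o ' + folder + os.sep + 'freecad.qch')
    if ret == 0:
        ret = os.system(QCOLLECTIOMGENERATOR + ' ' + folder + os.sep + 'freecad.qhcp -o ' + folder + os.sep + 'freecad.qhc')
    return ret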
@@ -97,10 +97,6 @@ a:hover {
    font-size: 0.8em;
}

#toc,.docnav {
    display: none;
}

.ct, .ctTitle, .ctOdd, .ctEven th {
    text-align: left;
    width: 200px;
@@ -111,6 +107,7 @@ a:hover {

def crawl():
    "downloads an entire wiki site"
    global processed
    processed = []
    if VERBOSE: print "crawling ", URL, ", saving in ", FOLDER
    if not os.path.isdir(FOLDER): os.mkdir(FOLDER)
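crawl() downloads the whole wiki starting from the index page, relying on get() to return the links found on each downloaded page and on the global processed list to avoid revisiting anything. Schematically, and only as a sketch of that flow:

def crawl_sketch(start):
    "illustrative breadth-first walk; assumes get() returns the wiki links of each page"
    todo = [start]
    while todo:
        page = todo.pop(0)
        links = get(page) or []
        for l in links:
            if (l not in processed) and (l not in todo):
                todo.append(l)
    return 0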
@@ -141,7 +138,6 @@ def crawl():
def get(page):
    "downloads a single page, returns the other pages it links to"
    if page[-4:] in [".png",".jpg",".svg",".gif","jpeg"]:
        print "getting image",page
        fetchimage(page)
    elif not exists(page):
        html = fetchpage(page)
@@ -151,7 +147,7 @@ def get(page):
        html = cleanimagelinks(html)
        output(html,page)
    else:
        if VERBOSE: print "skipping",page
        if VERBOSE: print " skipping",page

def getlinks(html):
    "returns a list of wikipage links in html file"
@@ -183,6 +179,7 @@ def getimagelinks(html):
def cleanhtml(html):
    "cleans given html code from dirty script stuff"
    html = html.replace('\n','Wlinebreak') # removing linebreaks for regex processing
    html = html.replace('\t','') # removing tab marks
    html = re.compile('(.*)<div id=\"content+[^>]+>').sub('',html) # stripping before content
    html = re.compile('<div id="mw-head+[^>]+>.*').sub('',html) # stripping after content
    html = re.compile('<!--[^>]+-->').sub('',html) # removing comment tags
@@ -195,7 +192,9 @@ def cleanhtml(html):
    html = re.compile('<div class="NavHead.*?</div>').sub('',html) # removing nav stuff
    html = re.compile('<div class="NavContent.*?</div>').sub('',html) # removing nav stuff
    html = re.compile('<div class="NavEnd.*?</div>').sub('',html) # removing nav stuff
    html = re.compile('<div class="docnav.*?</div></div>').sub('',html) # removing docnav
    html = re.compile('<table id="toc.*?</table>').sub('',html) # removing toc
    html = re.compile('width=\"100%\" style=\"float: right; width: 230px; margin-left: 1em\"').sub('',html) # removing command box styling
    html = re.compile('<div class="docnav.*?</div>Wlinebreak</div>').sub('',html) # removing docnav
    html = re.compile('<div class="mw-pt-translate-header.*?</div>').sub('',html) # removing translations links
    if not GETTRANSLATIONS:
        html = re.compile('<div class="languages.*?</div>').sub('',html) # removing translations links
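cleanhtml swaps newlines for the Wlinebreak placeholder up front so that the non-greedy '.*?' patterns above can match across what were originally line boundaries (by default '.' does not match a newline). The same effect can be obtained with re.DOTALL, as in this illustrative snippet, which is not how the script does it:

import re

def strip_docnav(html):
    "equivalent idea without the placeholder: let '.' match newlines via re.DOTALL"
    return re.sub('<div class="docnav.*?</div></div>', '', html, flags=re.DOTALL)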
@@ -227,7 +226,7 @@ def cleanimagelinks(html,links=None):

def fetchpage(page):
    "retrieves given page from the wiki"
    print "fetching: ",page
    print " fetching: ",page
    failcount = 0
    while failcount < MAXFAIL:
        try:
@@ -243,22 +242,25 @@ def fetchimage(imagelink):
        print "Skipping file page link"
        return
    filename = re.findall('.*/(.*)',imagelink)[0]
    print "saving",filename
    if not exists(filename,image=True):
        failcount = 0
        while failcount < MAXFAIL:
            try:
                if VERBOSE: print "Fetching " + filename
                if VERBOSE: print " fetching " + filename
                data = (urlopen(webroot(URL) + imagelink).read())
                path = local(filename,image=True)
                file = open(path,'wb')
                file.write(data)
                file.close()
                processed.append(filename)
                return
            except:
                failcount += 1
            else:
                processed.append(filename)
                if VERBOSE: print " saving",local(filename,image=True)
                return
        print 'Error: unable to fetch file ' + filename
    else:
        if VERBOSE: print " skipping",filename

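fetchpage and fetchimage both wrap urlopen in a failcount < MAXFAIL loop and only give up after MAXFAIL failed attempts. The pattern generalizes to a small helper like the one sketched here; the name and the fixed delay are assumptions, not part of the scripts.

import time
from urllib2 import urlopen, HTTPError

def fetch_with_retries(url, attempts, delay=2):
    "hypothetical helper: retry urlopen a fixed number of times before giving up"
    for attempt in range(attempts):
        try:
            return urlopen(url).read()
        except HTTPError:
            time.sleep(delay)  # short pause before the next attempt
    return None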
def local(page,image=False):
    "returns a local path for a given page/image"
@@ -281,13 +283,14 @@ def output(html,page):
    title = page.replace("_"," ")
    header = "<html><head>"
    header += "<title>" + title + "</title>"
    header += '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
    header += "<link type='text/css' href='wiki.css' rel='stylesheet'>"
    header += "</head><body>"
    header += "<h1>" + title + "</h1>"
    footer = "</body></html>"
    html = header+html+footer
    filename = local(page.replace("/","-"))
    print "saving",filename
    print " saving",filename
    file = open(filename,'wb')
    file.write(html)
    file.close()