Further adjustments in offline doc tools

This commit is contained in:
Yorik van Havre 2014-04-11 18:21:29 -03:00
parent 56935813e5
commit 1b08c704ea
3 changed files with 97 additions and 31 deletions

View File

@ -30,15 +30,17 @@ __url__ = "http://www.freecadweb.org"
This script builds a pdf file from a local copy of the wiki
"""
import sys, os, re, tempfile, getopt, shutil, time
from urllib2 import urlopen, HTTPError

# CONFIGURATION #################################################

INDEX = "Online_Help_Toc" # the start page from where to crawl the wiki
REMOVE = True # if true, the temp html files are removed after successful operation
PDFCONVERTOR = 'firefox' # can be 'pisa', 'htmldoc', 'wkhtmltopdf' or 'firefox'
VERBOSE = True # set true to get output messages
INCLUDECOMMANDS = True # if true, the command pages of each workbench are included after each WB page
OVERWRITE = True # if true, pdf files are recreated even if already existing
FIREFOXPDFFOLDER = os.path.expanduser("~")+os.sep+"PDF" # if firefox is used, set this to where it places its pdf files by default

# END CONFIGURATION ##############################################
@ -78,14 +80,26 @@ def crawl():
def buildpdffiles():
"scans a folder for html files and converts them all to pdf"
templist = os.listdir(FOLDER)
if PDFCONVERTOR == 'wkhtmltopdf':
makeStyleSheet()
global fileslist
fileslist = []
for i in templist:
if i[-5:] == '.html':
fileslist.append(i)
print "converting ",len(fileslist)," pages"
i = 1
for f in fileslist:
if PDFCONVERTOR == 'pisa': createpdf_pisa(f[:-5])
elif PDFCONVERTOR == 'wkhtmltopdf': createpdf_wkhtmltopdf(f[:-5])
else: createpdf_htmldoc(f[:-5])
print i," : ",f
if PDFCONVERTOR == 'pisa':
createpdf_pisa(f[:-5])
elif PDFCONVERTOR == 'wkhtmltopdf':
createpdf_wkhtmltopdf(f[:-5])
elif PDFCONVERTOR == 'firefox':
createpdf_firefox(f[:-5])
else:
createpdf_htmldoc(f[:-5])
i += 1
def fetch_resources(uri, rel):
"""
@ -101,28 +115,45 @@ def fetch_resources(uri, rel):
def createpdf_pisa(pagename):
"creates a pdf file from a saved page using pisa (python module)"
import ho.pisa as pisa
if not exists(pagename+".pdf",image=True):
if (not exists(pagename+".pdf",image=True)) or OVERWRTIE:
infile = file(FOLDER + os.sep + pagename+'.html','ro')
outfile = file(FOLDER + os.sep + pagename+'.pdf','wb')
if VERBOSE: print "Converting " + pagename + " to pdf..."
pdf = pisa.CreatePDF(infile,outfile,FOLDER,link_callback=fetch_resources)
outfile.close()
if pdf.err: return pdf.err
if pdf.err:
return pdf.err
return 0
def createpdf_firefox(pagename):
"creates a pdf file from a saved page using firefox (needs command line printing extension)"
# the default printer will be used, so make sure it is set to pdf
# command line printing extension http://forums.mozillazine.org/viewtopic.php?f=38&t=2729795
if (not exists(pagename+".pdf",image=True)) or OVERWRITE:
infile = FOLDER + os.sep + pagename+'.html'
outfile = FOLDER + os.sep + pagename+'.pdf'
return os.system('firefox -print ' + infile)
time.sleep(6)
if os.path.exists(FIREFOXPDFFOLDER + os.sep + pagename + ".pdf"):
shutil.move(FIREFOXPDFFOLDER+os.sep+pagename+".pdf",outfile)
else:
print "-----------------------------------------> Couldn't find print output!"
def createpdf_htmldoc(pagename):
    "creates a pdf file from a saved page using htmldoc (external app, but supports images)"
    # skip pages already converted, unless OVERWRITE forces regeneration
    if exists(pagename+".pdf",image=True) and not OVERWRITE:
        return
    src = FOLDER + os.sep + pagename+'.html'
    dst = FOLDER + os.sep + pagename+'.pdf'
    # render a single web page, sans-serif text, 840px browser width
    return os.system('htmldoc --webpage --textfont sans --browserwidth 840 -f '+dst+' '+src)
def createpdf_wkhtmltopdf(pagename):
"creates a pdf file from a saved page using htmldoc (external app, but supports images)"
if not exists(pagename+".pdf",image=True):
if (not exists(pagename+".pdf",image=True)) or OVERWRTIE:
infile = FOLDER + os.sep + pagename+'.html'
outfile = FOLDER + os.sep + pagename+'.pdf'
return os.system('wkhtmltopdf '+infile+' '+outfile)
return os.system('wkhtmltopdf --user-style-sheet '+FOLDER+os.sep+'wkhtmltppdf.css '+infile+' '+outfile)
else:
print "skipping"
def joinpdf():
"creates one pdf file from several others, following order from startpage"
@ -140,16 +171,38 @@ def joinpdf():
result = PdfFileWriter()
for p in pages:
if exists(p[:-5]):
if VERBOSE: print 'Appending',p
try: inputfile = PdfFileReader(file(FOLDER+os.sep+p[:-5]+'.pdf','rb'))
except: print 'Unable to append',p
if VERBOSE: print 'Appending',p[:-5]+'.pdf'
try:
inputfile = PdfFileReader(file(FOLDER+os.sep+p[:-5]+'.pdf','rb'))
except:
print 'Unable to append',p
else:
for i in range(inputfile.getNumPages()):
result.addPage(inputfile.getPage(i))
outputfile = file("freecad.pdf",'wb')
if INCLUDECOMMANDS:
if ("_Workbench.html" in p) or ("_Module.html" in p):
mod = [p.split("_")[0]]
if mod[0] == "PartDesign":
mod.append("Constraint")
for m in mod:
for f in fileslist:
if f[:len(m)+1] == m+"_":
if (not("Module" in f)) and (not("Workbench" in f)) and (not("Scripting" in f)) and (not("API" in f)):
if VERBOSE: print ' Appending',f[:-5]+'.pdf'
try:
inputfile = PdfFileReader(file(FOLDER+os.sep+f[:-5]+'.pdf','rb'))
except:
print 'Unable to append',f
else:
for i in range(inputfile.getNumPages()):
result.addPage(inputfile.getPage(i))
if VERBOSE: print "Writing..."
outputfile = file(FOLDER+os.sep+"freecad.pdf",'wb')
result.write(outputfile)
outputfile.close()
if VERBOSE: print 'Successfully created freecad.pdf'
if VERBOSE:
print ' '
print 'Successfully created '+FOLDER+os.sep+'freecad.pdf'
def local(page,image=False):
"returns a local path for a given page/image"
@ -163,6 +216,16 @@ def exists(page,image=False):
path = local(page,image)
if os.path.exists(path): return True
return False
def makeStyleSheet():
    "Creates a stylesheet for wkhtmltopdf"
    # written into FOLDER so the wkhtmltopdf converter can pass it
    # via --user-style-sheet
    sheet = file(FOLDER+os.sep+"wkhtmltopdf.css",'wb')
    sheet.write("""
html {
margin: 20px 0;
}
""")
    sheet.close()
# script entry point: download the whole wiki when run directly
if __name__ == "__main__":
    crawl()

View File

@ -40,7 +40,7 @@ INDEX = "Online_Help_Toc" # the start page from where to crawl the wiki
VERBOSE = True # to display what's going on. Otherwise, runs totally silent.
QHELPCOMPILER = 'qhelpgenerator'
# NOTE(review): name is misspelled ("COLLECTIOM") but kept as-is, since other
# code in this file may reference it by this exact name
QCOLLECTIOMGENERATOR = 'qcollectiongenerator'
RELEASE = '0.14' # FreeCAD release this help collection is built for

# END CONFIGURATION ##############################################

View File

@ -97,10 +97,6 @@ a:hover {
font-size: 0.8em;
}
#toc,.docnav {
display: none;
}
.ct, .ctTitle, .ctOdd, .ctEven th {
text-align: left;
width: 200px;
@ -111,6 +107,7 @@ a:hover {
def crawl():
"downloads an entire wiki site"
global processed
processed = []
if VERBOSE: print "crawling ", URL, ", saving in ", FOLDER
if not os.path.isdir(FOLDER): os.mkdir(FOLDER)
@ -141,7 +138,6 @@ def crawl():
def get(page):
"downloads a single page, returns the other pages it links to"
if page[-4:] in [".png",".jpg",".svg",".gif","jpeg"]:
print "getting image",page
fetchimage(page)
elif not exists(page):
html = fetchpage(page)
@ -151,7 +147,7 @@ def get(page):
html = cleanimagelinks(html)
output(html,page)
else:
if VERBOSE: print "skipping",page
if VERBOSE: print " skipping",page
def getlinks(html):
"returns a list of wikipage links in html file"
@ -183,6 +179,7 @@ def getimagelinks(html):
def cleanhtml(html):
"cleans given html code from dirty script stuff"
html = html.replace('\n','Wlinebreak') # removing linebreaks for regex processing
html = html.replace('\t','') # removing tab marks
html = re.compile('(.*)<div id=\"content+[^>]+>').sub('',html) # stripping before content
html = re.compile('<div id="mw-head+[^>]+>.*').sub('',html) # stripping after content
html = re.compile('<!--[^>]+-->').sub('',html) # removing comment tags
@ -195,7 +192,9 @@ def cleanhtml(html):
html = re.compile('<div class="NavHead.*?</div>').sub('',html) # removing nav stuff
html = re.compile('<div class="NavContent.*?</div>').sub('',html) # removing nav stuff
html = re.compile('<div class="NavEnd.*?</div>').sub('',html) # removing nav stuff
html = re.compile('<div class="docnav.*?</div></div>').sub('',html) # removing docnav
html = re.compile('<table id="toc.*?</table>').sub('',html) # removing toc
html = re.compile('width=\"100%\" style=\"float: right; width: 230px; margin-left: 1em\"').sub('',html) # removing command box styling
html = re.compile('<div class="docnav.*?</div>Wlinebreak</div>').sub('',html) # removing docnav
html = re.compile('<div class="mw-pt-translate-header.*?</div>').sub('',html) # removing translations links
if not GETTRANSLATIONS:
html = re.compile('<div class="languages.*?</div>').sub('',html) # removing translations links
@ -227,7 +226,7 @@ def cleanimagelinks(html,links=None):
def fetchpage(page):
"retrieves given page from the wiki"
print "fetching: ",page
print " fetching: ",page
failcount = 0
while failcount < MAXFAIL:
try:
@ -243,22 +242,25 @@ def fetchimage(imagelink):
print "Skipping file page link"
return
filename = re.findall('.*/(.*)',imagelink)[0]
print "saving",filename
if not exists(filename,image=True):
failcount = 0
while failcount < MAXFAIL:
try:
if VERBOSE: print "Fetching " + filename
if VERBOSE: print " fetching " + filename
data = (urlopen(webroot(URL) + imagelink).read())
path = local(filename,image=True)
file = open(path,'wb')
file.write(data)
file.close()
processed.append(filename)
return
except:
failcount += 1
else:
processed.append(filename)
if VERBOSE: print " saving",local(filename,image=True)
return
print 'Error: unable to fetch file ' + filename
else:
if VERBOSE: print " skipping",filename
def local(page,image=False):
"returns a local path for a given page/image"
@ -281,13 +283,14 @@ def output(html,page):
title = page.replace("_"," ")
header = "<html><head>"
header += "<title>" + title + "</title>"
header += '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
header += "<link type='text/css' href='wiki.css' rel='stylesheet'>"
header += "</head><body>"
header += "<h1>" + title + "</h1>"
footer = "</body></html>"
html = header+html+footer
filename = local(page.replace("/","-"))
print "saving",filename
print " saving",filename
file = open(filename,'wb')
file.write(html)
file.close()