
Legacy __print__ statements are syntax errors in Python 3 but __print()__ function works as expected in both Python 2 and Python 3.
200 lines
8.0 KiB
Python
Executable File
200 lines
8.0 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
#***************************************************************************
|
|
#* *
|
|
#* Copyright (c) 2017 Yorik van Havre <yorik@uncreated.net> *
|
|
#* *
|
|
#* This program is free software; you can redistribute it and/or modify *
|
|
#* it under the terms of the GNU Lesser General Public License (LGPL) *
|
|
#* as published by the Free Software Foundation; either version 2 of *
|
|
#* the License, or (at your option) any later version. *
|
|
#* for detail see the LICENCE text file. *
|
|
#* *
|
|
#* This program is distributed in the hope that it will be useful, *
|
|
#* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
|
#* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
|
#* GNU Library General Public License for more details. *
|
|
#* *
|
|
#* You should have received a copy of the GNU Library General Public *
|
|
#* License along with this program; if not, write to the Free Software *
|
|
#* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *
|
|
#* USA *
|
|
#* *
|
|
#***************************************************************************
|
|
|
|
from __future__ import print_function
|
|
# Module metadata.
__title__ = "update.py"
__author__ = "Yorik van Havre <yorik@uncreated.net>"
__url__ = "http://www.freecadweb.org"
|
|
|
|
"""
|
|
This script needs to be run after the wiki has been fully downloaded. It has three usages:
|
|
|
|
1) If no revisions.txt file is found, it parses the contents of the wikifiles.txt file
|
|
and, for each entry, it retrieves a corresponding revision ID, and creates a revisions.txt file
|
|
|
|
2) If a revisions.txt file exists but no updates.txt file exists, it crawls through all entries of
|
|
revisions.txt and, for each one, compares the current wiki revision with the one stored in revisions.txt.
|
|
An updates.txt file is created listing all pages whose revision IDs differ.
|
|
|
|
3) If updates.txt exists, each of its entries is scanned again for new links and all the needed
|
|
files are downloaded. revisions.txt and wikifiles.txt are updated as well, and updates.txt is removed.
|
|
"""
|
|
|
|
import sys, os, re, tempfile, getopt
|
|
from urllib2 import urlopen, HTTPError
|
|
|
|
# CONFIGURATION #################################################

# default URL if no URL is passed
URL = "https://www.freecadweb.org/wiki"

# Set true if you want to get the translations too.
GETTRANSLATIONS = False

# max number of retries if download fails
MAXFAIL = 3

# to display what's going on. Otherwise, runs totally silent.
VERBOSE = True

# END CONFIGURATION ##############################################

# appended to URL, followed by the page name, to build a page address
wikiindex = "/index.php?title="
|
|
|
|
def _readEntries(filename):
    "returns the stripped, non-empty lines of the given text file"
    with open(filename, "r") as f:
        stripped = (line.strip() for line in f)
        return [entry for entry in stripped if entry != ""]


def _readRevisions(filename, announce=False):
    "returns a {page name: revision ID} dict parsed from 'page:revision' lines"
    revisions = {}
    for entry in _readEntries(filename):
        # Page names may contain ':' themselves, so only the LAST colon
        # separates the name from the revision ID.
        page, sep, rev = entry.rpartition(":")
        if not sep:
            continue  # not a "page:revision" line, nothing to record
        if announce: print("Adding ",page)
        revisions[page] = rev
    return revisions


def update(pagename=None):
    """Run one step of the offline-wiki update cycle.

    Which step runs depends on the files present in the current directory:

    1) no revisions.txt: the revision ID of every entry of wikifiles.txt that
       does not contain "/wiki/" is fetched and revisions.txt is written.
    2) revisions.txt but no updates.txt: every page of revisions.txt is checked
       against its current revision ID; pages that differ are written to
       updates.txt.
    3) revisions.txt and updates.txt: every page of updates.txt is crawled for
       links, links not yet in wikifiles.txt are downloaded, wikifiles.txt and
       revisions.txt are rewritten and updates.txt is removed.

    pagename -- not used by this function (the __main__ block passes the
                command-line arguments here).

    Exits through sys.exit() when wikifiles.txt is needed but missing.
    """

    if not os.path.exists("revisions.txt"): # case 1)
        if not os.path.exists("wikifiles.txt"):
            print("No wikifiles.txt found. Aborting")
            sys.exit()
        if VERBOSE: print("Reading existing list...")
        pages = []
        for entry in _readEntries("wikifiles.txt"):
            # entries containing "/wiki/" get no revision ID
            # (presumably they are files rather than pages - TODO confirm)
            if "/wiki/" not in entry:
                if VERBOSE: print("Adding ",entry)
                pages.append(entry)
        if VERBOSE: print("Added ",str(len(pages))," entries")
        revs = []
        for i, page in enumerate(pages, 1):
            rev = getRevision(page)
            if VERBOSE: print(str(i)," revision: ",rev)
            revs.append(page+":"+rev)
        writeList(revs,"revisions.txt")
        print("All done. Successfully written revisions.txt with ",len(revs)," entries.")

    elif not os.path.exists("updates.txt"): # case 2)
        if VERBOSE: print("Reading revisions list...")
        revisions = _readRevisions("revisions.txt", announce=VERBOSE)
        if VERBOSE: print("Added ",str(len(revisions))," entries")
        updates = []
        for i, page in enumerate(revisions, 1):
            rev = getRevision(page)
            if rev != revisions[page]:
                if VERBOSE: print(str(i),page," has a new revision: ",rev)
                updates.append(page)
            else:
                if VERBOSE: print(str(i),page," is up to date ")
        if updates:
            writeList(updates,"updates.txt")
            print("All done. Successfully written updates.txt with ",len(updates)," entries.")
        else:
            print("Everything up to date. Nothing to be done.")

    else: # case 3) both revisions.txt and updates.txt exist
        if not os.path.exists("wikifiles.txt"):
            print("No wikifiles.txt found. Aborting")
            sys.exit()
        if VERBOSE: print("Reading wikifiles list...")
        wikifiles = _readEntries("wikifiles.txt")
        if VERBOSE: print("Read ",str(len(wikifiles))," entries")
        if VERBOSE: print("Reading revisions list...")
        revisions = _readRevisions("revisions.txt")
        if VERBOSE: print("Reading updates list...")
        todo = _readEntries("updates.txt")
        if VERBOSE: print(str(len(todo))," pages to scan...")
        # sibling scripts, imported only when this step actually runs
        import buildwikiindex
        buildwikiindex.WRITETHROUGH = False
        buildwikiindex.VERBOSE = VERBOSE
        updates = []
        for t in todo:
            if VERBOSE: print("Scanning ",t)
            updates.extend(buildwikiindex.crawl(t))
        # set for constant-time membership tests; wikifiles keeps the order
        known = set(wikifiles)
        # NOTE(review): anything already listed in wikifiles.txt is dropped
        # here, so the changed pages from updates.txt themselves do not seem
        # to be downloaded again, nor their revision IDs refreshed - confirm
        # whether that is intended.
        updates = [u for u in updates if u not in known]
        if VERBOSE: print(str(len(updates))," files to download...")
        import downloadwiki
        for i, u in enumerate(updates, 1):
            if VERBOSE: print(i, ": Fetching ", u)
            downloadwiki.get(u)
            if "/wiki/" not in u:
                revisions[u] = getRevision(u)
            if u not in known:
                known.add(u)
                wikifiles.append(u)
        if VERBOSE: print("Updating wikifiles and revisions...")
        writeList(wikifiles,"wikifiles.txt")
        writeList([k+":"+revisions[k] for k in revisions],"revisions.txt")
        os.remove("updates.txt")
        if VERBOSE: print("All done!")
|
|
|
|
def getRevision(page):
    """Return the current revision ID of the given wiki page, as a string.

    The page is retrieved with fetchPage() and the ID is taken from the
    "wgCurRevisionId" entry found in the returned HTML. If the HTML does not
    contain exactly one such entry, an error is printed and the script exits
    through sys.exit().
    """
    html = fetchPage(page)
    if not isinstance(html, str):
        # the response body can be bytes (e.g. under Python 3), while the
        # pattern below is text; mixing both makes re raise a TypeError
        html = html.decode("utf-8", "replace")
    # raw string: "\:" in a normal string literal is an invalid escape sequence
    revs = re.findall(r'wgCurRevisionId":(.*?),', html)
    if len(revs) == 1:
        return revs[0]
    print('Error: unable to get revision ID of ' + page)
    sys.exit()
|
|
|
|
def fetchPage(page):
    """Retrieve the given page from the wiki and return its raw content.

    The address is built as URL + wikiindex + page. A request that raises
    HTTPError is retried, up to MAXFAIL attempts in total; if all of them
    fail, an error is printed and the script exits through sys.exit().
    """
    # progress output obeys VERBOSE like the other progress messages
    if VERBOSE: print("fetching: ",page)
    address = URL + wikiindex + page
    for _attempt in range(MAXFAIL):
        try:
            response = urlopen(address)
            try:
                return response.read()
            finally:
                # release the connection whether or not reading succeeded
                response.close()
        except HTTPError:
            # deliberate: fall through to the next attempt
            pass
    print('Error: unable to fetch page ' + page)
    sys.exit()
|
|
|
|
def writeList(pages,filename):
    """Write the given entries to a file, one entry per line.

    pages    -- iterable of strings
    filename -- path of the file to write; an existing file is overwritten
    """
    # Text mode: the entries are text strings (writing them to a binary-mode
    # file fails under Python 3) and the lists are read back in text mode.
    # The with statement closes the file even if a write fails.
    with open(filename,"w") as f:
        for p in pages:
            f.write(p+"\n")
    if VERBOSE: print("written ",filename)
|
|
|
|
if __name__ == "__main__":
    # run as a script: hand every command-line argument after the script name to update()
    arguments = sys.argv[1:]
    update(arguments)
|