update of xlsx-importer: new parser,

trigonometric functions give the same result in the FreeCAD-spreadsheet,
support of celltype "inlineStr".
This commit is contained in:
Ulrich Brammer 2017-02-19 22:56:51 +01:00 committed by wmayer
parent f55635900f
commit 693f4c759e

View File

@ -30,21 +30,27 @@ __url__ = ["http://www.freecadweb.org"]
'''
This library imports an Excel-XLSX-file into FreeCAD.
Version 1.1, Nov. 2016:
Changed parser, adds rad-unit to trigonometric functions in order
to give the same result in FreeCAD.
Added factor to arcus-function in order to give the same result in FreeCAD
Added support for celltype "inlineStr"
Version 1.0:
It uses a minimal parser, in order to translate the IF-function into
the different FreeCAD version.
The other function-names are translated by search and replace.
Version 1.0:
Features:
- Imports tables defined inside Excel-document
- Set alias definitions
- Translate formulas known by FreeCAD. (see funcDic + IF)
- Translate formulas known by FreeCAD. (see tokenDic as by version 1.1)
- set cross table references
- strings are imported
- references to cells with strings are working
known issues:
- units are not imported
- string support is minimal, as it is so in FreeCAD
- string support is minimal, the same as in FreeCAD
'''
@ -60,39 +66,12 @@ if open.__module__ == '__builtin__':
pythonopen = open
funcDic = {
'ABS(':'abs(',
'ACOS(':'acos(',
'ASIN(':'asin(',
'ATAN(':'atan(',
'ATAN2(':'atan2(',
'COS(':'cos(',
'COSH(':'cosh(',
'EXP(':'exp(',
'LOG(':'log(',
'LOG10(':'log10(',
'MOD(':'mod(',
'POWER(':'pow(',
'SIN(':'sin(',
'SINH(':'sinh(',
'SQRT(':'sqrt(',
'TAN(':'tan(',
'TANH(':'tanh(',
'AVERAGE(':'average(',
'COUNT(':'count(',
'MAX(':'max(',
'MIN(':'min(',
'STDEVA(':'stddev(',
'SUM(':'sum(',
'PI()':'pi'
}
# The treeToken structure is used in the tokenizer functions isKey and
# The sepToken structure is used in the tokenizer functions isKey and
# getNextToken.
# treeToken defines a search tree for tokens with length of 1 to 3 characters
treeToken = {
# sepToken defines a search tree for separator tokens with length of 1 to 3 characters
# it is also used as a list of separators between other tokens.
sepToken = {
'(':None,
'I':'branchI',
'=':None,
'<':'branchLower',
'>':'branchHigher',
@ -100,12 +79,14 @@ treeToken = {
# '"':None,
# ';':None,
' ':None,
',':None,
'!':None
',':None, # Separator on lists
'!':None, #Connector to cells on other Sheets
'+':None,
'-':None,
'*':None,
'/':None,
'^':None
}
branchI = {'F':'branchF'}
branchF = {'(':None}
branchLower ={
'>':None,
@ -117,8 +98,6 @@ branchHigher = {'=':None}
# Needed to get a reference from a string to a dict
treeDict = {
'branchI':branchI,
'branchF':branchF,
'branchLower':branchLower,
'branchHigher':branchHigher
}
@ -127,68 +106,95 @@ treeDict = {
# The tokenDic contains the following information:
# levelchange: +1: step down into a sub-expression (new child node), 0: stay on this level, -1: step back up to the parent node
# replacement token
# special token list
# function-state: needed to do something special in the parser
# 0 = normal, 1 = the pi-case, 2 = angle-function,
# 3 = IF-function, 4 = IF-truecase, 5 IF-falsecase
tokenDic = {
'(' :( 1, '(', None),
'IF(':( 1, '(', None),
'=' :( 0 ,'==', None),
'<>' :( 0 ,'!=', None),
'>=' :( 0 ,'>=', None),
'<=' :( 0 ,'<=', None),
'<' :( 0 ,'<', None),
'>' :( 0 ,'>', None),
# ';' :( 0 ,';', ['?',':']),
',' :( 0 ,',', ['?',':']),
')' :(-1 ,')', None),
'!' :( 0 ,'.', None) #Connector to cells on other Sheets
# '"' :( 2 ,'', None)
'(' :( 1, '(', 0),
'=' :( 0 ,'==', 0),
'<>' :( 0 ,'!=', 0),
'>=' :( 0 ,'>=', 0),
'<=' :( 0 ,'<=', 0),
'<' :( 0 ,'<', 0),
'>' :( 0 ,'>', 0),
',' :( 0 ,',', 0),
')' :(-1 ,')', 0),
'!' :( 0 ,'.', 0), #Connector to cells on other Sheets
# '"' :( 2 ,'', 0),
'+' :( 0 ,'+', 0),
'-' :( 0 ,'-', 0),
'*' :( 0 ,'*', 0),
'/' :( 0 ,'/', 0),
'^' :( 0 ,'^', 0),
'IF' :( 0, '', 3),
'ABS' :( 0, 'abs', 0),
'ACOS' :( 0, 'pi/180deg*acos', 0),
'ASIN' :( 0, 'pi/180deg*asin', 0),
'ATAN' :( 0, 'pi/180deg*atan', 0),
'ATAN2':( 0, 'pi/180deg*atan2',0),
'COS' :( 0, 'cos', 2),
'COSH' :( 0, 'cosh', 2),
'EXP' :( 0, 'exp', 0),
'LOG' :( 0, 'log', 0),
'LOG10':( 0, 'log10',0),
'MOD' :( 0, 'mod', 0),
'POWER':( 0, 'pow', 0),
'SIN' :( 0, 'sin', 2),
'SINH' :( 0, 'sinh', 2),
'SQRT' :( 0, 'sqrt', 0),
'TAN' :( 0, 'tan', 2),
'TANH' :( 0, 'tanh', 2),
'AVERAGE':( 0, 'average', 0),
'COUNT':( 0, 'count',0),
'MAX' :( 0, 'max', 0),
'MIN' :( 0, 'min', 0),
'STDEVA':( 0, 'stddev',0),
'SUM' :( 0, 'sum', 0),
'PI' :( 0, 'pi', 1)
}
class exprNode(object):
''' This defines a tree class for expression parsing'''
def __init__(self, parent, state):
self.state = state #see comment: State machine for expression parsing
''' This defines a tree class for expression parsing.
A tree is built to step down into the levels of the expression.'''
def __init__(self, parent, state, actIndex):
self.state = state #see comment: State used for Angle-functions and IF-function
self.parent = parent # Parent tree node
self.lIndex = actIndex # Index to the list of tokens
self.result = ''
class FormulaTranslator(object):
''' This class translates a cell-formula from Excel to FreeCAD.'''
def __init__(self):
self.theTList = ['=']
self.tokenList = ['=']
def translateForm(self, actExpr):
self.getNextToken(actExpr)
#print "tokenList: ", self.theTList
self.resultTree = exprNode(None, 5)
self.resultTree.result = self.resultTree.result + self.theTList[0]
self.parseExpr(self.theTList, 1, self.resultTree)
#print 'parseResult: ', self.resultTree.result
return self.replaceFunc(self.resultTree.result)
#print("tokenList: ", self.tokenList)
self.resultTree = exprNode(None, 0, 1)
self.resultTree.result = self.tokenList[0]
self.parseExpr(self.resultTree)
#print('parseResult: ', self.resultTree.result)
return self.resultTree.result
def replaceFunc(self, cellFormula):
for funcKey in funcDic:
if funcKey in cellFormula:
cellFormula = cellFormula.replace(funcKey, funcDic[funcKey])
return cellFormula
def getNextToken(self, theExpr):
#print 'next Token theExpr: ', theExpr
#print 'actTList: ', self.theTList
''' This is the recursive tokenizer for an excel formula.
It appends all identified tokens to self.tokenList.'''
#print('next Token theExpr: ', theExpr)
#print('actTList: ', self.tokenList)
tokenComplete = False
keyToken = False
if len(theExpr)>0:
theTok = theExpr[0]
theExpr = theExpr[1:]
if theTok in treeToken:
if theTok in sepToken:
keyToken = True
branch = treeToken[theTok]
branch = sepToken[theTok]
while branch:
#print branch, ' theExpr[0]: ',theExpr[0]
#print(branch, ' theExpr[0]: ',theExpr[0])
if theExpr[0] in treeDict[branch]:
branch = treeDict[branch][theExpr[0]]
theTok = theTok + theExpr[0]
@ -196,7 +202,7 @@ class FormulaTranslator(object):
else:
branch= None
tokenComplete = True
self.theTList.append(theTok)
self.tokenList.append(theTok)
self.getNextToken(theExpr)
else:
if len(theExpr)>0:
@ -208,21 +214,21 @@ class FormulaTranslator(object):
tokenComplete = True
else:
tokenComplete = True
self.theTList.append(theTok)
self.tokenList.append(theTok)
self.getNextToken(theExpr)
def isKey(self, theExpr):
#print 'look up: ', theExpr
#print('look up: ', theExpr)
keyToken = False
lenExpr = len(theExpr)
if theExpr[0] in treeToken:
branch = treeToken[theExpr[0]]
if theExpr[0] in sepToken:
branch = sepToken[theExpr[0]]
if branch == None:
keyToken = True
else:
#print 'There is a branch. look up: ', theExpr[1]
#print('There is a branch. look up: ', theExpr[1])
if (lenExpr > 1) and (theExpr[1] in treeDict[branch]):
branch = treeDict[branch][theExpr[0]]
if branch == None:
@ -235,60 +241,79 @@ class FormulaTranslator(object):
return keyToken
# State machine for expression parsing
# 0 in ifsubexpression
# 1 in conditional
# 2 in truecase
# 3 in falsecase
# 4 in subexpression
# 5 toplevel '='
def parseExpr(self, tokenList, index, theTree):
token = tokenList[index]
#print 'state: ', theTree.state, ' ', token
nextIdx = index + 1
def parseExpr(self, treeNode):
token = self.tokenList[treeNode.lIndex]
treeNode.lIndex += 1
if token in tokenDic:
lChange, newToken, specialList = tokenDic[token]
lChange, newToken, funcState = tokenDic[token]
else:
lChange = 0
newToken = token
specialList = None
if lChange == 1:
theTree.result = theTree.result + newToken
if token == '(':
state = 4
else:
state = 1
newNode = exprNode(theTree, state)
self.parseExpr(tokenList, nextIdx, newNode)
else:
if lChange == 0:
if theTree.state > 2:
theTree.result = theTree.result + newToken
else:
if (theTree.state == 1):
if specialList:
theTree.result = theTree.result + specialList[0]
theTree.state = 2
else:
theTree.result = theTree.result + newToken
else:
if (theTree.state == 2):
if specialList:
theTree.result = theTree.result + specialList[1]
theTree.state = 3
else:
theTree.result = theTree.result + newToken
if nextIdx < len(tokenList):
self.parseExpr(tokenList, nextIdx, theTree)
else:
theTree.parent.result = theTree.parent.result + theTree.result + newToken
if nextIdx < len(tokenList):
self.parseExpr(tokenList, nextIdx, theTree.parent)
funcState = 0
#print('treeNode.state: ', treeNode.state, ' my.index: ', treeNode.lIndex-1, ' ', token, ' fState: ', funcState)
# End of Formula Translator
if token == ',':
if (treeNode.state == 4):
newToken = ':'
treeNode.state = 6
if (treeNode.state == 3):
newToken = '?'
treeNode.state = 4
if funcState == 3:
funcState = 0
newNode = exprNode(treeNode, 3, treeNode.lIndex)
self.parseIF(newNode)
else:
treeNode.result = treeNode.result + newToken
if funcState == 2:
funcState = 0
newNode = exprNode(treeNode, 2, treeNode.lIndex)
self.parseAngle(newNode)
treeNode.result = treeNode.result + ')'
elif funcState == 1:
treeNode.lIndex += 2 # do skip the 2 parentheses of the PI()
if lChange == -1:
#print 'state: ', treeNode.state, 'parent.result: ', treeNode.parent.result, ' mine: ', treeNode.result
treeNode.parent.result = treeNode.parent.result + treeNode.result
treeNode.parent.lIndex = treeNode.lIndex
#print('Go one level up, state: ', treeNode.state)
if (treeNode.state < 2):
#print(' Look up more token above')
if treeNode.lIndex < len(self.tokenList):
self.parseExpr(treeNode.parent)
elif lChange == 1:
#print('Go one level down')
newNode = exprNode(treeNode, 1, treeNode.lIndex)
self.parseExpr(newNode)
treeNode.lIndex = newNode.lIndex
else:
if treeNode.lIndex < len(self.tokenList):
#print('parse to the end')
self.parseExpr(treeNode)
def parseIF(self, treeNode):
# Translate the argument list of an Excel IF function.
# parseExpr calls this with a fresh child node created in state 3, whose
# lIndex points at the token that follows the 'IF' token.
# While parseExpr works on this node it rewrites the first ',' to '?'
# (state 3 -> 4) and the second ',' to ':' (state 4 -> 6).
# The translated text is accumulated in treeNode.result; nothing is returned.
#print('IF state: ', treeNode.state)
# Emit the opening parenthesis of the translated expression here ...
treeNode.result = treeNode.result + '('
# ... and skip one input token (presumably the '(' that follows IF -- TODO confirm).
treeNode.lIndex += 1
# Continue with the generic parser on this node.
self.parseExpr(treeNode)
#print('IF result: ', treeNode.result)
return
def parseAngle(self, treeNode):
# Translate the argument of a function marked with function-state 2 in
# tokenDic (COS, COSH, SIN, SINH, TAN, TANH).
# parseExpr has already appended the translated function name to the parent
# node and calls this with a fresh child node created in state 2.
# The argument is wrapped as '(1rad*( ... )'; the caller (parseExpr) appends
# the final ')' after this method returns.
#print('Angle state: ', treeNode.state)
# Open the call and multiply the argument by the rad unit.
treeNode.result = treeNode.result + '(1rad*('
# Skip one input token (presumably the '(' that follows the function name -- TODO confirm).
treeNode.lIndex += 1
# Continue with the generic parser on this node.
self.parseExpr(treeNode)
#print('angle result: ', treeNode.result)
def getText(nodelist):
rc = []
@ -316,37 +341,50 @@ def handleCells(cellList, actCellSheet, sList):
else:
cellType = 'n' # fix me some cells dont have t and s attributes
#print "reference: ", ref, ' Cell type: ', cellType
#print("reference: ", ref, ' Cell type: ', cellType)
if cellType == 'inlineStr':
iStringList = cell.getElementsByTagName("is")
#print('iString: ', iStringList)
for stringEle in iStringList:
tElement = stringEle.getElementsByTagName('t')[0]
theString = getText(tElement.childNodes)
#print('theString: ', theString)
actCellSheet.set(ref, theString.encode('utf8'))
formulaRef = cell.getElementsByTagName("f")
if len(formulaRef)==1:
theFormula = getText(formulaRef[0].childNodes)
#print "theFormula: ", theFormula
#print("theFormula: ", theFormula)
fTrans = FormulaTranslator()
actCellSheet.set(ref, fTrans.translateForm(theFormula))
else:
valueRef = cell.getElementsByTagName("v")[0]
if valueRef:
theValue = getText(valueRef.childNodes)
#print "theValue: ", theValue
if cellType == 'n':
actCellSheet.set(ref, theValue)
if cellType == 's':
actCellSheet.set(ref, (sList[int(theValue)]).encode('utf8'))
valueRef = cell.getElementsByTagName("v")
#print('valueRef: ', valueRef)
if len(valueRef)==1:
valueRef = cell.getElementsByTagName("v")[0]
if valueRef:
theValue = getText(valueRef.childNodes)
#print("theValue: ", theValue)
if cellType == 'n':
actCellSheet.set(ref, theValue)
if cellType == 's':
actCellSheet.set(ref, (sList[int(theValue)]).encode('utf8'))
def handleWorkBook(theBook, sheetDict, Doc):
theSheets = theBook.getElementsByTagName("sheet")
#print "theSheets: ", theSheets
#print("theSheets: ", theSheets)
for sheet in theSheets:
sheetAtts = sheet.attributes
nameRef = sheetAtts.getNamedItem("name")
sheetName = getText(nameRef.childNodes)
#print "table name: ", sheetName
#print("table name: ", sheetName)
idRef = sheetAtts.getNamedItem("sheetId")
sheetFile = "sheet" + getText(idRef.childNodes) + '.xml'
#print "sheetFile: ", sheetFile
#print("sheetFile: ", sheetFile)
# add FreeCAD-spreadsheet
sheetDict[sheetName] = (Doc.addObject('Spreadsheet::Sheet', sheetName), sheetFile)
@ -355,15 +393,15 @@ def handleWorkBook(theBook, sheetDict, Doc):
aliAtts = theAlias.attributes
nameRef = aliAtts.getNamedItem("name")
aliasName = getText(nameRef.childNodes)
#print "aliasName: ", aliasName
#print("aliasName: ", aliasName)
aliasRef = getText(theAlias.childNodes)
if '$' in aliasRef:
refList = aliasRef.split('!$')
adressList = refList[1].split('$')
#print "aliasRef: ", aliasRef
#print 'Sheet Name: ', refList[0]
#print 'Adress: ', adressList[0] + adressList[1]
#print("aliasRef: ", aliasRef)
#print('Sheet Name: ', refList[0])
#print('Adress: ', adressList[0] + adressList[1])
actSheet, sheetFile = sheetDict[refList[0]]
actSheet.setAlias(adressList[0]+adressList[1], aliasName.encode('utf8'))
@ -396,7 +434,7 @@ def open(nameXLSX):
theStrings.unlink()
for sheetSpec in sheetDict:
#print "sheetSpec: ", sheetSpec
#print("sheetSpec: ", sheetSpec)
theSheet, sheetFile = sheetDict[sheetSpec]
f=z.open('xl/worksheets/' + sheetFile)
myDom = xml.dom.minidom.parse(f)
@ -434,7 +472,7 @@ def insert(nameXLSX,docname):
theStrings.unlink()
for sheetSpec in sheetDict:
#print "sheetSpec: ", sheetSpec
#print("sheetSpec: ", sheetSpec)
theSheet, sheetFile = sheetDict[sheetSpec]
f=z.open('xl/worksheets/' + sheetFile)
myDom = xml.dom.minidom.parse(f)