From 693f4c759e353267bc4007a6da79d94f7e0b764a Mon Sep 17 00:00:00 2001 From: Ulrich Brammer Date: Sun, 19 Feb 2017 22:56:51 +0100 Subject: [PATCH] update of xlsx-importer: new parser, trigonometric functions give the same result in the FreeCAD-spreadsheet, support of celltype "inlineStr". --- src/Mod/Spreadsheet/importXLSX.py | 344 +++++++++++++++++------------- 1 file changed, 191 insertions(+), 153 deletions(-) diff --git a/src/Mod/Spreadsheet/importXLSX.py b/src/Mod/Spreadsheet/importXLSX.py index 4300186c4..7930f4aaa 100644 --- a/src/Mod/Spreadsheet/importXLSX.py +++ b/src/Mod/Spreadsheet/importXLSX.py @@ -30,21 +30,27 @@ __url__ = ["http://www.freecadweb.org"] ''' This library imports an Excel-XLSX-file into FreeCAD. +Version 1.1, Nov. 2016: +Changed parser, adds rad-unit to trigonometric functions in order +to give the same result in FreeCAD. +Added factor to arcus-function in order to give the same result in FreeCAD +Added support for celltype "inlineStr" + +Version 1.0: It uses a minimal parser, in order to translate the IF-function into the different FreeCAD version. The other function-names are translated by search and replace. - -Version 1.0: +Features: - Imports tables defined inside Excel-document - Set alias definitions -- Translate formulas known by FreeCAD. (see funcDic + IF) +- Translate formulas known by FreeCAD. 
(see tokenDic as by version 1.1) - set cross table references - strings are imported - references to cells with strings are working known issues: - units are not imported -- string support is minimal, as it is so in FreeCAD +- string support is minimal, the same as in FreeCAD ''' @@ -60,39 +66,12 @@ if open.__module__ == '__builtin__': pythonopen = open -funcDic = { - 'ABS(':'abs(', - 'ACOS(':'acos(', - 'ASIN(':'asin(', - 'ATAN(':'atan(', - 'ATAN2(':'atan2(', - 'COS(':'cos(', - 'COSH(':'cosh(', - 'EXP(':'exp(', - 'LOG(':'log(', - 'LOG10(':'log10(', - 'MOD(':'mod(', - 'POWER(':'pow(', - 'SIN(':'sin(', - 'SINH(':'sinh(', - 'SQRT(':'sqrt(', - 'TAN(':'tan(', - 'TANH(':'tanh(', - 'AVERAGE(':'average(', - 'COUNT(':'count(', - 'MAX(':'max(', - 'MIN(':'min(', - 'STDEVA(':'stddev(', - 'SUM(':'sum(', - 'PI()':'pi' - } - -# The treeToken structure is used in the tokenizer functions isKey and +# The sepToken structure is used in the tokenizer functions isKey and # getNextToken. -# treeToken defines a search tree for tokens with length of 1 to 3 characters -treeToken = { +# sepToken defines a search tree for separator tokens with length of 1 to 3 characters +# it is also used as a list of separators between other tokens. 
+sepToken = { '(':None, - 'I':'branchI', '=':None, '<':'branchLower', '>':'branchHigher', @@ -100,12 +79,14 @@ treeToken = { # '"':None, # ';':None, ' ':None, - ',':None, - '!':None + ',':None, # Separator on lists + '!':None, #Connector to cells on other Sheets + '+':None, + '-':None, + '*':None, + '/':None, + '^':None } - -branchI = {'F':'branchF'} -branchF = {'(':None} branchLower ={ '>':None, @@ -117,8 +98,6 @@ branchHigher = {'=':None} # Needed to get a reference from a string to a dict treeDict = { - 'branchI':branchI, - 'branchF':branchF, 'branchLower':branchLower, 'branchHigher':branchHigher } @@ -127,68 +106,95 @@ treeDict = { # The tokenDic contains the following information: # levelchange: -1: tree down, 0, +1: tree up # replacement token -# special token list +# function-state: needed to do something special in the parser +# 0 = normal, 1 = the pi-case, 2 = angle-function, +# 3 = IF-function, 4 = IF-truecase, 5 IF-falsecase + tokenDic = { - '(' :( 1, '(', None), - 'IF(':( 1, '(', None), - '=' :( 0 ,'==', None), - '<>' :( 0 ,'!=', None), - '>=' :( 0 ,'>=', None), - '<=' :( 0 ,'<=', None), - '<' :( 0 ,'<', None), - '>' :( 0 ,'>', None), -# ';' :( 0 ,';', ['?',':']), - ',' :( 0 ,',', ['?',':']), - ')' :(-1 ,')', None), - '!' :( 0 ,'.', None) #Connector to cells on other Sheets -# '"' :( 2 ,'', None) + '(' :( 1, '(', 0), + '=' :( 0 ,'==', 0), + '<>' :( 0 ,'!=', 0), + '>=' :( 0 ,'>=', 0), + '<=' :( 0 ,'<=', 0), + '<' :( 0 ,'<', 0), + '>' :( 0 ,'>', 0), + ',' :( 0 ,',', 0), + ')' :(-1 ,')', 0), + '!' 
:( 0 ,'.', 0), #Connector to cells on other Sheets +# '"' :( 2 ,'', 0), + '+' :( 0 ,'+', 0), + '-' :( 0 ,'-', 0), + '*' :( 0 ,'*', 0), + '/' :( 0 ,'/', 0), + '^' :( 0 ,'^', 0), + 'IF' :( 0, '', 3), + 'ABS' :( 0, 'abs', 0), + 'ACOS' :( 0, 'pi/180deg*acos', 0), + 'ASIN' :( 0, 'pi/180deg*asin', 0), + 'ATAN' :( 0, 'pi/180deg*atan', 0), + 'ATAN2':( 0, 'pi/180deg*atan2',0), + 'COS' :( 0, 'cos', 2), + 'COSH' :( 0, 'cosh', 2), + 'EXP' :( 0, 'exp', 0), + 'LOG' :( 0, 'log', 0), + 'LOG10':( 0, 'log10',0), + 'MOD' :( 0, 'mod', 0), + 'POWER':( 0, 'pow', 0), + 'SIN' :( 0, 'sin', 2), + 'SINH' :( 0, 'sinh', 2), + 'SQRT' :( 0, 'sqrt', 0), + 'TAN' :( 0, 'tan', 2), + 'TANH' :( 0, 'tanh', 2), + 'AVERAGE':( 0, 'average', 0), + 'COUNT':( 0, 'count',0), + 'MAX' :( 0, 'max', 0), + 'MIN' :( 0, 'min', 0), + 'STDEVA':( 0, 'stddev',0), + 'SUM' :( 0, 'sum', 0), + 'PI' :( 0, 'pi', 1) } class exprNode(object): - ''' This defines a tree class for expression parsing''' - def __init__(self, parent, state): - self.state = state #see comment: State machine for expression parsing + ''' This defines a tree class for expression parsing. 
+ A tree is build, to step down into the levels of the expression.''' + def __init__(self, parent, state, actIndex): + self.state = state #see comment: State used for Angle-functions and IF-function self.parent = parent # Parent tree node + self.lIndex = actIndex # Index to the list of tokens self.result = '' class FormulaTranslator(object): ''' This class tranlates a cell-formula from Excel to FreeCAD.''' def __init__(self): - self.theTList = ['='] + self.tokenList = ['='] def translateForm(self, actExpr): self.getNextToken(actExpr) - #print "tokenList: ", self.theTList - self.resultTree = exprNode(None, 5) - self.resultTree.result = self.resultTree.result + self.theTList[0] - self.parseExpr(self.theTList, 1, self.resultTree) - #print 'parseResult: ', self.resultTree.result - return self.replaceFunc(self.resultTree.result) + #print("tokenList: ", self.tokenList) + self.resultTree = exprNode(None, 0, 1) + self.resultTree.result = self.tokenList[0] + self.parseExpr(self.resultTree) + #print('parseResult: ', self.resultTree.result) + return self.resultTree.result - - def replaceFunc(self, cellFormula): - for funcKey in funcDic: - if funcKey in cellFormula: - cellFormula = cellFormula.replace(funcKey, funcDic[funcKey]) - return cellFormula - - def getNextToken(self, theExpr): - #print 'next Token theExpr: ', theExpr - #print 'actTList: ', self.theTList + ''' This is the recursive tokenizer for an excel formula. 
+ It appends all identified tokens to self.tokenList.''' + #print('next Token theExpr: ', theExpr) + #print('actTList: ', self.tokenList) tokenComplete = False keyToken = False if len(theExpr)>0: theTok = theExpr[0] theExpr = theExpr[1:] - if theTok in treeToken: + if theTok in sepToken: keyToken = True - branch = treeToken[theTok] + branch = sepToken[theTok] while branch: - #print branch, ' theExpr[0]: ',theExpr[0] + #print(branch, ' theExpr[0]: ',theExpr[0]) if theExpr[0] in treeDict[branch]: branch = treeDict[branch][theExpr[0]] theTok = theTok + theExpr[0] @@ -196,7 +202,7 @@ class FormulaTranslator(object): else: branch= None tokenComplete = True - self.theTList.append(theTok) + self.tokenList.append(theTok) self.getNextToken(theExpr) else: if len(theExpr)>0: @@ -208,21 +214,21 @@ class FormulaTranslator(object): tokenComplete = True else: tokenComplete = True - self.theTList.append(theTok) + self.tokenList.append(theTok) self.getNextToken(theExpr) def isKey(self, theExpr): - #print 'look up: ', theExpr + #print('look up: ', theExpr) keyToken = False lenExpr = len(theExpr) - if theExpr[0] in treeToken: - branch = treeToken[theExpr[0]] + if theExpr[0] in sepToken: + branch = sepToken[theExpr[0]] if branch == None: keyToken = True else: - #print 'There is a branch. look up: ', theExpr[1] + #print('There is a branch. 
look up: ', theExpr[1]) if (lenExpr > 1) and (theExpr[1] in treeDict[branch]): branch = treeDict[branch][theExpr[0]] if branch == None: @@ -235,60 +241,79 @@ class FormulaTranslator(object): return keyToken - # State machine for expression parsing - # 0 in ifsubexpression - # 1 in conditional - # 2 in truecase - # 3 in falsecase - # 4 in subexpression - # 5 toplevel '=' - - def parseExpr(self, tokenList, index, theTree): - token = tokenList[index] - #print 'state: ', theTree.state, ' ', token - nextIdx = index + 1 + + def parseExpr(self, treeNode): + token = self.tokenList[treeNode.lIndex] + treeNode.lIndex += 1 if token in tokenDic: - lChange, newToken, specialList = tokenDic[token] + lChange, newToken, funcState = tokenDic[token] else: lChange = 0 newToken = token - specialList = None - - if lChange == 1: - theTree.result = theTree.result + newToken - if token == '(': - state = 4 - else: - state = 1 - newNode = exprNode(theTree, state) - self.parseExpr(tokenList, nextIdx, newNode) - else: - if lChange == 0: - if theTree.state > 2: - theTree.result = theTree.result + newToken - else: - if (theTree.state == 1): - if specialList: - theTree.result = theTree.result + specialList[0] - theTree.state = 2 - else: - theTree.result = theTree.result + newToken - else: - if (theTree.state == 2): - if specialList: - theTree.result = theTree.result + specialList[1] - theTree.state = 3 - else: - theTree.result = theTree.result + newToken - - if nextIdx < len(tokenList): - self.parseExpr(tokenList, nextIdx, theTree) - else: - theTree.parent.result = theTree.parent.result + theTree.result + newToken - if nextIdx < len(tokenList): - self.parseExpr(tokenList, nextIdx, theTree.parent) + funcState = 0 + #print('treeNode.state: ', treeNode.state, ' my.index: ', treeNode.lIndex-1, ' ', token, ' fState: ', funcState) - # End of Formula Translator + if token == ',': + if (treeNode.state == 4): + newToken = ':' + treeNode.state = 6 + if (treeNode.state == 3): + newToken = '?' 
+ treeNode.state = 4 + + if funcState == 3: + funcState = 0 + newNode = exprNode(treeNode, 3, treeNode.lIndex) + self.parseIF(newNode) + else: + treeNode.result = treeNode.result + newToken + + if funcState == 2: + funcState = 0 + newNode = exprNode(treeNode, 2, treeNode.lIndex) + self.parseAngle(newNode) + treeNode.result = treeNode.result + ')' + elif funcState == 1: + treeNode.lIndex += 2 # do skip the 2 parentheses of the PI() + + if lChange == -1: + #print 'state: ', treeNode.state, 'parent.result: ', treeNode.parent.result, ' mine: ', treeNode.result + treeNode.parent.result = treeNode.parent.result + treeNode.result + treeNode.parent.lIndex = treeNode.lIndex + #print('Go one level up, state: ', treeNode.state) + if (treeNode.state < 2): + #print(' Look up more token above') + if treeNode.lIndex < len(self.tokenList): + self.parseExpr(treeNode.parent) + + elif lChange == 1: + #print('Go one level down') + newNode = exprNode(treeNode, 1, treeNode.lIndex) + self.parseExpr(newNode) + treeNode.lIndex = newNode.lIndex + else: + if treeNode.lIndex < len(self.tokenList): + #print('parse to the end') + self.parseExpr(treeNode) + + + + def parseIF(self, treeNode): + #print('IF state: ', treeNode.state) + treeNode.result = treeNode.result + '(' + treeNode.lIndex += 1 + self.parseExpr(treeNode) + #print('IF result: ', treeNode.result) + return + + def parseAngle(self, treeNode): + #print('Angle state: ', treeNode.state) + treeNode.result = treeNode.result + '(1rad*(' + treeNode.lIndex += 1 + self.parseExpr(treeNode) + #print('angle result: ', treeNode.result) + + def getText(nodelist): rc = [] @@ -316,37 +341,50 @@ def handleCells(cellList, actCellSheet, sList): else: cellType = 'n' # fix me some cells dont have t and s attributes - #print "reference: ", ref, ' Cell type: ', cellType + #print("reference: ", ref, ' Cell type: ', cellType) + + if cellType == 'inlineStr': + iStringList = cell.getElementsByTagName("is") + #print('iString: ', iStringList) + for stringEle in 
iStringList: + tElement = stringEle.getElementsByTagName('t')[0] + theString = getText(tElement.childNodes) + + #print('theString: ', theString) + actCellSheet.set(ref, theString.encode('utf8')) formulaRef = cell.getElementsByTagName("f") if len(formulaRef)==1: theFormula = getText(formulaRef[0].childNodes) - #print "theFormula: ", theFormula + #print("theFormula: ", theFormula) fTrans = FormulaTranslator() actCellSheet.set(ref, fTrans.translateForm(theFormula)) else: - valueRef = cell.getElementsByTagName("v")[0] - if valueRef: - theValue = getText(valueRef.childNodes) - #print "theValue: ", theValue - if cellType == 'n': - actCellSheet.set(ref, theValue) - if cellType == 's': - actCellSheet.set(ref, (sList[int(theValue)]).encode('utf8')) + valueRef = cell.getElementsByTagName("v") + #print('valueRef: ', valueRef) + if len(valueRef)==1: + valueRef = cell.getElementsByTagName("v")[0] + if valueRef: + theValue = getText(valueRef.childNodes) + #print("theValue: ", theValue) + if cellType == 'n': + actCellSheet.set(ref, theValue) + if cellType == 's': + actCellSheet.set(ref, (sList[int(theValue)]).encode('utf8')) def handleWorkBook(theBook, sheetDict, Doc): theSheets = theBook.getElementsByTagName("sheet") - #print "theSheets: ", theSheets + #print("theSheets: ", theSheets) for sheet in theSheets: sheetAtts = sheet.attributes nameRef = sheetAtts.getNamedItem("name") sheetName = getText(nameRef.childNodes) - #print "table name: ", sheetName + #print("table name: ", sheetName) idRef = sheetAtts.getNamedItem("sheetId") sheetFile = "sheet" + getText(idRef.childNodes) + '.xml' - #print "sheetFile: ", sheetFile + #print("sheetFile: ", sheetFile) # add FreeCAD-spreadsheet sheetDict[sheetName] = (Doc.addObject('Spreadsheet::Sheet', sheetName), sheetFile) @@ -355,15 +393,15 @@ def handleWorkBook(theBook, sheetDict, Doc): aliAtts = theAlias.attributes nameRef = aliAtts.getNamedItem("name") aliasName = getText(nameRef.childNodes) - #print "aliasName: ", aliasName + 
#print("aliasName: ", aliasName) aliasRef = getText(theAlias.childNodes) if '$' in aliasRef: refList = aliasRef.split('!$') adressList = refList[1].split('$') - #print "aliasRef: ", aliasRef - #print 'Sheet Name: ', refList[0] - #print 'Adress: ', adressList[0] + adressList[1] + #print("aliasRef: ", aliasRef) + #print('Sheet Name: ', refList[0]) + #print('Adress: ', adressList[0] + adressList[1]) actSheet, sheetFile = sheetDict[refList[0]] actSheet.setAlias(adressList[0]+adressList[1], aliasName.encode('utf8')) @@ -396,7 +434,7 @@ def open(nameXLSX): theStrings.unlink() for sheetSpec in sheetDict: - #print "sheetSpec: ", sheetSpec + #print("sheetSpec: ", sheetSpec) theSheet, sheetFile = sheetDict[sheetSpec] f=z.open('xl/worksheets/' + sheetFile) myDom = xml.dom.minidom.parse(f) @@ -434,7 +472,7 @@ def insert(nameXLSX,docname): theStrings.unlink() for sheetSpec in sheetDict: - #print "sheetSpec: ", sheetSpec + #print("sheetSpec: ", sheetSpec) theSheet, sheetFile = sheetDict[sheetSpec] f=z.open('xl/worksheets/' + sheetFile) myDom = xml.dom.minidom.parse(f)