update of xlsx-importer: new parser,

Trigonometric functions now give the same result in the FreeCAD spreadsheet; added support for cell type "inlineStr".
2017-02-19 22:56:51 +01:00 · 2017-02-19 22:56:51 +01:00 · 693f4c759e
commit 693f4c759e
parent f55635900f
1 changed files with 191 additions and 153 deletions
--- a/src/Mod/Spreadsheet/importXLSX.py
+++ b/src/Mod/Spreadsheet/importXLSX.py
@ -30,21 +30,27 @@ __url__ = ["http://www.freecadweb.org"]
 '''
 This library imports an Excel-XLSX-file into FreeCAD.

+Version 1.1, Nov. 2016:
+Changed parser, adds rad-unit to trigonometric functions in order
+to give the same result in FreeCAD.
+Added factor to the arcus (inverse trigonometric) functions in order to give the same result in FreeCAD.
+Added support for celltype "inlineStr"
+
+Version 1.0:
 It uses a minimal parser, in order to translate the IF-function into
 the different FreeCAD version.
 The other function-names are translated by search and replace.
-
-Version 1.0:
+Features:
 - Imports tables defined inside Excel-document
 - Set alias definitions
- Translate formulas known by FreeCAD. (see funcDic + IF)
+- Translate formulas known by FreeCAD. (see tokenDic as by version 1.1)
 - set cross table references
 - strings are imported
 - references to cells with strings are working

 known issues:
 - units are not imported
- string support is minimal, as it is so in FreeCAD
+- string support is minimal, the same as in FreeCAD
 '''


@ -60,39 +66,12 @@ if open.__module__ == '__builtin__':
    pythonopen = open


-funcDic = {
-  'ABS(':'abs(',
-  'ACOS(':'acos(', 
-  'ASIN(':'asin(',
-  'ATAN(':'atan(', 
-  'ATAN2(':'atan2(',
-  'COS(':'cos(',
-  'COSH(':'cosh(',
-  'EXP(':'exp(',
-  'LOG(':'log(',
-  'LOG10(':'log10(', 
-  'MOD(':'mod(', 
-  'POWER(':'pow(', 
-  'SIN(':'sin(', 
-  'SINH(':'sinh(', 
-  'SQRT(':'sqrt(',
-  'TAN(':'tan(', 
-  'TANH(':'tanh(',
-  'AVERAGE(':'average(',
-  'COUNT(':'count(', 
-  'MAX(':'max(', 
-  'MIN(':'min(', 
-  'STDEVA(':'stddev(', 
-  'SUM(':'sum(',
-  'PI()':'pi'
-  } 
-  
-# The treeToken structure is used in the tokenizer functions isKey and
+# The sepToken structure is used in the tokenizer functions isKey and
 # getNextToken.
-# treeToken defines a search tree for tokens with length of 1 to 3 characters
-treeToken = {
+# sepToken defines a search tree for separator tokens with length of 1 to 3 characters
+# it is also used as a list of separators between other tokens.
+sepToken = {
  '(':None,
-  'I':'branchI',
  '=':None,
  '<':'branchLower',
  '>':'branchHigher',
@ -100,12 +79,14 @@ treeToken = {
 #  '"':None,
 #  ';':None,
  ' ':None,
-  ',':None,
-  '!':None
+  ',':None, # Separator on lists
+  '!':None, #Connector to cells on other Sheets
+  '+':None,
+  '-':None,
+  '*':None,
+  '/':None,
+  '^':None
  }
-  
-branchI = {'F':'branchF'}
-branchF = {'(':None}

 branchLower ={
  '>':None,
@ -117,8 +98,6 @@ branchHigher = {'=':None}

 # Needed to get a reference from a string to a dict 
 treeDict = {
-  'branchI':branchI,
-  'branchF':branchF,
  'branchLower':branchLower,
  'branchHigher':branchHigher
  }
@ -127,68 +106,95 @@ treeDict = {
 # The tokenDic contains the following information:
 # levelchange: -1: tree down, 0, +1: tree up
 # replacement token
-# special token list
+# function-state: tells the parser that the token needs special handling
+#     0 = normal, 1 = the pi-case, 2 = angle-function,
+#     3 = IF-function, 4 = IF-truecase, 6 = IF-falsecase (parseExpr sets 6, not 5)
+

 tokenDic = {
-  '('  :( 1, '(',  None),
-  'IF(':( 1, '(',  None),
-  '='  :( 0 ,'==', None),
-  '<>' :( 0 ,'!=', None),
-  '>=' :( 0 ,'>=', None),
-  '<=' :( 0 ,'<=', None),
-  '<'  :( 0 ,'<',  None),
-  '>'  :( 0 ,'>',  None),
-#  ';'  :( 0 ,';', ['?',':']),
-  ','  :( 0 ,',', ['?',':']),
-  ')'  :(-1 ,')',  None),
-  '!'  :( 0 ,'.',  None)   #Connector to cells on other Sheets
-#  '"'  :( 2 ,'',  None)
+  '('  :( 1, '(',  0),
+  '='  :( 0 ,'==', 0),
+  '<>' :( 0 ,'!=', 0),
+  '>=' :( 0 ,'>=', 0),
+  '<=' :( 0 ,'<=', 0),
+  '<'  :( 0 ,'<',  0),
+  '>'  :( 0 ,'>',  0),
+  ','  :( 0 ,',',  0),
+  ')'  :(-1 ,')',  0),
+  '!'  :( 0 ,'.',  0),   #Connector to cells on other Sheets
+#  '"'  :( 2 ,'',  0),
+  '+'  :( 0 ,'+',  0),
+  '-'  :( 0 ,'-',  0),
+  '*'  :( 0 ,'*',  0),
+  '/'  :( 0 ,'/',  0),
+  '^'  :( 0 ,'^',  0),
+  'IF' :( 0, '',   3),
+  'ABS'  :( 0, 'abs',  0),
+  'ACOS' :( 0, 'pi/180deg*acos', 0),
+  'ASIN' :( 0, 'pi/180deg*asin', 0),
+  'ATAN' :( 0, 'pi/180deg*atan', 0),
+  'ATAN2':( 0, 'pi/180deg*atan2',0),
+  'COS'  :( 0, 'cos',  2),
+  'COSH' :( 0, 'cosh', 2),
+  'EXP'  :( 0, 'exp',  0),
+  'LOG'  :( 0, 'log',  0),
+  'LOG10':( 0, 'log10',0),
+  'MOD'  :( 0, 'mod',  0),
+  'POWER':( 0, 'pow',  0),
+  'SIN'  :( 0, 'sin',  2),
+  'SINH' :( 0, 'sinh', 2),
+  'SQRT' :( 0, 'sqrt', 0),
+  'TAN'  :( 0, 'tan',  2),
+  'TANH' :( 0, 'tanh', 2),
+  'AVERAGE':( 0, 'average', 0),
+  'COUNT':( 0, 'count',0),
+  'MAX'  :( 0, 'max',  0),
+  'MIN'  :( 0, 'min',  0),
+  'STDEVA':( 0, 'stddev',0),
+  'SUM'  :( 0, 'sum',  0),
+  'PI'   :( 0, 'pi',   1) 
  }


 class exprNode(object):
-  ''' This defines a tree class for expression parsing'''   
-  def __init__(self, parent, state):
-    self.state = state #see comment: State machine for expression parsing
+  ''' This defines a tree class for expression parsing.
+  A tree is built to step down into the levels of the expression.'''
+  def __init__(self, parent, state, actIndex):
+    self.state = state #see comment: State used for Angle-functions and IF-function
    self.parent = parent # Parent tree node
+    self.lIndex = actIndex # Index to the list of tokens
    self.result = ''


 class FormulaTranslator(object):
  ''' This class tranlates a cell-formula from Excel to FreeCAD.'''
  def __init__(self):
-    self.theTList = ['=']
+    self.tokenList = ['=']

  def translateForm(self, actExpr):
    self.getNextToken(actExpr)
-    #print "tokenList: ", self.theTList
-    self.resultTree = exprNode(None, 5)
-    self.resultTree.result = self.resultTree.result + self.theTList[0]
-    self.parseExpr(self.theTList, 1, self.resultTree)
-    #print 'parseResult: ', self.resultTree.result
-    return self.replaceFunc(self.resultTree.result)
+    #print("tokenList: ", self.tokenList)
+    self.resultTree = exprNode(None, 0, 1)
+    self.resultTree.result = self.tokenList[0]
+    self.parseExpr(self.resultTree)
+    #print('parseResult: ', self.resultTree.result)
+    return self.resultTree.result

-
-  def replaceFunc(self, cellFormula):
-    for funcKey in funcDic:
-      if funcKey in cellFormula:
-        cellFormula = cellFormula.replace(funcKey, funcDic[funcKey])
-    return cellFormula
-  
-  
  def getNextToken(self, theExpr):
-    #print 'next Token theExpr: ', theExpr
-    #print 'actTList: ', self.theTList
+    ''' This is the recursive tokenizer for an excel formula.
+    It appends all identified tokens to self.tokenList.'''
+    #print('next Token theExpr: ', theExpr)
+    #print('actTList: ', self.tokenList)
    tokenComplete = False
    keyToken = False
    if len(theExpr)>0:
      theTok = theExpr[0]
      theExpr = theExpr[1:]
-      if theTok in treeToken:
+      if theTok in sepToken:
        keyToken = True
-        branch = treeToken[theTok]
+        branch = sepToken[theTok]
        while branch:
-          #print branch, ' theExpr[0]: ',theExpr[0] 
+          #print(branch, ' theExpr[0]: ',theExpr[0])
          if theExpr[0] in treeDict[branch]:
            branch = treeDict[branch][theExpr[0]]
            theTok = theTok + theExpr[0]
@ -196,7 +202,7 @@ class FormulaTranslator(object):
          else:
            branch= None
        tokenComplete = True
-        self.theTList.append(theTok)
+        self.tokenList.append(theTok)
        self.getNextToken(theExpr)
      else:
        if len(theExpr)>0:
@ -208,21 +214,21 @@ class FormulaTranslator(object):
                tokenComplete = True
            else:
              tokenComplete = True
-        self.theTList.append(theTok)
+        self.tokenList.append(theTok)
        self.getNextToken(theExpr)
          
    
  def isKey(self, theExpr):
-    #print 'look up: ', theExpr
+    #print('look up: ', theExpr)
    keyToken = False
    lenExpr = len(theExpr)
-    if theExpr[0] in treeToken:
-      branch = treeToken[theExpr[0]]
+    if theExpr[0] in sepToken:
+      branch = sepToken[theExpr[0]]
      
      if branch == None:
        keyToken = True
      else:
-        #print 'There is a branch. look up: ', theExpr[1]
+        #print('There is a branch. look up: ', theExpr[1])
        if (lenExpr > 1) and (theExpr[1] in treeDict[branch]):
          branch = treeDict[branch][theExpr[0]]
          if branch == None:
@ -235,60 +241,79 @@ class FormulaTranslator(object):
    return keyToken
          
  
-  # State machine for expression parsing
-  # 0 in ifsubexpression
-  # 1 in conditional
-  # 2 in truecase
-  # 3 in falsecase
-  # 4 in subexpression
-  # 5 toplevel '='
-  
-  def parseExpr(self, tokenList, index, theTree):
-    token = tokenList[index]
-    #print 'state: ', theTree.state, ' ', token
-    nextIdx = index + 1
+
+  def parseExpr(self, treeNode):
+    token = self.tokenList[treeNode.lIndex]
+    treeNode.lIndex += 1
    if token in tokenDic:
-      lChange, newToken, specialList = tokenDic[token]
+      lChange, newToken, funcState = tokenDic[token]
    else:
      lChange = 0
      newToken = token
-      specialList = None
-    
-    if lChange == 1:
-      theTree.result = theTree.result + newToken
-      if token == '(':
-        state = 4
-      else:
-        state = 1
-      newNode = exprNode(theTree, state)
-      self.parseExpr(tokenList, nextIdx, newNode)
-    else:
-      if lChange == 0:
-        if theTree.state > 2:
-          theTree.result = theTree.result + newToken
-        else:
-          if (theTree.state == 1):
-            if specialList:
-              theTree.result = theTree.result + specialList[0]
-              theTree.state = 2
-            else:
-              theTree.result = theTree.result + newToken
-          else:
-            if (theTree.state == 2):
-              if specialList:
-                theTree.result = theTree.result + specialList[1]
-                theTree.state = 3
-              else:
-                theTree.result = theTree.result + newToken
-          
-        if nextIdx < len(tokenList):
-          self.parseExpr(tokenList, nextIdx, theTree)
-      else:
-        theTree.parent.result = theTree.parent.result + theTree.result + newToken
-        if nextIdx < len(tokenList):
-          self.parseExpr(tokenList, nextIdx, theTree.parent)
+      funcState = 0
+    #print('treeNode.state: ', treeNode.state, ' my.index: ', treeNode.lIndex-1, ' ', token, ' fState: ', funcState)
      
-  # End of Formula Translator
+    if token == ',':
+      if (treeNode.state == 4):
+        newToken = ':'
+        treeNode.state = 6      
+      if (treeNode.state == 3):
+        newToken = '?'
+        treeNode.state = 4
+      
+    if funcState == 3:
+      funcState = 0
+      newNode = exprNode(treeNode, 3, treeNode.lIndex)
+      self.parseIF(newNode) 
+    else:
+      treeNode.result = treeNode.result + newToken
+      
+    if funcState == 2:
+      funcState = 0
+      newNode = exprNode(treeNode, 2, treeNode.lIndex)
+      self.parseAngle(newNode) 
+      treeNode.result = treeNode.result + ')'
+    elif funcState == 1: 
+      treeNode.lIndex += 2  # do skip the 2 parentheses of the PI()
+      
+    if lChange == -1:
+      #print 'state: ', treeNode.state, 'parent.result: ', treeNode.parent.result, ' mine: ', treeNode.result
+      treeNode.parent.result = treeNode.parent.result + treeNode.result
+      treeNode.parent.lIndex = treeNode.lIndex
+      #print('Go one level up, state: ', treeNode.state)
+      if (treeNode.state < 2):
+        #print(' Look up more token above')
+        if treeNode.lIndex < len(self.tokenList):
+          self.parseExpr(treeNode.parent)
+        
+    elif lChange == 1:
+      #print('Go one level down')
+      newNode = exprNode(treeNode, 1, treeNode.lIndex)
+      self.parseExpr(newNode) 
+      treeNode.lIndex = newNode.lIndex 
+    else:
+      if treeNode.lIndex < len(self.tokenList):
+        #print('parse to the end')
+        self.parseExpr(treeNode)
+
+
+
+  def parseIF(self, treeNode):
+    #print('IF state: ', treeNode.state)
+    treeNode.result = treeNode.result + '('
+    treeNode.lIndex += 1
+    self.parseExpr(treeNode) 
+    #print('IF result: ', treeNode.result)
+    return
+
+  def parseAngle(self, treeNode):
+    #print('Angle state: ', treeNode.state)
+    treeNode.result = treeNode.result + '(1rad*('
+    treeNode.lIndex += 1
+    self.parseExpr(treeNode) 
+    #print('angle result: ', treeNode.result)
+
+

 def getText(nodelist):
  rc = []
@ -316,37 +341,50 @@ def handleCells(cellList, actCellSheet, sList):
    else:
      cellType = 'n'   # fix me some cells dont have t and s attributes

-    #print "reference: ", ref, ' Cell type: ', cellType
+    #print("reference: ", ref, ' Cell type: ', cellType)
+    
+    if cellType == 'inlineStr':
+      iStringList = cell.getElementsByTagName("is")
+      #print('iString: ', iStringList)
+      for stringEle in iStringList:
+        tElement = stringEle.getElementsByTagName('t')[0]
+        theString = getText(tElement.childNodes)
+        
+        #print('theString: ', theString)
+        actCellSheet.set(ref, theString.encode('utf8'))

    formulaRef = cell.getElementsByTagName("f")
    if len(formulaRef)==1:
      theFormula = getText(formulaRef[0].childNodes)
-      #print "theFormula: ", theFormula
+      #print("theFormula: ", theFormula)
      fTrans = FormulaTranslator()
      actCellSheet.set(ref, fTrans.translateForm(theFormula))

    else:
-      valueRef = cell.getElementsByTagName("v")[0]
-      if valueRef:
-        theValue = getText(valueRef.childNodes)
-        #print "theValue: ", theValue
-        if cellType == 'n':
-          actCellSheet.set(ref, theValue)
-        if cellType == 's':
-          actCellSheet.set(ref, (sList[int(theValue)]).encode('utf8'))
+      valueRef = cell.getElementsByTagName("v")
+      #print('valueRef: ', valueRef)
+      if len(valueRef)==1:
+        valueRef = cell.getElementsByTagName("v")[0]
+        if valueRef:
+          theValue = getText(valueRef.childNodes)
+          #print("theValue: ", theValue)
+          if cellType == 'n':
+            actCellSheet.set(ref, theValue)
+          if cellType == 's':
+            actCellSheet.set(ref, (sList[int(theValue)]).encode('utf8'))


 def handleWorkBook(theBook, sheetDict, Doc):
  theSheets = theBook.getElementsByTagName("sheet")
-  #print "theSheets: ", theSheets
+  #print("theSheets: ", theSheets)
  for sheet in theSheets:
    sheetAtts = sheet.attributes
    nameRef = sheetAtts.getNamedItem("name")
    sheetName = getText(nameRef.childNodes)
-    #print "table name: ", sheetName
+    #print("table name: ", sheetName)
    idRef = sheetAtts.getNamedItem("sheetId")
    sheetFile = "sheet" + getText(idRef.childNodes) + '.xml'
-    #print "sheetFile: ", sheetFile
+    #print("sheetFile: ", sheetFile)
    # add FreeCAD-spreadsheet
    sheetDict[sheetName] = (Doc.addObject('Spreadsheet::Sheet', sheetName), sheetFile)
    
@ -355,15 +393,15 @@ def handleWorkBook(theBook, sheetDict, Doc):
    aliAtts = theAlias.attributes
    nameRef = aliAtts.getNamedItem("name")
    aliasName = getText(nameRef.childNodes)
-    #print "aliasName: ", aliasName
+    #print("aliasName: ", aliasName)

    aliasRef = getText(theAlias.childNodes)
    if '$' in aliasRef:
      refList = aliasRef.split('!$')
      adressList = refList[1].split('$')
-      #print "aliasRef: ", aliasRef
-      #print 'Sheet Name: ', refList[0]
-      #print 'Adress: ', adressList[0] + adressList[1]
+      #print("aliasRef: ", aliasRef)
+      #print('Sheet Name: ', refList[0])
+      #print('Adress: ', adressList[0] + adressList[1])
      actSheet, sheetFile = sheetDict[refList[0]]
      actSheet.setAlias(adressList[0]+adressList[1], aliasName.encode('utf8'))

@ -396,7 +434,7 @@ def open(nameXLSX):
      theStrings.unlink()
    
    for sheetSpec in sheetDict:
-      #print "sheetSpec: ", sheetSpec
+      #print("sheetSpec: ", sheetSpec)
      theSheet, sheetFile = sheetDict[sheetSpec]
      f=z.open('xl/worksheets/' + sheetFile)
      myDom = xml.dom.minidom.parse(f)
@ -434,7 +472,7 @@ def insert(nameXLSX,docname):
    theStrings.unlink()
  
  for sheetSpec in sheetDict:
-    #print "sheetSpec: ", sheetSpec
+    #print("sheetSpec: ", sheetSpec)
    theSheet, sheetFile = sheetDict[sheetSpec]
    f=z.open('xl/worksheets/' + sheetFile)
    myDom = xml.dom.minidom.parse(f)