#1
  1. No Profile Picture
    Registered User
    Devshed Newbie (0 - 499 posts)

    Join Date
    Nov 2014
    Posts
    9
    Rep Power
    0

    get parent node using etree


    Code:
    from xml.etree import ElementTree as etree
    from time import gmtime, strftime
    import time
    
    import sys
    import os
    
    # Number of .xml files found in the first folder (updated by getfileName).
    noofFiles = 0
    # Wall-clock start, used for the total-time line at the end of the report.
    timeStart = time.time()

    # Raw string: "\P" and "\C" are not valid escape sequences, so a plain
    # literal only works by accident and triggers a warning on current Python.
    executionReportFile = r"d:\Python\ComparisonResult.txt"
    # Shared report handle written to by every function in this script.
    file_object_Report = open(executionReportFile, "w")
    
    def _parse_xml_file(strPath):
    	"""Read the file at ``strPath`` and parse it into a root element.

    	Returns the root element, or ``None`` after the failure has been
    	written to the report file when the content cannot be parsed.
    	"""
    	with open(strPath, 'r') as xml_file:
    		xml_text = xml_file.read()
    	try:
    		return etree.fromstring(xml_text.strip())
    	except Exception as err:
    		# Per-file boundary of a batch run: log the problem and let the
    		# caller move on to the next file. Unlike a bare "except:", this
    		# does not swallow KeyboardInterrupt or SystemExit.
    		file_object_Report.write(str(type(err)) + "\n")
    		file_object_Report.write("\n\tError :  XML is not proper \n")
    		return None


    def getfileName():
    	"""Compare every .xml file of the first configured folder with its namesake.

    	The first line of the configuration file must have the form
    	``<old folder>;<new folder>`` or ``<old folder>;<new folder>;<tag>``,
    	where ``<tag>`` names an element that xml_compare skips. Results are
    	written to the module-level report file.

    	Side effects: sets the globals ``strIgnoreNode`` and ``noofFiles`` and
    	calls ``sys.exit()`` when the configuration file or one of the two
    	folders does not exist.
    	"""
    	global strIgnoreNode
    	global noofFiles

    	# Raw string so the backslashes are never read as escape sequences.
    	strConfigPath = r"D:\Python\Config.txt"

    	if not os.path.exists(strConfigPath):
    		print ("Configuration file not found")
    		sys.exit()

    	with open(strConfigPath, "r") as file_object:
    		# readline() keeps the trailing newline; without strip() the last
    		# field (second folder or ignore tag) would end in a line break
    		# and never match anything.
    		parts = [part.strip() for part in file_object.readline().strip().split(";")]

    	if len(parts) == 2:
    		xmlPath1, xmlPath2 = parts
    		strIgnoreNode = ""
    	elif len(parts) == 3:
    		xmlPath1, xmlPath2, strIgnoreNode = parts
    		file_object_Report.write("Following nodes are ignored while comparison : "+ strIgnoreNode + "\n\n\n")
    	else:
    		print ("Configuration file not correct !!")
    		return

    	# xmlPath1 holds the old files, xmlPath2 the new files; both must exist.
    	for strFolder in (xmlPath1, xmlPath2):
    		if not os.path.exists(strFolder):
    			file_object_Report.write("Path mentioned in the config file does not exist : "+ strFolder + "\n")
    			sys.exit()

    	# Walk the .xml entries of the first folder and compare each one with
    	# the file of the same name in the second folder.
    	for strFileName in os.listdir(xmlPath1):
    		if not strFileName.endswith(".xml"):
    			continue
    		noofFiles = noofFiles + 1

    		strPath1 = os.path.join(xmlPath1, strFileName)
    		strPath2 = os.path.join(xmlPath2, strFileName)

    		if not os.path.isfile(strPath1):
    			print ("File does not exist :", strPath1)
    			continue

    		print ("\nComparing two xmls : ", strPath1, "  and  " , strPath2 , " : ")
    		time1 = time.time()
    		file_object_Report.write("\n\n=====================================================================================\n")
    		file_object_Report.write("Comparing two xmls : "+ strPath1+ " and " + strPath2 + " : \n")

    		if not os.path.isfile(strPath2):
    			print ("Seccond file does not exist :", strPath2)
    			file_object_Report.write("\n\tSecond file does not exist :"+ strPath2 + "\n")
    			continue

    		# "is None" on purpose: an element without children is falsy.
    		tree1 = _parse_xml_file(strPath1)
    		if tree1 is None:
    			continue
    		tree2 = _parse_xml_file(strPath2)
    		if tree2 is None:
    			continue

    		xml_compare(tree1,tree2)
    		time2 = time.time()
    		file_object_Report.write("\tCompleted comparison in  : " + str(format((time2-time1),'.3f')) + " seconds \n\n")

    		print ("Done")
    	
    def findHierarchy(node, root=None):
    	"""Return the "/"-terminated path of ancestor tags leading to ``node``.

    	ElementTree elements keep no reference to their parent, so the
    	ancestors are looked up in a child -> parent map built from ``root``.

    	Args:
    		node: element whose ancestors are wanted.
    		root: root element of the tree that contains ``node``. When it is
    			omitted no ancestor can be resolved and the path is empty.

    	Returns:
    		The ancestor tags from the root down to the direct parent, each
    		followed by "/" (for example "a/b/"), or "" when ``node`` is the
    		root or is not found below ``root``. The same value is stored in
    		the global ``strPar``.
    	"""
    	global strPar
    	strPar=""
    	if root is None:
    		return strPar

    	parent_of = {child: parent for parent in root.iter() for child in parent}
    	current = parent_of.get(node)
    	while current is not None:
    		# Prepend so the outermost ancestor ends up first.
    		strPar = current.tag + "/" + strPar
    		current = parent_of.get(current)
    	return strPar
    
    
    
    def text_compare(t1, t2):
    	"""Tell whether two element texts are equal.

    	Returns True when both values are present and equal, False when both
    	are present and differ, and None when at least one of them is None
    	(no comparison is made in that case).
    	"""
    	if t1 is None or t2 is None:
    		return None
    	return t1 == t2
    
    
    def xml_compare(x1,x2):
    	"""Recursively compare two elements and log each difference to the report.

    	Tag, attributes (those present on ``x1``), text and children are
    	compared in that order; children are paired positionally, so surplus
    	children on either side are not visited.

    	Args:
    		x1: element from the first (old) document.
    		x2: element from the second (new) document.

    	Returns:
    		True when the tags match and equal the global ``strIgnoreNode``
    		(the element is then skipped entirely); None in every other case.
    	"""
    	if x1.tag != x2.tag:
    		file_object_Report.write("\n\tTags do not match :" + x1.tag + " , " + x2.tag + "\n")
    	elif x1.tag.strip() == strIgnoreNode:
    		return True

    	for name, value in x1.attrib.items():
    		other_value = x2.attrib.get(name)
    		if other_value != value:
    			# str() because the attribute may be missing on x2 (None),
    			# which would otherwise break the string concatenation.
    			strReport="\n\tAttributes do not match : " +  name + " : " + value + " ' " + name + str(other_value) + "\n"
    			file_object_Report.write(strReport)

    	# text_compare returns False only when both texts exist and differ,
    	# so both .text values are strings inside this branch.
    	if text_compare(x1.text, x2.text) is False:
    		file_object_Report.write("\n\tMismatch found : \n")
    		file_object_Report.write("\t\t" + x1.tag + " : " + x1.text + "\n")
    		file_object_Report.write("\t\t" + x2.tag + " : " + x2.text + "\n")

    	# list(element) replaces Element.getchildren(), removed in Python 3.9.
    	cl1 = list(x1)
    	cl2 = list(x2)

    	if len(cl1) != len(cl2):
    		print ("\tChildren length differ ")

    	for c1, c2 in zip(cl1, cl2):
    		blnResult = xml_compare(c1, c2)
    		# NOTE: xml_compare only ever returns True or None, so this
    		# message is currently never printed.
    		if blnResult is not None and blnResult != True:
    			print ( 'Followig tags of both the files do not match : ',c1.tag,c2.tag)
    
    
    try:
    	getfileName()

    	timeEnd = time.time()
    	TotTime = timeEnd - timeStart
    	file_object_Report.write("\n\n\nTime taken to compare all the " + str(noofFiles) + " file " + str(format(TotTime,".3f")))
    finally:
    	# Close the report even when getfileName() leaves through sys.exit(),
    	# so everything written so far reaches the disk.
    	file_object_Report.close()
  2. #2
  3. Administrator
    Devshed Newbie (0 - 499 posts)

    Join Date
    Jan 2015
    Posts
    82
    Rep Power
    10
    Hello,

    What's the context for posting this code? Are you looking for second opinions or a critique, or is it not working the way you'd expect?
  4. #3
  5. No Profile Picture
    Registered User
    Devshed Newbie (0 - 499 posts)

    Join Date
    Nov 2014
    Posts
    9
    Rep Power
    0
    Apologies for the incomplete post; here is what I am looking for:


    I have written this code to compare two XML files using ElementTree. I read the XML files and then parse them with ElementTree.

    First I get the root nodes of the two XML files, and then I pass them to a function that compares the child nodes recursively.

    The problem I am facing is that I am not able to list the parent node of a node whose value does not match the other XML file. I found online that ElementTree does not support getting a parent node, but I am looking for a workaround.

    Any help will be much appreciated.

    Comments on this post

    • markroberts agrees : No problem! Thanks for the context!

IMN logo majestic logo threadwatch logo seochat tools logo