Difference between revisions of "ProvaPratica 2013.06.21"

From Sistemi Operativi
Jump to navigation Jump to search
Line 71: Line 71:
 
if __name__ == "__main__":
 
if __name__ == "__main__":
 
     sys.exit(Main(sys.argv))
 
     sys.exit(Main(sys.argv))
 +
</syntaxhighlight>
 +
 +
 +
<syntaxhighlight lang="Python">
 +
import os, hashlib
 +
 +
def fileCurrDir():#restituisce una lista res di file e solo file presenti nella directory
 +
fcd = os.listdir('.')
 +
res = []
 +
for ott in fcd:
 +
if os.path.isfile('{0}'.format(ott)):res.append(ott)
 +
else:continue
 +
return res
 +
 +
def dictsize(fl=fileCurrDir()):#restituisce un dizionario rez[filename]=filesize
 +
res = {}
 +
for f in fl:
 +
if os.path.getsize('{0}'.format(f)) in list(res.keys()):res[os.path.getsize('{0}'.format(f))].append(f);continue
 +
else:pass
 +
res[os.path.getsize('{0}'.format(f))] = list()
 +
res[os.path.getsize('{0}'.format(f))].append(f)
 +
return res
 +
 +
def dictremsa(a=dictsize()):#data un dizionario rimuove da key a lista rimuove tutti gli item la cui len di lista sia unitaria
 +
tmp = list(a.keys())
 +
for tmpkey in tmp:
 +
if len(a[tmpkey]) < 2: a.pop(tmpkey)
 +
else:continue
 +
return a
 +
 +
def hashcontrolinsl(l1):
 +
res = []
 +
while l1 != []:
 +
toTest = l1.pop()
 +
for tmp in l1:
 +
#res.append(list[toTest,tmp])
 +
#return res
 +
hasher = hashlib.md5()
 +
hasher2 = hashlib.md5()
 +
f = open('{0}'.format(toTest), 'rb')
 +
toHash = f.read()
 +
hasher.update(toHash)
 +
toTesthash = hasher.hexdigest()
 +
f.close()
 +
f = open('{0}'.format(tmp), 'rb')
 +
toHash2 = f.read()
 +
hasher2.update(toHash2)
 +
tmphash = hasher2.hexdigest()
 +
if tmphash==toTesthash: print('{0}  \t e  \t {1}  sono uguali\n'.format(toTest,tmp))
 +
else:continue
 +
 +
 +
 +
 +
 +
def hashcontroltoMajorTom(a=dictremsa()):
 +
hasher = hashlib.md5()
 +
obviouslyequal = a.pop(0)
 +
print("i seguenti file hanno lo stesso contenuto... NULLA!!!!!:\n")
 +
for oe in obviouslyequal:
 +
print("{0}".format(oe))
 +
values = list(a.values())
 +
print("\nle seguenti coppie invece contengono qualcosa ma sono uguali:\n")
 +
for namelist in values:
 +
hashcontrolinsl(namelist)
 +
 +
 +
hashcontroltoMajorTom()
 
</syntaxhighlight>
 
</syntaxhighlight>

Revision as of 19:59, 23 November 2013

[Python 3]

'''
Prova Pratica di Laboratorio di Sistemi Operativi
21 giugno 2013
Esercizio 3

URL: http://www.cs.unibo.it/~renzo/so/pratiche/2013.06.21.pdf

@author: Tommaso Ognibene
'''

import os, sys, hashlib

def Main(argv):
    """Report the files under the current directory that share the same content.

    Files are first grouped by size; only the groups holding at least two
    files are then hashed with MD5, and the groups of equal digests are
    printed.

    Args:
        argv: Command-line argument list including the program name. The
            program takes no arguments, so exactly one element is expected.

    Returns:
        0 on success, 1 when unexpected arguments were supplied. The value
        is suitable for passing to ``sys.exit``.
    """
    # Check number of arguments
    if len(argv) != 1:
        print("The function does not require arguments to be passed in.",
              file=sys.stderr)
        # A non-zero status lets the calling shell detect the misuse.
        return 1

    # Build a dictionary with key-value pair {file size - [file name]}
    sameSize = {}
    PopulateSameSize(sameSize)

    # Build a dictionary with key-value pair {MD5 hash - [file name]}
    sameContent = {}
    # Largest groups first: once a group has fewer than two files, every
    # remaining group does too, so hashing can stop.
    for files in sorted(sameSize.values(), key=len, reverse=True):
        if len(files) < 2:
            break
        PopulateSameContent(files, sameContent)

    # Print results
    PrintResults(sameContent)

    print("Done!")
    return 0
     
# Populate a dictionary with key-value pair {file size - [file name]}
def PopulateSameSize(sameSize):
    """Group every file below the current working directory by its size.

    Args:
        sameSize: Dictionary updated in place. Each key is a size in bytes
            and each value is the list of paths (joined onto the walked
            directory path) of the files having that size.
    """
    for dirPath, _dirNames, fileNames in os.walk(os.getcwd()):
        for fileName in fileNames:
            filePath = os.path.join(dirPath, fileName)
            try:
                fileSize = os.path.getsize(filePath)
            except OSError:
                # Dangling symlink, or a file removed or made unreadable
                # while walking: it cannot be compared, so skip it instead
                # of aborting the whole scan.
                continue
            sameSize.setdefault(fileSize, []).append(filePath)
 
# Populate a dictionary with key-value pair {MD5 hash - [file name]}
def PopulateSameContent(files, sameContent):
    """Record each path of *files* under the MD5 digest of its content.

    Args:
        files: Iterable of file paths to hash.
        sameContent: Dictionary updated in place. Each key is an MD5 hex
            digest and each value is the list of paths, expressed relative
            to the current working directory, whose content has that digest.
    """
    for absolutePath in files:
        digest = Md5Checksum(absolutePath)
        shownPath = os.path.relpath(absolutePath, os.getcwd())
        alreadySeen = sameContent.get(digest, [])
        sameContent[digest] = alreadySeen + [shownPath]

# Compute the MD5 hash of a file
def Md5Checksum(filePath):
    """Return the hexadecimal MD5 digest of the file at *filePath*.

    The file is opened in binary mode and consumed in pieces of 8192 bytes,
    so its whole content never has to be held in memory at once.
    """
    with open(filePath, 'rb') as stream:
        digest = hashlib.md5()
        # iter() with a sentinel stops as soon as read() returns b'' (EOF).
        for chunk in iter(lambda: stream.read(8192), b''):
            digest.update(chunk)
        return digest.hexdigest()

# Printout the lists of files having same content
def PrintResults(sameContent):
    """Print a header followed by one bracketed line per group of duplicates.

    Args:
        sameContent: Dictionary whose values are lists of file names. Only
            the lists holding more than one name are printed, each as a
            comma-separated line enclosed in square brackets.
    """
    print('List of files having same content:')
    duplicateGroups = (group for group in sameContent.values() if len(group) > 1)
    for group in duplicateGroups:
        joinedNames = ", ".join(map(str, group))
        print("[{0}]".format(joinedNames))
        
if __name__ == "__main__":
    # Run only when executed as a script; Main's return value becomes the
    # process exit status.
    raise SystemExit(Main(sys.argv))


import os, hashlib

def fileCurrDir():
    """Return the names of the entries of the current directory that are files.

    Directories and anything else for which ``os.path.isfile`` is false are
    left out. Names are returned in the order ``os.listdir`` yields them.
    """
    return [entry for entry in os.listdir('.') if os.path.isfile(entry)]

def dictsize(fl=None):
    """Group file names by file size.

    Args:
        fl: Iterable of file paths to measure. When omitted (``None``), the
            files of the current directory are listed by ``fileCurrDir()``
            at call time.

    Returns:
        A dictionary mapping each size in bytes to the list of paths having
        that size, in the order in which they were encountered.
    """
    # Resolve the default here rather than in the signature: a default
    # expression is evaluated only once, when the function is defined, which
    # would freeze the directory listing taken at import time.
    if fl is None:
        fl = fileCurrDir()
    res = {}
    for f in fl:
        # One getsize() call per file; setdefault creates the group on demand.
        res.setdefault(os.path.getsize(f), []).append(f)
    return res

def dictremsa(a=None):
    """Remove from a dictionary every entry whose list has fewer than two items.

    Args:
        a: Dictionary whose values are lists. It is filtered in place. When
            omitted (``None``), a fresh dictionary is built by ``dictsize()``
            at call time.

    Returns:
        The same dictionary object, now holding only the entries whose list
        contains at least two items.
    """
    # Resolve the default at call time: a default expression in the
    # signature is evaluated once at definition time, and that single shared
    # dictionary would then be mutated by every call.
    if a is None:
        a = dictsize()
    # Collect the keys first so the dictionary is not resized while iterated.
    for key in [k for k, names in a.items() if len(names) < 2]:
        del a[key]
    return a

def _md5_of_file(path, chunk_size=8192):
    """Return the hexadecimal MD5 digest of the file at *path*, read in chunks."""
    hasher = hashlib.md5()
    with open(path, 'rb') as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            hasher.update(chunk)
    return hasher.hexdigest()


def hashcontrolinsl(l1):
    """Print every pair of files in *l1* whose contents have the same MD5 digest.

    Pairs are examined starting from the last name of the list, each one
    compared against all the names that precede it.

    Args:
        l1: List of file paths. The list itself is left untouched; the
            function works on a private copy.

    Returns:
        None. The matches are written to standard output.
    """
    pending = list(l1)
    digests = {}

    def digest(path):
        # Hash each file at most once, and only when a comparison needs it.
        if path not in digests:
            digests[path] = _md5_of_file(path)
        return digests[path]

    while pending:
        toTest = pending.pop()
        for tmp in pending:
            if digest(toTest) == digest(tmp):
                print('{0}   \t e   \t {1}  sono uguali\n'.format(toTest, tmp))
		

		


def hashcontroltoMajorTom(a=None):
    """Print the duplicate-file report for a dictionary of same-size files.

    Files grouped under size 0 are listed directly, since empty files are
    necessarily identical. Every other group is handed to
    ``hashcontrolinsl`` for comparison.

    Args:
        a: Dictionary mapping a file size to the list of file names having
            that size. It is not modified by this function. When omitted
            (``None``), it is built by ``dictremsa()`` at call time.

    Returns:
        None. The report is written to standard output.
    """
    # Resolve the default at call time: a default expression in the
    # signature is evaluated only once, when the function is defined.
    if a is None:
        a = dictremsa()
    print("i seguenti file hanno lo stesso contenuto... NULLA!!!!!:\n")
    # There may be no group of empty files at all; treat that as an empty
    # list instead of failing with KeyError.
    for oe in a.get(0, []):
        print("{0}".format(oe))
    print("\nle seguenti coppie invece contengono qualcosa ma sono uguali:\n")
    for namelist in [names for size, names in a.items() if size != 0]:
        hashcontrolinsl(namelist)


# Script entry point: print the report using the function's default argument.
hashcontroltoMajorTom()