ProvaPratica 2013.06.21
Jump to navigation
Jump to search
[Python 3]
'''
Prova Pratica di Laboratorio di Sistemi Operativi
20 giugno 2013
Esercizio 3
URL: http://www.cs.unibo.it/~renzo/so/pratiche/2013.06.21.pdf
@author: Tommaso Ognibene
'''
import os, sys, hashlib
def Main(argv):
    """Report groups of files with identical content.

    Files are first grouped by size (via ``PopulateSameSize``); only size
    groups holding at least two files are then hashed (via
    ``PopulateSameContent``), and the resulting groups are printed.

    Args:
        argv: The command-line argument vector; it must contain only the
            program name.

    Returns:
        0 on success, 1 when extra arguments were supplied. The value is
        meant to be handed to ``sys.exit`` so that misuse is visible to the
        calling shell.
    """
    # Check number of arguments: only the program name is expected.
    if len(argv) != 1:
        print("The function does not require arguments to be passed in.")
        return 1

    # Build a dictionary with key-value pair {file size - [file name]}
    sameSize = {}
    PopulateSameSize(sameSize)

    # Build a dictionary with key-value pair {MD5 hash - [file name]}
    sameContent = {}
    # Largest groups first; once a group has fewer than two files, every
    # remaining group does too, so hashing can stop.
    for files in sorted(sameSize.values(), key=len, reverse=True):
        if len(files) < 2:
            break
        PopulateSameContent(files, sameContent)

    # Print results
    PrintResults(sameContent)
    print("Done!")
    return 0
# Populate a dictionary with key-value pair {file size - [file name]}
def PopulateSameSize(sameSize, root=None):
    """Group every non-directory entry found below *root* by its size.

    Args:
        sameSize: Dictionary updated in place; maps a size in bytes to the
            list of paths (joined onto the walked directory) with that size.
        root: Directory to walk recursively. Defaults to the current working
            directory.
    """
    if root is None:
        root = os.getcwd()
    for dirPath, _dirNames, fileNames in os.walk(root):
        for fileName in fileNames:
            filePath = os.path.join(dirPath, fileName)
            try:
                fileSize = os.path.getsize(filePath)
            except OSError as error:
                # E.g. a dangling symlink or a file removed during the walk:
                # report it and keep scanning instead of aborting.
                print("Skipping {0}: {1}".format(filePath, error),
                      file=sys.stderr)
                continue
            sameSize.setdefault(fileSize, []).append(filePath)
# Populate a dictionary with key-value pair {MD5 hash - [file name]}
def PopulateSameContent(files, sameContent):
    """Group the given files by the MD5 digest of their content.

    Args:
        files: Iterable of file paths to hash.
        sameContent: Dictionary updated in place; maps an MD5 hex digest to
            the list of paths, expressed relative to the current working
            directory, whose content has that digest.
    """
    baseDir = os.getcwd()
    for filePath in files:
        try:
            md5 = Md5Checksum(filePath)
        except OSError as error:
            # An unreadable file must not abort the whole scan.
            print("Skipping {0}: {1}".format(filePath, error),
                  file=sys.stderr)
            continue
        fileRelPath = os.path.relpath(filePath, baseDir)
        sameContent.setdefault(md5, []).append(fileRelPath)
# Compute the MD5 hash of a file
def Md5Checksum(filePath, chunkSize=8192):
    """Return the hexadecimal MD5 digest of a file's content.

    The file is read in binary mode, *chunkSize* bytes at a time, so it is
    never loaded into memory as a whole.

    Args:
        filePath: Path of the file to hash.
        chunkSize: Number of bytes requested per read; must be positive.

    Returns:
        The digest as a string of 32 lowercase hexadecimal characters.

    Raises:
        ValueError: If *chunkSize* is not positive (a size of 0 would end
            the read loop immediately and yield the empty-input digest).
        OSError: If the file cannot be opened or read.
    """
    if chunkSize < 1:
        raise ValueError("chunkSize must be a positive integer")
    digest = hashlib.md5()
    with open(filePath, 'rb') as file:
        # read() returns b'' at end of file, which stops the iterator.
        for chunk in iter(lambda: file.read(chunkSize), b''):
            digest.update(chunk)
    return digest.hexdigest()
# Printout the lists of files having same content
def PrintResults(sameContent):
    """Print every group of two or more files sharing the same content.

    Args:
        sameContent: Dictionary whose values are lists of file names; each
            list with more than one entry is printed on its own line as
            ``[name1, name2, ...]``.
    """
    print('List of files having same content:')
    for duplicates in sameContent.values():
        if len(duplicates) > 1:
            print("[{0}]".format(", ".join(str(name) for name in duplicates)))
# Script entry point: hand Main's return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    exitStatus = Main(sys.argv)
    sys.exit(exitStatus)
ecco la mia versione:
import os, hashlib
def fileCurrDir():
    """Return the names of the files located directly in the current directory.

    Returns:
        A list with every entry of ``os.listdir('.')`` for which
        ``os.path.isfile`` is true, in the order ``os.listdir`` reports them.
    """
    return [name for name in os.listdir('.') if os.path.isfile(name)]
def dictsize(fl=None):
    """Group file names by file size.

    Args:
        fl: Iterable of file names to examine. When omitted, the list
            returned by ``fileCurrDir()`` is computed at call time (a default
            written in the signature would be evaluated only once, when the
            function is defined).

    Returns:
        A dictionary mapping each size in bytes to the list of names having
        that size, in the order they were encountered.
    """
    if fl is None:
        fl = fileCurrDir()
    res = {}
    for f in fl:
        # Query the size once per file instead of once per dictionary access.
        res.setdefault(os.path.getsize(f), []).append(f)
    return res
def dictremsa(a=None):
    """Drop every entry whose list holds fewer than two items.

    Args:
        a: Dictionary mapping keys to lists; it is modified in place. When
            omitted, ``dictsize()`` is called at call time to build it.

    Returns:
        The same dictionary object, now containing only the entries whose
        list has at least two items.
    """
    if a is None:
        a = dictsize()
    # Collect the keys first: a dict must not change size while iterated.
    lonely = [key for key, names in a.items() if len(names) < 2]
    for key in lonely:
        del a[key]
    return a
def _md5_of_file(path, chunk_size=65536):
    """Return the MD5 hex digest of the file at *path*, read in chunks."""
    hasher = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            hasher.update(chunk)
    return hasher.hexdigest()


def hashcontrolinsl(l1):
    """Print the groups of files in *l1* that have the same MD5 digest.

    Files are taken from the end of the list one at a time; each one is
    compared with the files still in the list, and when at least one match
    exists the group is printed as a Python list (the popped file first,
    then its matches in list order). Matched files are removed so they are
    not reported again.

    Args:
        l1: List of file names. It is consumed: the list is empty when the
            function returns.

    Raises:
        OSError: If one of the files cannot be opened or read.
    """
    digests = {}

    def digest_of(path):
        # Each file is hashed at most once, however many times it is compared.
        if path not in digests:
            digests[path] = _md5_of_file(path)
        return digests[path]

    while l1:
        toTest = l1.pop()
        reference = digest_of(toTest)
        res = [toTest] + [tmp for tmp in l1 if digest_of(tmp) == reference]
        if len(res) > 1:
            print(res)
            for duplicate in res[1:]:
                l1.remove(duplicate)
def hashcontroltoMajorTom(a=None):
    """Report duplicate files, hashing only files that share a size.

    Args:
        a: Dictionary mapping a file size to the list of file names with that
            size. The entry for size 0, if present, is removed from it. When
            omitted, ``dictremsa()`` is called at call time to build it.

    Files listed under size 0 are printed without hashing; every other list
    is handed to ``hashcontrolinsl``.
    """
    if a is None:
        a = dictremsa()
    # Files listed under size 0 are reported as equal without hashing.
    obviouslyequal = a.pop(0, None)
    if obviouslyequal is not None:
        print("i seguenti file hanno lo stesso contenuto... NULLA!!!!!:\n")
        for oe in obviouslyequal:
            print("{0}".format(oe))
    print("\ni seguenti file contengono qualcosa ma sono uguali:\n")
    for namelist in list(a.values()):
        hashcontrolinsl(namelist)
# Run the duplicate scan with its default argument. There is no
# `if __name__ == "__main__"` guard, so this also executes on import.
hashcontroltoMajorTom()
-fede