# ProvaPratica 2013.06.21 [Python 3]
'''
Prova Pratica di Laboratorio di Sistemi Operativi
21 giugno 2013
Esercizio 3
URL: http://www.cs.unibo.it/~renzo/so/pratiche/2013.06.21.pdf
@author: Tommaso Ognibene
'''
import os, sys, hashlib
def Main(argv):
    """Find and print the groups of files with identical content.

    Files below the current working directory are first grouped by size;
    only the groups holding at least two files are then hashed with MD5,
    because files of different sizes cannot have the same content.

    Args:
        argv: The command-line argument vector. It must contain only the
            program name, since the script takes no arguments.

    Returns:
        0 on success, 1 when unexpected arguments were supplied. The value
        is suitable for passing to sys.exit().
    """
    # Check number of arguments
    if len(argv) != 1:
        # A usage error is reported on stderr with a non-zero status so
        # that the calling shell or script can detect the failure.
        print("The function does not require arguments to be passed in.",
              file=sys.stderr)
        return 1

    # Build a dictionary with key-value pair {file size - [file name]}
    sameSize = {}
    PopulateSameSize(sameSize)

    # Build a dictionary with key-value pair {MD5 hash - [file name]}
    sameContent = {}
    # Largest groups first: as soon as a group has fewer than two files,
    # every remaining group does too, so the loop can stop.
    for files in sorted(sameSize.values(), key=len, reverse=True):
        if len(files) < 2:
            break
        PopulateSameContent(files, sameContent)

    # Print results
    PrintResults(sameContent)
    print("Done!")
    return 0
# Populate a dictionary with key-value pair {file size - [file path]}
def PopulateSameSize(sameSize):
    """Group every file below the current working directory by its size.

    Args:
        sameSize: Dictionary updated in place. Each key is a size in bytes
            and each value is the list of paths (rooted at the current
            working directory) of the files having that size.
    """
    for dirPath, _dirNames, fileNames in os.walk(os.getcwd()):
        for fileName in fileNames:
            filePath = os.path.join(dirPath, fileName)
            try:
                fileSize = os.path.getsize(filePath)
            except OSError as error:
                # Dangling symlinks or files removed during the walk cannot
                # be sized; skip them instead of aborting the whole scan.
                print("Skipping {0}: {1}".format(filePath, error),
                      file=sys.stderr)
                continue
            # Append in place rather than rebuilding the list on every file.
            sameSize.setdefault(fileSize, []).append(filePath)
# Hash each candidate file and group the files by MD5 digest
def PopulateSameContent(files, sameContent):
    """Group the given files by the MD5 hash of their content.

    Args:
        files: Iterable of file paths to hash.
        sameContent: Dictionary updated in place. Each key is an MD5 hex
            digest and each value is the list of paths, relative to the
            current working directory, of the files with that digest.
    """
    baseDir = os.getcwd()
    for filePath in files:
        try:
            md5 = Md5Checksum(filePath)
        except OSError as error:
            # An unreadable file should not abort the whole comparison;
            # report it and carry on with the remaining candidates.
            print("Skipping {0}: {1}".format(filePath, error),
                  file=sys.stderr)
            continue
        fileRelPath = os.path.relpath(filePath, baseDir)
        sameContent.setdefault(md5, []).append(fileRelPath)
# Compute the MD5 hash of a file
def Md5Checksum(filePath, chunkSize=8192):
    """Return the hexadecimal MD5 digest of a file's content.

    The file is read in fixed-size chunks so that large files are never
    loaded into memory as a whole.

    Args:
        filePath: Path of the file to hash.
        chunkSize: Number of bytes read per iteration; must be positive.

    Returns:
        The MD5 digest as a string of hexadecimal digits.

    Raises:
        ValueError: If chunkSize is not a positive number.
        OSError: If the file cannot be opened or read.
    """
    if chunkSize < 1:
        # read(0) always returns b'' and would silently hash nothing.
        raise ValueError("chunkSize must be a positive integer")
    m = hashlib.md5()
    with open(filePath, 'rb') as file:
        # iter() with a sentinel keeps reading until read() returns b'' (EOF).
        for data in iter(lambda: file.read(chunkSize), b''):
            m.update(data)
    return m.hexdigest()
# Print out the lists of files having same content
def PrintResults(sameContent):
    """Print every group of two or more files that share the same content.

    Args:
        sameContent: Dictionary whose values are lists of file paths.
            Lists with a single entry are not printed.
    """
    print('List of files having same content:')
    # The loop variable is deliberately not called `list`, which would
    # shadow the builtin.
    for paths in sameContent.values():
        if len(paths) > 1:
            print("[{0}]".format(", ".join(str(path) for path in paths)))
# Script entry point: run Main and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(Main(sys.argv))