Difference between revisions of "ProvaPratica 2013.06.21"
Jump to navigation
Jump to search
Line 1: | Line 1: | ||
+ | == Esercizio 1 == | ||
+ | <syntaxhighlight lang="C"> | ||
+ | /* | ||
+ | Prova Pratica di Laboratorio di Sistemi Operativi | ||
+ | 20 giugno 2013 | ||
+ | Esercizio 1 | ||
+ | |||
+ | URL: http://www.cs.unibo.it/~renzo/so/pratiche/2013.06.21.pdf | ||
+ | |||
+ | @author: Tommaso Ognibene | ||
+ | */ | ||
+ | |||
+ | #include <stdio.h> | ||
+ | #include <stdlib.h> | ||
+ | #include <dirent.h> | ||
+ | #include <errno.h> | ||
+ | #include <unistd.h> | ||
+ | |||
+ | /* Comparison function for iNode | ||
+ | * Input: 2 dirent pointers | ||
+ | * Output: | ||
+ | * 1, if iNode[a] > iNode[b] | ||
+ | * 0, if iNode[a] == iNode[b] | ||
+ | * -1, else */ | ||
+ | int iNodeComparison(const struct dirent **a, const struct dirent **b) | ||
+ | { | ||
+ | long iNodeA = (long)(*a)->d_ino; | ||
+ | long iNodeB = (long)(*b)->d_ino; | ||
+ | return (iNodeA > iNodeB) - (iNodeA < iNodeB); | ||
+ | } | ||
+ | |||
+ | /* Scan a directory -> | ||
+ | * Order the entries by iNode number -> | ||
+ | * Print name and iNode of each file. */ | ||
+ | void scanDirectory(char *dir) | ||
+ | { | ||
+ | int result, iterator; | ||
+ | struct dirent **files; | ||
+ | |||
+ | result = scandir(dir, &files, NULL, iNodeComparison); | ||
+ | |||
+ | // Check if any errors occurred | ||
+ | if (result < 0) | ||
+ | { | ||
+ | perror("scandir()"); | ||
+ | exit(EXIT_FAILURE); | ||
+ | } | ||
+ | |||
+ | // Loop through directory entries | ||
+ | for(iterator = 0; iterator < result; iterator++) | ||
+ | { | ||
+ | printf("%s/%s %li\n", dir, files[iterator]->d_name, (long)files[iterator]->d_ino); | ||
+ | |||
+ | // Garbage collection | ||
+ | free(files[iterator]); | ||
+ | } | ||
+ | free(files); | ||
+ | } | ||
+ | |||
+ | // Entry point | ||
+ | int main(int argc, char *argv[]) | ||
+ | { | ||
+ | char cwd[1024]; | ||
+ | |||
+ | // if (number of parameters | ||
+ | switch(argc) | ||
+ | { | ||
+ | // == 0) => Use the current directory | ||
+ | case 1: | ||
+ | if (!getcwd(cwd, sizeof(cwd))) | ||
+ | { | ||
+ | perror("getcwd()"); | ||
+ | exit(EXIT_FAILURE); | ||
+ | } | ||
+ | scanDirectory(cwd); | ||
+ | exit(EXIT_SUCCESS); | ||
+ | |||
+ | // == 1) => Use the given directory | ||
+ | case 2: | ||
+ | scanDirectory(argv[1]); | ||
+ | exit(EXIT_SUCCESS); | ||
+ | |||
+ | // > 1) => Wrong input | ||
+ | default: | ||
+ | printf("The function requires 0 or 1 parameters.\n"); | ||
+ | exit(EXIT_FAILURE); | ||
+ | } | ||
+ | } | ||
+ | </syntaxhighlight> | ||
+ | |||
+ | == Esercizio 3 == | ||
+ | |||
[Python 3] | [Python 3] | ||
Revision as of 15:41, 27 November 2013
Esercizio 1
/*
Prova Pratica di Laboratorio di Sistemi Operativi
20 giugno 2013
Esercizio 1
URL: http://www.cs.unibo.it/~renzo/so/pratiche/2013.06.21.pdf
@author: Tommaso Ognibene
*/
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <errno.h>
#include <unistd.h>
/* Comparison function for iNode
* Input: 2 dirent pointers
* Output:
* 1, if iNode[a] > iNode[b]
* 0, if iNode[a] == iNode[b]
* -1, else */
int iNodeComparison(const struct dirent **a, const struct dirent **b)
{
long iNodeA = (long)(*a)->d_ino;
long iNodeB = (long)(*b)->d_ino;
return (iNodeA > iNodeB) - (iNodeA < iNodeB);
}
/* Scan a directory ->
 * Order the entries by iNode number ->
 * Print "<dir>/<name> <iNode>" for each entry, one per line.
 * On scandir() failure an error is reported with perror() and the
 * process terminates with EXIT_FAILURE. */
void scanDirectory(char *dir)
{
    int result, iterator;
    struct dirent **files;

    result = scandir(dir, &files, NULL, iNodeComparison);

    // Check if any errors occurred
    if (result < 0)
    {
        perror("scandir()");
        exit(EXIT_FAILURE);
    }

    // Loop through directory entries
    for (iterator = 0; iterator < result; iterator++)
    {
        // ino_t is unsigned: print it through an unsigned type so that
        // very large inode numbers are not shown as negative values
        printf("%s/%s %llu\n", dir, files[iterator]->d_name,
               (unsigned long long)files[iterator]->d_ino);

        // Each entry was allocated by scandir() and must be released
        free(files[iterator]);
    }

    // Release the array of entries itself
    free(files);
}
// Entry point: list the directory given as the only argument,
// or the current working directory when no argument is given.
int main(int argc, char *argv[])
{
    char cwd[1024];

    // argc counts the program name too, so argc == 1 means "no parameters"
    switch (argc)
    {
        // 0 parameters => Use the current directory
        case 1:
            if (!getcwd(cwd, sizeof(cwd)))
            {
                perror("getcwd()");
                exit(EXIT_FAILURE);
            }
            scanDirectory(cwd);
            exit(EXIT_SUCCESS);

        // 1 parameter => Use the given directory
        case 2:
            scanDirectory(argv[1]);
            exit(EXIT_SUCCESS);

        // More than 1 parameter => Wrong input
        default:
            // Diagnostics go to stderr so they never mix with the listing on stdout
            fprintf(stderr, "The function requires 0 or 1 parameters.\n");
            exit(EXIT_FAILURE);
    }
}
Esercizio 3
[Python 3]
'''
Prova Pratica di Laboratorio di Sistemi Operativi
20 giugno 2013
Esercizio 3
URL: http://www.cs.unibo.it/~renzo/so/pratiche/2013.06.21.pdf
@author: Tommaso Ognibene
'''
import os, sys, hashlib
# Find the files under the current directory that have the same content.
# argv: the command-line vector; only the program name is expected.
# Returns the process exit status: 0 on success, 1 on wrong usage
# (the value is handed to sys.exit by the script guard).
def Main(argv):
    # Check number of arguments
    if len(argv) != 1:
        print("The function does not require arguments to be passed in.", file=sys.stderr)
        return 1

    # Build a dictionary with key-value pair {file size - [file name]}
    sameSize = {}
    PopulateSameSize(sameSize)

    # Build a dictionary with key-value pair {MD5 hash - [file name]}
    sameContent = {}
    # Largest groups first, so the loop can stop at the first group of one file
    for filePaths in sorted(sameSize.values(), key=len, reverse=True):
        # No files with same size => No files with same content
        if len(filePaths) < 2:
            break
        PopulateSameContent(filePaths, sameContent)

    # Print results
    PrintResults(sameContent)
    print("Done!")
    return 0
# Populate a dictionary with key-value pair {file size - [file path]}
# by walking the tree rooted at the current working directory.
# sameSize is updated in place; nothing is returned.
def PopulateSameSize(sameSize):
    for dirPath, _, fileNames in os.walk(os.getcwd()):
        for fileName in fileNames:
            filePath = os.path.join(dirPath, fileName)
            try:
                fileSize = os.path.getsize(filePath)
            except OSError:
                # os.walk also lists entries that cannot be stat'ed (e.g. dangling
                # symlinks, files removed meanwhile): they cannot be duplicates
                # of anything readable, so skip them instead of aborting the scan
                continue
            # Append in place rather than rebuilding the list on every insert
            sameSize.setdefault(fileSize, []).append(filePath)
# Group the given files by MD5 digest: {MD5 hash - [file path]}.
# Paths are stored relative to the current working directory.
# sameContent is updated in place; nothing is returned.
def PopulateSameContent(filePaths, sameContent):
    for path in filePaths:
        digest = GetMd5Hash(path)
        relative = os.path.relpath(path, os.getcwd())
        alreadySeen = sameContent.get(digest, [])
        sameContent[digest] = alreadySeen + [relative]
# Return the hexadecimal MD5 digest of a file without loading it whole in memory.
# The file is read in chunks of blockSize bytes and fed to the hash incrementally.
# MD5 consumes its input in 64-byte blocks (the digest itself is 128 bits);
# the default chunk size of 2 ** 20 bytes is a multiple of that block size.
def GetMd5Hash(filePath, blockSize = 2 ** 20):
    hasher = hashlib.md5()
    with open(filePath, "rb") as stream:
        while True:
            block = stream.read(blockSize)
            # An empty read means end of file
            if not block:
                break
            hasher.update(block)
    return hasher.hexdigest()
# Print every group of two or more files sharing the same content,
# one bracketed, comma-separated list per line, largest groups first.
def PrintResults(sameContent):
    print("Lists of files having same content:")
    # Keep only real duplicates; the stable sort preserves the dictionary
    # order among groups of equal length
    duplicates = [group for group in sameContent.values() if len(group) >= 2]
    duplicates.sort(key=len, reverse=True)
    for group in duplicates:
        print("[{0}]".format(", ".join(group)))
# Run Main only when the file is executed as a script (not when imported);
# whatever Main returns is passed to sys.exit as the process exit status
if __name__ == "__main__":
    sys.exit(Main(sys.argv))
ecco la mia versione:
import os, hashlib
# Return the list of names of the regular files found in the current directory
# (sub-directories and other non-file entries are left out)
def fileCurrDir():
    return [entry for entry in os.listdir('.') if os.path.isfile(entry)]
# Return a dictionary {file size - [file names having that size]}.
# fl: iterable of file names; when omitted, the regular files of the current
#     directory are listed at call time (a default computed in the signature
#     would be frozen once, when the function is defined).
def dictsize(fl=None):
    if fl is None:
        fl = fileCurrDir()
    res = {}
    for f in fl:
        # Stat each file once and append to its size group, creating it on first use
        res.setdefault(os.path.getsize(f), []).append(f)
    return res
# Given a dictionary {key - list}, remove every item whose list holds fewer
# than two elements. The dictionary is modified in place and also returned.
# a: the dictionary to filter; when omitted, it is built with dictsize() at
#    call time (a default computed in the signature would be created once and
#    then shared, and mutated, across calls).
def dictremsa(a=None):
    if a is None:
        a = dictsize()
    # Collect the keys first: a dict cannot change size while being iterated
    lonely = [key for key, names in a.items() if len(names) < 2]
    for key in lonely:
        del a[key]
    return a
# Return the MD5 hex digest of a file, reading it in chunks so that large
# files are never loaded whole in memory; the file is always closed.
def _file_md5(path, block_size=2 ** 20):
    hasher = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(block_size), b''):
            hasher.update(chunk)
    return hasher.hexdigest()


# Given a list of file names, print each group of files sharing the same MD5
# hash (one Python list per line; groups of a single file are not printed).
# The list l1 is consumed: it is empty when the function returns.
def hashcontrolinsl(l1):
    # Hash every file exactly once instead of re-reading it for each pair
    digests = {name: _file_md5(name) for name in l1}
    while l1:
        toTest = l1.pop()
        # The reference file first, then every remaining file with the same hash
        res = [toTest] + [tmp for tmp in l1 if digests[tmp] == digests[toTest]]
        if len(res) > 1:
            print(res)
            # Matched files are already reported: drop them from the work list
            for j in res[1:]:
                l1.remove(j)
# Hash only the lists of files that already passed the "same size" test.
# Files of size 0 are reported as equal without hashing; every other size
# group is handed to hashcontrolinsl.
# a: dictionary {file size - [file names]}; when omitted, it is built with
#    dictremsa() at call time (a default computed in the signature would be
#    created once at definition time and then emptied by the first call).
#    The size-0 entry is removed from the dictionary passed in.
def hashcontroltoMajorTom(a=None):
    if a is None:
        a = dictremsa()
    # Empty files are trivially identical: no need to hash them
    obviouslyequal = a.pop(0, None)
    if obviouslyequal is not None:
        print("i seguenti file hanno lo stesso contenuto... NULLA!!!!!:\n")
        for oe in obviouslyequal:
            print("{0}".format(oe))
    values = list(a.values())
    print("\ni seguenti file contengono qualcosa ma sono uguali:\n")
    for namelist in values:
        hashcontrolinsl(namelist)
# Script entry point: start the duplicate search with the default argument
hashcontroltoMajorTom()
-fede
Bash
Qualche idea per Bash...
Si potrebbero usare:
Per la dimensione in byte dei <file>:
fileSize=$(stat --format=%s <file>)
Per avere l'output del solo hash md5 (senza l'ausilio di altri comandi):
fileHashMd5=$(md5sum <file> | while read fileHash fileName; do echo $fileHash; done)
#Fede&Alessio#
# Print "<file> == <file2>" for every ordered pair of distinct regular files
# in the current directory that have the same content.
for file in *; do
    # Only regular files can be compared
    [[ -f "$file" ]] || continue
    filesize=$(stat -c%s "$file")
    for file2 in *; do
        [[ -f "$file2" ]] || continue
        # Never compare a file with itself
        [[ "$file" = "$file2" ]] && continue
        filesize2=$(stat -c%s "$file2")
        # Files of different size cannot be equal: compare contents only on a size match
        if [[ $filesize -eq $filesize2 ]]; then
            # cmp -s is silent and stops at the first differing byte, whereas diff
            # would dump the differences into the output; printf with quoted
            # arguments keeps names containing spaces or glob characters intact
            cmp -s -- "$file" "$file2" && printf '%s == %s\n' "$file" "$file2"
        fi
    done
done
# Authors' note: we did not manage to finish, so we settled for an inefficient
# solution that compares every pair of files (in both orders); we will keep
# working on it to make it better.