Dekker, Peterson e Test&Set in C
Tutti questi esempi hanno NPROC thread che svolgono un loop MAX volte sommando 1 ad ogni iterazione ad una variabile globale condivisa var;
Se tutto va bene il risultato finale deve essere NPROC*MAX.
Senza sezioni critiche
Partiamo dall'esempio *SENZA* sezioni critiche e quindi errato perhcé ha una race condition:
#include<stdio.h>
#include<stdint.h>
#include<pthread.h>
#include<unistd.h>
#define MAX 4000000
#define STEP 1000000
#define NPROC 4
volatile long val = 0;
void *codet(void *arg) {
uintptr_t id = (uintptr_t) arg;
int count;
for (count=0; count < MAX; count++) {
/* enter */
/* /enter */
val = val + 1;
if (count % STEP == 0)
printf("%d %ld\n",id,val);
/* exit */
/* /exit */
}
}
int main(int argc, char *argv[]) {
uintptr_t i;
pthread_t t[NPROC];
for (i=0; i<NPROC; i++)
pthread_create(&t[i], NULL, codet, (void *) i);
for (i=0; i<NPROC; i++)
pthread_join(t[i], NULL);
printf("%d %d\n",val,NPROC*MAX);
}
Test&Set
Ora proviamo con test&set (che viene fornito dal compilatore C)
#include<stdio.h>
#include<stdint.h>
#include<pthread.h>
#include<unistd.h>
#include<sched.h>
#define MAX 4000000
#define STEP 1000000
#define NPROC 4
volatile long val = 0;
volatile long lock = 0;
void *codet(void *arg) {
uintptr_t id = (uintptr_t) arg;
int count;
for (count=0; count < MAX; count++) {
/* enter */
while (__sync_lock_test_and_set(&lock,1))
/*sched_yield()*/;
/* /enter */
val = val + 1;
if (count % STEP == 0)
printf("%d %ld\n",id,val);
/* exit */
__sync_lock_release(&lock);
/* /exit */
}
}
int main(int argc, char *argv[]) {
uintptr_t i;
pthread_t t[NPROC];
for (i=0; i<NPROC; i++)
pthread_create(&t[i], NULL, codet, (void *) i);
for (i=0; i<NPROC; i++)
pthread_join(t[i], NULL);
printf("%d %d\n",val,NPROC*MAX);
}
questo funziona bene! (se si decommenta la chiamata (di system call) sched_yield() l'esecuzione è molto più veloce, perch%eacute;?)
Peterson
Proviamo con Peterson
#include<stdio.h>
#include<stdint.h>
#include<pthread.h>
#include<unistd.h>
#include<stdatomic.h>
#define MAX 4000000
#define STEP 1000000
#define NPROC 2
volatile long val = 0;
volatile long need[NPROC];
volatile long turn;
void *codet(void *arg) {
uintptr_t id = (uintptr_t) arg;
int count;
for (count=0; count < MAX; count++) {
/* enter */
need[id] = 1;
turn = 1 - id;
atomic_thread_fence(memory_order_seq_cst);
while (need[1 - id] && turn != id)
;
/* /enter */
val = val + 1;
if (count % STEP == 0)
printf("%d %ld\n",id,val);
/* exit */
need[id] = 0;
/* /exit */
}
}
int main(int argc, char *argv[]) {
uintptr_t i;
pthread_t t[NPROC];
for (i=0; i<NPROC; i++)
pthread_create(&t[i], NULL, codet, (void *) i);
for (i=0; i<NPROC; i++)
pthread_join(t[i], NULL);
printf("%d %d\n",val,NPROC*MAX);
}
La funzione di stdatomic.h "atomic_thread_fence(memory_order_seq_cst);" (che nella macchine Intel genera l'istuzione assembler "__asm__ __volatile__("mfence");") è necessaria per x86 multicore perché altrimenti le cache dei processori potrebbero non essere allineate.
Dekker
E ora Dekker.
#include<stdio.h>
#include<stdint.h>
#include<pthread.h>
#include<unistd.h>
#include<stdatomic.h>
#define MAX 4000000
#define STEP 1000000
#define NPROC 2
volatile long val = 0;
volatile long need[NPROC];
volatile long turn;
void *codet(void *arg) {
uintptr_t id = (uintptr_t) arg;
int count;
for (count=0; count < MAX; count++) {
/* enter */
need[id] = 1;
atomic_thread_fence(memory_order_seq_cst);
while (need[1 - id]) {
atomic_thread_fence(memory_order_seq_cst);
need[id] = 0;
while (need[1 - id] && turn != id)
;
need[id] = 1;
atomic_thread_fence(memory_order_seq_cst);
}
/* /enter */
val = val + 1;
if (count % STEP == 0)
printf("%d %ld\n",id,val);
/* exit */
need[id] = 0;
turn = 1 - id;
/* /exit */
}
}
int main(int argc, char *argv[]) {
uintptr_t i;
pthread_t t[NPROC];
for (i=0; i<NPROC; i++)
pthread_create(&t[i], NULL, codet, (void *) i);
for (i=0; i<NPROC; i++)
pthread_join(t[i], NULL);
printf("%d %d\n",val,NPROC*MAX);
}
(Qui sono state necessari epiù istruzioni "siepe" fence)
Peterson per N processi
#include<stdio.h>
#include<stdint.h>
#include<pthread.h>
#include<unistd.h>
#include<stdatomic.h>
#define MAX 4000000
#define NPROC 4
volatile long val = 0;
volatile long turn;
volatile long stage[NPROC];
volatile long last[NPROC];
void *codet(void *arg) {
uintptr_t id = (uintptr_t) arg;
int count;
int j, k;
for (count=0; count < MAX; count++) {
/* enter */
for (j=0; j<NPROC; j++) {
stage[id] = j+1; last[j] = id;
atomic_thread_fence(memory_order_seq_cst);
for (k=0; k<NPROC; k++) {
if (id != k)
while (stage[k] >= stage[id] && last[j] == id)
;
}
}
/* /enter */
val = val + 1;
if ((count & ((1<<20) - 1)) == 0)
printf("%d %ld\n",id,val);
/* exit */
stage[id] = 0;
/* /exit */
}
}
int main(int argc, char *argv[]) {
uintptr_t i;
pthread_t t[NPROC];
for (i=0; i<NPROC; i++)
pthread_create(&t[i], NULL, codet, (void *) i);
for (i=0; i<NPROC; i++)
pthread_join(t[i], NULL);
printf("%d %d\n",val,NPROC*MAX);
}