Dekker, Peterson e Test&Set in C

From Sistemi Operativi
Jump to navigation Jump to search

Tutti questi esempi hanno NPROC thread che svolgono un loop MAX volte sommando 1 ad ogni iterazione ad una variabile globale condivisa var;

Se tutto va bene il risultato finale deve essere NPROC*MAX.

Senza sezioni critiche

Partiamo dall'esempio *SENZA* sezioni critiche e quindi errato perhcé ha una race condition:

#include<stdio.h>
#include<stdint.h>
#include<pthread.h>
#include<unistd.h>

#define MAX 4000000
#define STEP 1000000
#define NPROC 4

volatile long val = 0;

void *codet(void *arg) {
        uintptr_t id = (uintptr_t) arg;
        int count;
        for (count=0; count  < MAX; count++) {
                /* enter */
                /* /enter */
                val = val + 1;
                if (count % STEP == 0)
                        printf("%d %ld\n",id,val);
                /* exit */
                /* /exit */
        }
}

int main(int argc, char *argv[]) {
        uintptr_t i;
        pthread_t t[NPROC];
        for (i=0; i<NPROC; i++)
                pthread_create(&t[i], NULL, codet, (void *) i);
        for (i=0; i<NPROC; i++)
                pthread_join(t[i], NULL);
        printf("%d %d\n",val,NPROC*MAX);
}

Test&Set

Ora proviamo con test&set (che viene fornito dal compilatore C)

#include<stdio.h>
#include<stdint.h>
#include<pthread.h>
#include<unistd.h>
#include<sched.h>

#define MAX 4000000
#define STEP 1000000
#define NPROC 4

volatile long val = 0;
volatile long lock = 0;

void *codet(void *arg) {
  uintptr_t id = (uintptr_t) arg;
  int count;
  for (count=0; count  < MAX; count++) {
    /* enter */
    while (__sync_lock_test_and_set(&lock,1))
      /*sched_yield()*/;
    /* /enter */
    val = val + 1;
    if (count % STEP == 0)
      printf("%d %ld\n",id,val);
    /* exit */
    __sync_lock_release(&lock);
    /* /exit */
  }
}

int main(int argc, char *argv[]) {
  uintptr_t i;
  pthread_t t[NPROC];
  for (i=0; i<NPROC; i++)
    pthread_create(&t[i], NULL, codet, (void *) i);
  for (i=0; i<NPROC; i++)
    pthread_join(t[i], NULL);
  printf("%d %d\n",val,NPROC*MAX);
}

questo funziona bene! (se si decommenta la chiamata (di system call) sched_yield() l'esecuzione è molto pi&ugrave veloce, perch%eacute;?)

Peterson

Proviamo con Peterson

#include<stdio.h>
#include<stdint.h>
#include<pthread.h>
#include<unistd.h>
#include<stdatomic.h>

#define MAX 4000000
#define STEP 1000000
#define NPROC 2

volatile long val = 0;
volatile long need[NPROC];
volatile long turn;

void *codet(void *arg) {
  uintptr_t id = (uintptr_t) arg;
  int count;
  for (count=0; count  < MAX; count++) {
    /* enter */
    need[id] = 1;
    turn = 1 - id;
    atomic_thread_fence(memory_order_seq_cst);
    while (need[1 - id] && turn != id)
      ;
    /* /enter */
    val = val + 1;
    if (count % STEP == 0)
      printf("%d %ld\n",id,val);
    /* exit */
    need[id] = 0;
    /* /exit */
  }
}

int main(int argc, char *argv[]) {
  uintptr_t i;
  pthread_t t[NPROC];
  for (i=0; i<NPROC; i++)
    pthread_create(&t[i], NULL, codet, (void *) i);
  for (i=0; i<NPROC; i++)
    pthread_join(t[i], NULL);
  printf("%d %d\n",val,NPROC*MAX);
}

La funzione di stdatomic.h "atomic_thread_fence(memory_order_seq_cst);" (che nella macchine Intel genera l'istuzione assembler "__asm__ __volatile__("mfence");") è necessaria per x86 multicore perché altrimenti le cache dei processori potrebbero non essere allineate.

Dekker

E ora Dekker.

#include<stdio.h>
#include<stdint.h>
#include<pthread.h>
#include<unistd.h>
#include<stdatomic.h>

#define MAX 4000000
#define STEP 1000000
#define NPROC 2

volatile long val = 0;
volatile long need[NPROC];
volatile long turn;

void *codet(void *arg) {
  uintptr_t id = (uintptr_t) arg;
  int count;
  for (count=0; count  < MAX; count++) {
    /* enter */
    need[id] = 1;
    atomic_thread_fence(memory_order_seq_cst);
    while (need[1 - id]) {
      atomic_thread_fence(memory_order_seq_cst);
      need[id] = 0;
      while (need[1 - id] && turn != id)
        ;
      need[id] = 1;
      atomic_thread_fence(memory_order_seq_cst);
    }
    /* /enter */
    val = val + 1;
    if (count % STEP == 0)
      printf("%d %ld\n",id,val);
    /* exit */
    need[id] = 0;
    turn = 1 - id;
    /* /exit */
  }
}

int main(int argc, char *argv[]) {
  uintptr_t i;
  pthread_t t[NPROC];
  for (i=0; i<NPROC; i++)
    pthread_create(&t[i], NULL, codet, (void *) i);
  for (i=0; i<NPROC; i++)
    pthread_join(t[i], NULL);
  printf("%d %d\n",val,NPROC*MAX);
}

(Qui sono state necessari epiù istruzioni "siepe" fence)

Peterson per N processi

#include<stdio.h>
#include<stdint.h>
#include<pthread.h>
#include<unistd.h>
#include<stdatomic.h>

#define MAX 4000000
#define NPROC 4

volatile long val = 0;
volatile long turn;
volatile long stage[NPROC];
volatile long last[NPROC];

void *codet(void *arg) {
  uintptr_t id = (uintptr_t) arg;
  int count;
  int j, k;
  for (count=0; count  < MAX; count++) {
    /* enter */
    for (j=0; j<NPROC; j++) {
      stage[id] = j+1; last[j] = id;
      atomic_thread_fence(memory_order_seq_cst);
      for (k=0; k<NPROC; k++) {
        if (id != k)
          while (stage[k] >= stage[id] && last[j] == id)
            ;
      }
    }
    /* /enter */
    val = val + 1;
    if ((count & ((1<<20) - 1)) == 0)
      printf("%d %ld\n",id,val);
    /* exit */
    stage[id] = 0;
    /* /exit */
  }
}

int main(int argc, char *argv[]) {
  uintptr_t i;
  pthread_t t[NPROC];
  for (i=0; i<NPROC; i++)
    pthread_create(&t[i], NULL, codet, (void *) i);
  for (i=0; i<NPROC; i++)
    pthread_join(t[i], NULL);
  printf("%d %d\n",val,NPROC*MAX);
}