===== Vectorizing compiler (sample code) ===== This is the matrix multiplication code we used during the lesson. Compile it with the **-O3** flag (GNU compiler suite). Vectorization details can be obtained with the **-ftree-vectorizer-verbose=NN** flag, where NN ranges from 1 to 9 (see the GNU gcc/g++ compiler [[http://gcc.gnu.org/onlinedocs/|manual]]). #include #include #include #include float a[N][N]; float b[N][N]; float c[N][N]; struct timespec diff(struct timespec t0, struct timespec t1); int main(int argc, char * argv[]) { // timer resolution struct timespec res; struct timespec t0, t1; clock_getres(CLOCCHE, &res); printf("Clock resolution is %ld nsec\n", res.tv_nsec); // init matrixes srand(getpid()); for(int i=0; i %d <---\n", ((int) sum)%2); return(0); } timespec diff(timespec start, timespec end) { timespec temp; if ((end.tv_nsec-start.tv_nsec)<0) { temp.tv_sec = end.tv_sec-start.tv_sec-1; temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec; } else { temp.tv_sec = end.tv_sec-start.tv_sec; temp.tv_nsec = end.tv_nsec-start.tv_nsec; } printf("Time elapsed: %ld sec, %ld usec\n", temp.tv_sec, temp.tv_nsec/1000); return temp; } #include #include #include #include float a[N][N]; float b[N][N]; float c[N][N]; struct timespec diff(struct timespec t0, struct timespec t1); int main(int argc, char * argv[]) { // timer resolution struct timespec res; struct timespec t0, t1; clock_getres(CLOCCHE, &res); printf("Clock resolution is %ld nsec\n", res.tv_nsec); // init matrixes srand(getpid()); for(int i=0; i %d <---\n", ((int) sum)%2); return(0); } timespec diff(timespec start, timespec end) { timespec temp; if ((end.tv_nsec-start.tv_nsec)<0) { temp.tv_sec = end.tv_sec-start.tv_sec-1; temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec; } else { temp.tv_sec = end.tv_sec-start.tv_sec; temp.tv_nsec = end.tv_nsec-start.tv_nsec; } printf("Time elapsed: %ld sec, %ld usec\n", temp.tv_sec, temp.tv_nsec/1000); return temp; } CC = g++ CFLAGS = -DN=128 -DCLOCCHE=CLOCK_THREAD_CPUTIME_ID 
# Linker flags: librt provides the POSIX clock functions on older glibc.
LDFLAGS = -lrt

# Executables produced by this Makefile (also the list removed by `clean`).
OBJS = mm mmo3 mmkj mmkjo3

# `all` and `clean` name actions, not files.
.PHONY: all clean

# Default target: build every variant of the benchmark.
all: mm mmo3 mmkj mmkjo3

# NOTE(review): -ftree-vectorizer-verbose is accepted only by older GCC
# releases; newer ones appear to use -fopt-info-vec instead -- confirm
# against the compiler actually in use.

# mm.c without optimization.
mm: mm.c
	$(CC) $(CFLAGS) mm.c -o mm $(LDFLAGS) -ftree-vectorizer-verbose=2

# mm.c with -O3.
mmo3: mm.c
	$(CC) $(CFLAGS) mm.c -o mmo3 $(LDFLAGS) -ftree-vectorizer-verbose=2 -O3

# mmkj.c without optimization.
# The prerequisite is the file the recipe compiles, so editing mmkj.c
# triggers a rebuild.
mmkj: mmkj.c
	$(CC) $(CFLAGS) mmkj.c -o mmkj $(LDFLAGS) -ftree-vectorizer-verbose=2

# mmkj.c with -O3.
mmkjo3: mmkj.c
	$(CC) $(CFLAGS) mmkj.c -o mmkjo3 $(LDFLAGS) -ftree-vectorizer-verbose=2 -O3

# Remove all built executables.
clean:
	rm -f $(OBJS)