/* Check cache miss rates with valgrind --tool=cachegrind ./a.out */ #define N 512 #define M 512 typedef struct { short r, g, b, a; } pixel; pixel image[N][M]; void fade() { int i, j; for (i = 0; i < N; i++) for (j = 0; j < M; j++) { image[i][j].a = image[i][j].a >> 1; } } /* M = 512 N = 512 B = 64 S = 64 E = 8 C = 32768 Expected miss rate = 12.5% */ void fade_slower() { int i, j; for (j = 0; j < M; j++) for (i = 0; i < N; i++) { image[i][j].a = image[i][j].a >> 1; } } /* M = 512 N = 512 B = 64 S = 64 E = 8 C = 32768 Expected miss rate = 100% because cache lines have been replaced by the time iteration returns to the first row */ /* M = 511 N = 512 B = 64 S = 64 E = 8 C = 32768 Expected miss rate = 12.5% because a block-width set of columns fits into the cache, and since a row isn't a multiple of a block size, each column cycles through all the available sets and therefore all the available cache lines (8 per set) */ void blur_red() { int i, j; for (i = 0; i < N; i++) for (j = 0; j < M; j++) { image[i][j].r = ((image[i][j].r + image[i][j+1].r + image[i+1][j].r + image[i+1][j+1].r) >> 2); } } /* M = 512 N = 512 B = 64 S = 64 E = 8 Expected miss rate = 3.1% (= 1/32) because each row pulls in the next row, so there's only one miss per 8 iterations on average, and each iteration has 4 accesses */ /* ************************************************************ */ #include static unsigned long long get_ticks(void); #define ITERS 1000 #define SCALE (3.0 / 2.6) int main(int argc, char **argv) { int i; unsigned long long start_ticks, end_ticks; start_ticks = get_ticks(); for (i = 0; i < ITERS; i++) { fade(); } end_ticks = get_ticks(); printf("%f\n", ((double)(end_ticks - start_ticks) / ((long)M * N * ITERS)) * SCALE); return 0; } static inline unsigned long long get_ticks(void) { unsigned int lo, hi; // RDTSC copies contents of 64-bit TSC into EDX:EAX asm volatile("rdtsc" : "=a" (lo), "=d" (hi)); return (unsigned long long)hi << 32 | lo; }