stabilizercharlie/files/stabilizer... · 2015-01-06 · memory layout affects performance...
TRANSCRIPT
STABILIZER
Charlie Curtsinger and Emery D. Berger
Statistically Sound Performance Evaluation
We all care about performance evaluation
STABILIZER
We all care about performance evaluation
We’ve been doing it wrong
STABILIZER
We all care about performance evaluation
We’ve been doing it wrong
STABILIZER
Repeated runs and error bars are not enough
We all care about performance evaluation
We’ve been doing it wrong
STABILIZER
Repeated runs and error bars are not enough
We’re not measuring what we thought
STABILIZER
We’ve been doing it wrong
STABILIZER
Memory layout affects performance
We’ve been doing it wrong
changing a program will change its layout
STABILIZER
Memory layout affects performance
We’ve been doing it wrong
changing a program will change its layout
STABILIZER
Memory layout affects performance
no way to measure effect of change in isolation
We’ve been doing it wrong
changing a program will change its layout
STABILIZER
Memory layout affects performance
STABILIZER eliminates the effect of layout
no way to measure effect of change in isolation
We’ve been doing it wrong
changing a program will change its layout
STABILIZER
Memory layout affects performance
STABILIZER eliminates the effect of layout
no way to measure effect of change in isolation
enables sound performance evaluation
We’ve been doing it wrong
changing a program will change its layout
STABILIZER
Memory layout affects performance
STABILIZER eliminates the effect of layout
no way to measure effect of change in isolation
Case Studies
enables sound performance evaluation
We’ve been doing it wrong
changing a program will change its layout
STABILIZER
Memory layout affects performance
STABILIZER eliminates the effect of layout
no way to measure effect of change in isolation
evaluation of LLVM’s optimizations with STABILIZER
Case Studies
enables sound performance evaluation
We’ve been doing it wrong
STABILIZER
We’ve been doing it wrong
changing a program will change its layout
Memory layout affects performance
STABILIZER eliminates the effect of layout
no way to measure effect of change in isolation
evaluation of LLVM’s optimizations with STABILIZER
Case Studies
enables sound performance evaluation
A
Unsound performance evaluation
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
A
Unsound performance evaluation
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
A
Unsound performance evaluation
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; size_t meaning_of_life=42; for (size_t i = 0; i < size; i += 32) { asm("icbi 0,%0" : : "r"(p)); p += 32; } for (size_t i = 16; i < size; i += 32) { asm("icbi 0,%0" : : "r"(p)); p += 32; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
A
Unsound performance evaluation
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; size_t meaning_of_life=42; for (size_t i = 0; i < size; i += 32) { asm("icbi 0,%0" : : "r"(p)); p += 32; } for (size_t i = 16; i < size; i += 32) { asm("icbi 0,%0" : : "r"(p)); p += 32; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
A
Unsound performance evaluation
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; size_t meaning_of_life=42; for (size_t i = 0; i < size; i += 32) { asm("icbi 0,%0" : : "r"(p)); p += 32; } for (size_t i = 16; i < size; i += 32) { asm("icbi 0,%0" : : "r"(p)); p += 32; } asm("isync");}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
A
Unsound performance evaluation
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; size_t meaning_of_life=42; for (size_t i = 0; i < size; i += 32) { asm("icbi 0,%0" : : "r"(p)); p += 32; } for (size_t i = 16; i < size; i += 32) { asm("icbi 0,%0" : : "r"(p)); p += 32; } asm("isync");}
A′int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
A
Unsound performance evaluation
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; size_t meaning_of_life=42; for (size_t i = 0; i < size; i += 32) { asm("icbi 0,%0" : : "r"(p)); p += 32; } for (size_t i = 16; i < size; i += 32) { asm("icbi 0,%0" : : "r"(p)); p += 32; } asm("isync");}
A′A
Unsound performance evaluation
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; size_t meaning_of_life=42; for (size_t i = 0; i < size; i += 32) { asm("icbi 0,%0" : : "r"(p)); p += 32; } for (size_t i = 16; i < size; i += 32) { asm("icbi 0,%0" : : "r"(p)); p += 32; } asm("isync");}
0.00
0.25
0.50
0.75
1.00
85.0 87.5 90.0 92.5 95.0
Time (s)
Number of runs
Version: A, A'
A′ ×1, A ×1
Which is faster?
0.00
0.25
0.50
0.75
1.00
85.0 87.5 90.0 92.5 95.0
Time (s)
Number of runs
Version: A, A'
Is A′ faster than A?
0.00
0.25
0.50
0.75
1.00
85.0 87.5 90.0 92.5 95.0
Time (s)
Number of runs
Version: A, A'
Is A′ faster than A?
0.00
0.25
0.50
0.75
1.00
85.0 87.5 90.0 92.5 95.0
Time (s)
Number of runs
Version: A, A'
Is A′ faster than A?
0.00
0.25
0.50
0.75
1.00
85.0 87.5 90.0 92.5 95.0
Time (s)
Number of runs
Version: A, A'
Is A′ faster than A?
0.00
0.25
0.50
0.75
1.00
85.0 87.5 90.0 92.5 95.0
Time (s)
Number of runs
Version: A, A'
Is A′ faster than A?
2.8% faster
0.00
0.25
0.50
0.75
1.00
85.0 87.5 90.0 92.5 95.0
Time (s)
Number of runs
Version: A, A'
Is A′ faster than A?
what about variance?
2.8% faster
0
5
10
15
85.0 87.5 90.0 92.5 95.0
Time (s)
Number of runs
Version: A, A'
Which is faster?
A′ ×30, A ×30
0
5
10
15
85.0 87.5 90.0 92.5 95.0
Time (s)
Number of runs
Version: A, A'
Is A′ faster than A?
0
5
10
15
85.0 87.5 90.0 92.5 95.0
Time (s)
Number of runs
Version: A, A'
Is A′ faster than A?
still 2.8% faster
Why is A′ faster than A?
Why is A′ faster than A?
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; }}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
Why is A′ faster than A?
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; }}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
Was it the code change?
Why is A′ faster than A?
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; }}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
Was it the code change?
Why is A′ faster than A?
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; }}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
Or was it the new layout?
Why is A′ faster than A?
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; }}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
Or was it the new layout?
Why is A′ faster than A?
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; }}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
Mytkowicz et al. (ASPLOS’09)
Layout biases measurement
Layout biases measurement
Mytkowicz et al. (ASPLOS’09)
Layout biases measurement
Mytkowicz et al. (ASPLOS’09)
Link Order
Changes function addresses
Layout biases measurement
Mytkowicz et al. (ASPLOS’09)
Link Order
Environment Variable Size
Changes function addresses
Moves the program stack
Layout biases measurement
Mytkowicz et al. (ASPLOS’09)
Link Order
Environment Variable Size
Changes function addresses
Moves the program stack
Larger than the impact of -O3
Blame the cache
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
A
Blame the cache
A
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
map to same cache set
Blame the cache
A
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
conflict
map to same cache set
A′
Blame the cache
A
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; }}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
Blame the cache
A
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; }}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
A′
Blame the cache
A
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; }}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
map to same set
A′
Blame the cacheA
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; }}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
map to same set
A′
Nothing here
Blame the cacheA
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; }}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
map to same set
A′
Nothing here
no conflict
A′
Blame the cacheA
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; }}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
A′
Blame the cacheA
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; }}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
or branch predictor
A′
Blame the cacheA
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; }}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
or branch predictor
or TLB
A′
Blame the cacheA
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; }}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
or branch predictor
or TLB
or branch target predictor
A′
Blame the cacheA
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; }}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
or branch predictor
or TLB
or prefetcher
or branch target predictor
Blame the hashA
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; }}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
int main(int argc, char **argv) { topFrame = (void**)__builtin_frame_address(0); setHandler(Trap::TrapSignal, onTrap); setHandler(SIGALRM, onTimer); setHandler(SIGSEGV, onFault); for(Function* f: functions) { f->setTrap(); } setTimer(interval); int r = stabilizer_main(argc, argv); return r;}
void setTimer(int msec) { struct itimerval timer;
timer.it_value.tv_sec = (msec - msec % 1000) / 1000; timer.it_value.tv_usec = 1000 * (msec % 1000); timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; setitimer(ITIMER_REAL, &timer, 0);}
static void flush_icache(void* begin, size_t size) { uintptr_t p = (uintptr_t)begin & ~15UL; for (size_t i = 0; i < size; i += 16) { asm("icbi 0,%0" : : "r"(p)); p += 16; } asm("isync");}
DataHeapType* getDataHeap() { static char buf[sizeof(DataHeapType)]; static DataHeapType* _theDataHeap = new (buf) DataHeapType; return _theDataHeap;}
A′
Is A′ faster than A?
it’s faster
Is A′ faster than A?
Let’s do a poll
it’s faster it’s faster
it’s faster
it’s faster
Is A′ faster than A?
Let’s do a poll
Do we trust this?
it’s faster it’s faster
it’s faster
it’s faster
But it’s faster
Is A′ faster than A?
it’s faster it’s slower they’re the same
But it’s faster
Is A′ faster than A?
it’s faster it’s slower they’re the same
But it ran faster!
it’s faster it’s slower they’re the same
But it ran faster!What if we only talk to Bob?
it’s faster
But it ran faster!What if we only use this layout?
it’s slower they’re the same
it’s faster
But it ran faster!What if we only use this layout?
it’s faster
But it ran faster!What if we only use this layout?
Upgrade libc
it’s faster
But it ran faster!What if we only use this layout?
Upgrade libc
Changes layout
it’s faster
But it ran faster!What if we only use this layout?
Change Username
it’s faster
But it ran faster!What if we only use this layout?
Change Username
Changes layout
Layout is Brittle
it’s faster
But it ran faster!What if we only use this layout?
Run in a new directory
Layout is Brittle
it’s faster
But it ran faster!What if we only use this layout?
Run in a new directory
Changes layout
Layout is Brittle
But it ran faster!What if we only use this layout?
Layout is Brittle
But it ran faster!What if we only use this layout?
Layout biases measurement
Mytkowicz et al. (ASPLOS’09)
Layout is Brittle
But it ran faster!What if we only use this layout?
Layout biases measurement
Mytkowicz et al. (ASPLOS’09)
Can we eliminate the effect of layout?
But it ran faster!What if we only use this layout?
Layout biases measurement
Can we eliminate the effect of layout?
YES
STABILIZERMemory layout affects performance
STABILIZER eliminates the effect of layoutenables sound performance evaluation
evaluation of LLVM’s optimizations with STABILIZERCase Studies
makes performance evaluation difficult
STABILIZERMemory layout affects performance
STABILIZER eliminates the effect of layoutenables sound performance evaluation
evaluation of LLVM’s optimizations with STABILIZERCase Studies
makes performance evaluation difficult
Layout biases measurement
STABILIZERrandomizes layout
Layout biases measurement
STABILIZER
function addresses
randomizes layout
Layout biases measurement
STABILIZER
function addresses stack frame sizes
randomizes layout
Layout biases measurement
STABILIZER
function addresses stack frame sizesheap allocations
randomizes layout
STABILIZERrandomizes layout
function addresses stack frame sizesheap allocations
repeatedly
Layout biases measurement
STABILIZERrandomizes layout
function addresses stack frame sizesheap allocations
repeatedly
Layout biases measurement
during execution
STABILIZERrandomizes layout
function addresses stack frame sizesheap allocations
repeatedly
Layout biases measurement
STABILIZERrandomizes layout
function addresses stack frame sizesheap allocations
repeatedly
Layout biases measurement
a completely random layout cannot bias results
A′A ×30 ×30
Sound Performance Evaluation
A ×30 ×30
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
A′
Sound Performance Evaluation
Is A′ faster than A?
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
Is faster than ?A
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
A′
Is faster than ?A
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
A′
Is faster than ?A
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
A′
Is faster than ?A
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
A′
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
A A′
The Statistical Approach
If =
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
A A′
The Statistical Approach
If =
hypothesis testing
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
A A′
what is the probability of measuring a speedup this large by chance?
The Statistical Approach
If =
hypothesis testing
what is the probability of measuring a speedup this large by chance?
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
easy to compute for the normal distribution
A A′If =
easy to compute for the normal distribution
A A′If =
STABILIZERrandomizes layoutrepeatedly
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
what is the probability of measuring a speedup this large by chance?
STABILIZERrandomizes layoutrepeatedly
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
what is the probability of measuring a speedup this large by chance?
if there is a low probability
STABILIZERrandomizes layoutrepeatedly
this speedup is real
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
STABILIZERrandomizes layoutrepeatedly
this speedup is real
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
not due to the effect on memory layout
this speedup is real
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
Sound Performance EvaluationSTABILIZER
randomizes layoutrepeatedly
what does re-randomization do?
not due to the effect on memory layout
0.0
0.2
0.4
350 360 370 380 390 400
Time (s)
Probability Density
STABILIZER
randomizes layoutrepeatedly
0.0
0.2
0.4
350 360 370 380 390 400
Time (s)
Probability Density
STABILIZER
randomizes layoutrepeatedly
one random layout per-run
STABILIZERrandomizes layoutrepeatedly
0.0
0.2
0.4
350 360 370 380 390 400
Time (s)
Probability Density
one random layout per-run
STABILIZERrandomizes layoutrepeatedly
0.0
0.2
0.4
350 360 370 380 390 400
Time (s)
Probability Density
many random layouts in each run
one random layout per-run
STABILIZERrandomizes layoutrepeatedly
STABILIZER generates a new random layout every ½ second
STABILIZERrandomizes layoutrepeatedly
STABILIZER generates a new random layout every ½ second
Total execution time is the sum of all periods
STABILIZERrandomizes layoutrepeatedly
STABILIZER generates a new random layout every ½ second
Total execution time is the sum of all periods
The sum of a sufficient number of independent, identically distributed random
variables is approximately normally distributed.
STABILIZERrandomizes layoutrepeatedly
STABILIZER generates a new random layout every ½ second
Total execution time is the sum of all periods
The sum of a sufficient number of independent, identically distributed random
variables is approximately normally distributed.
STABILIZERrandomizes layoutrepeatedly
STABILIZER generates a new random layout every ½ second
Total execution time is the sum of all periods
The sum of a sufficient number of independent, identically distributed random
variables is approximately normally distributed.
STABILIZERrandomizes layoutrepeatedly
Central Limit Theorem
The sum of a sufficient number of independent, identically distributed random
variables is approximately normally distributed.
Central Limit Theorem
execution times are normally distributed
The sum of a sufficient number of independent, identically distributed random
variables is approximately normally distributed.
STABILIZERMemory layout affects performance
STABILIZER eliminates the effect of layoutenables sound performance evaluation
evaluation of LLVM’s optimizations with STABILIZERCase Studies
makes performance evaluation difficult
STABILIZER
makes performance evaluation difficultMemory layout affects performance
STABILIZER eliminates the effect of layoutenables sound performance evaluation
evaluation of LLVM’s optimizations with STABILIZERCase Studies
Case Studies
evaluation of LLVM’s optimizations with STABILIZER
Case Studies
on each benchmark
evaluation of LLVM’s optimizations with STABILIZER
Case Studies
on each benchmark
across the whole benchmark suite
evaluation of LLVM’s optimizations with STABILIZER
Case Studies
on each benchmark
across the whole benchmark suite
evaluation of LLVM’s optimizations with STABILIZER
first, build benchmarks with STABILIZER
Build programs with STABILIZER
Build programs with STABILIZER
> szc main.c
> szc main.c
Build programs with STABILIZER
> szc main.c-Rcode
Build programs with STABILIZER
> szc main.c-Rcode -Rheap -Rstack
Build programs with STABILIZER
> szc main.c-Rcode -Rheap -Rstack
Build programs with STABILIZER
now run the benchmarks
Run benchmarks as usual
A′A ×30 ×30
Run benchmarks as usual
A′A ×30 ×30
drop the results into R
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
Run benchmarks as usual
A′A ×30 ×30
drop the results into R
Is faster than ?A
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
A′
Is faster than ?A
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
A′
A′
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
If = A
If = A′
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
A
A′If = A
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
what is the probability of measuring a difference at least this large?
what is the probability of measuring a difference at least this large?
A′
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
The Student’s t-test
If = A
what is the probability of measuring a difference at least this large?
A′
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
The Student’s t-test
If = A
t.test(times.A′, times.A)
what is the probability of measuring a difference at least this large?
A′If = A
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
The Student’s t-testIf p-value
A′
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
The Student’s t-testIf p-value ≤ 5%
If = A
what is the probability of measuring a difference at least this large?
A′
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
The Student’s t-testIf p-value ≤ 5%
If = A
95% Confidence
what is the probability of measuring a difference at least this large?
If = A′
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
A
The Student’s t-testIf p-value ≤ 5%
we reject the null hypothesis
what is the probability of measuring a difference at least this large?
≠ A
The Student’s t-testIf p-value ≤ 5%
we reject the null hypothesis
0%
10%
20%
30%
40%
85.0 87.5 90.0 92.5 95.0
Time (s)
Percent of Observed Runtimes
Random chance not responsible for the measured difference
A′
≠ A
The difference is real
A′
−10%
0%
10%
20%
libquantum
milc
bzip2
sphinx3
namd lbm
perlbench
hmmer
h264ref
cactusADM wrf
sjeng
gobmk
gromacs
zeusmp mcf
gcc
astar
Speedup Significant
Yes
No
Speedup of -O2 over -O1
−10%
0%
10%
20%
libquantum
milc
bzip2
sphinx3
namd lbm
perlbench
hmmer
h264ref
cactusADM wrf
sjeng
gobmk
gromacs
zeusmp mcf
gcc
astar
Speedup Significant
Yes
No
Speedup of -O2 over -O1
−10%
0%
10%
20%
libquantum
milc
bzip2
sphinx3
namd lbm
perlbench
hmmer
h264ref
cactusADM wrf
sjeng
gobmk
gromacs
zeusmp mcf
gcc
astar
Speedup Significant
Yes
No
Speedup of -O2 over -O1
−10%
0%
10%
20%
libquantum
milc
bzip2
sphinx3
namd lbm
perlbench
hmmer
h264ref
cactusADM wrf
sjeng
gobmk
gromacs
zeusmp mcf
gcc
astar
Speedup Significant
Yes
No
Speedup of -O2 over -O1
−10%
0%
10%
20%
bzip2
gobmk
zeusmp
libquantum wrf
astar mcf
hmmer
milc
namd gcc
lbm gromacs
h264ref
cactusADM
perlbench
sphinx3
sjeng
Speedup Significant
Yes
No
Speedup of -O3 over -O2
Speedup of -O3 over -O2
0.0%
0.5%
1.0%
1.5%
bzip2
gobmk
zeusmp
libquantum wrf
astar mcf
hmmer
milc
namd gcc
lbm gromacs
h264ref
cactusADM
perlbench
sphinx3
sjeng
Speedup Significant
Yes
No
0.0%
0.5%
1.0%
1.5%
bzip2
gobmk
zeusmp
libquantum wrf
astar mcf
hmmer
milc
namd gcc
lbm gromacs
h264ref
cactusADM
perlbench
sphinx3
sjeng
Speedup Significant
Yes
No
Speedup of -O3 over -O2
0.0%
0.5%
1.0%
1.5%
bzip2
gobmk
zeusmp
libquantum wrf
astar mcf
hmmer
milc
namd gcc
lbm gromacs
h264ref
cactusADM
perlbench
sphinx3
sjeng
Speedup Significant
Yes
No
Speedup of -O3 over -O2
0.0%
0.5%
1.0%
1.5%
bzip2
gobmk
zeusmp
libquantum wrf
astar mcf
hmmer
milc
namd gcc
lbm gromacs
h264ref
cactusADM
perlbench
sphinx3
sjeng
Speedup Significant
Yes
No
Speedup of -O3 over -O2
0.0%
0.5%
1.0%
1.5%
bzip2
gobmk
zeusmp
libquantum wrf
astar mcf
hmmer
milc
namd gcc
lbm gromacs
h264ref
cactusADM
perlbench
sphinx3
sjeng
Speedup Significant
Yes
No
What do the results mean?
Comparing optimizations
-O2 -O3×30 ×30
-O2 -O3
×30 ×30lbm lbm
×30 ×30
Comparing optimizations
-O2 -O3
×30 ×30lbm lbm
×30 ×30astar astar
×30 ×30
Comparing optimizations
-O2 -O3
×30 ×30lbm lbm
×30 ×30astar astar
...
×30 ×30
Comparing optimizations
-O2 -O3×30 ×30
0%
1%
2%
3%
4%
0 25 50 75 100
Time (s)
Percent of Observed Runtimes
Version: -O2, -O3
Comparing optimizations
-O2-O3
0%
1%
2%
3%
4%
0 25 50 75 100
Time (s)
Percent of Observed Runtimes
Version: -O2, -O3
Is faster than ?
-O2-O3
0%
1%
2%
3%
4%
0 25 50 75 100
Time (s)
Percent of Observed Runtimes
Version: -O2, -O3
Is faster than ?
-O2-O3
0%
1%
2%
3%
4%
0 25 50 75 100
Time (s)
Percent of Observed Runtimes
Version: -O2, -O3
Is faster than ?
-O2-O3
0%
1%
2%
3%
4%
0 25 50 75 100
Time (s)
Percent of Observed Runtimes
Version: -O2, -O3
Is faster than ?
If = -O2-O3
0%
1%
2%
3%
4%
0 25 50 75 100
Time (s)
Percent of Observed Runtimes
Version: -O2, -O3
If = -O2-O3
0%
1%
2%
3%
4%
0 25 50 75 100
Time (s)
Percent of Observed Runtimes
Version: -O2, -O3
If = -O2-O3
0%
1%
2%
3%
4%
0 25 50 75 100
Time (s)
Percent of Observed Runtimes
Version: -O2, -O3
what is the probability of measuring these differences?
If = -O2-O3
0%
1%
2%
3%
4%
0 25 50 75 100
Time (s)
Percent of Observed Runtimes
Version: -O2, -O3
what is the probability of measuring these differences?
Analysis of Variance
If = -O2-O3
0%
1%
2%
3%
4%
0 25 50 75 100
Time (s)
Percent of Observed Runtimes
Version: -O2, -O3
what is the probability of measuring these differences?
Analysis of Variance
aov(time~opt+Error(benchmark/opt), times)
If = -O2-O3
0%
1%
2%
3%
4%
0 25 50 75 100
Time (s)
Percent of Observed Runtimes
Version: -O2, -O3
what is the probability of measuring these differences?
Analysis of VarianceIf p-value
If = -O2-O3
0%
1%
2%
3%
4%
0 25 50 75 100
Time (s)
Percent of Observed Runtimes
Version: -O2, -O3
what is the probability of measuring these differences?
Analysis of VarianceIf p-value ≤ 5%
If = -O2-O3
0%
1%
2%
3%
4%
0 25 50 75 100
Time (s)
Percent of Observed Runtimes
Version: -O2, -O3
what is the probability of measuring these differences?
Analysis of VarianceIf p-value ≤ 5%
we reject the null hypothesis
Analysis of VarianceIf p-value ≤ 5%
we reject the null hypothesis
-O3 vs -O2
Analysis of VarianceIf p-value ≤ 5%
we reject the null hypothesis
p-value = 26.4%
-O3 vs -O2
Analysis of VarianceIf p-value ≤ 5%
we reject the null hypothesis
p-value = 26.4%
-O3 vs -O2
are we 73.6% confident?
Analysis of VarianceIf p-value ≤ 5%
we reject the null hypothesis
p-value = 26.4%
-O3 vs -O2
are we 73.6% confident?
one in four experiments will show an effect that does not exist!
Analysis of VarianceIf p-value ≤ 5%
we reject the null hypothesis
p-value = 26.4%
fail to reject the null hypothesis
-O3 vs -O2
Analysis of Variance
If p-value ≤ 5%
we reject the null hypothesis
The effect of -O3 over -O2 is indistinguishable from noise
Analysis of Variance
If p-value ≤ 5%
we reject the null hypothesis
The effect of -O3 over -O2 is indistinguishable from noise
Did STABILIZER hide the effect?
Runtime with -O2
Runtime with -O3
Runtime with -O1
Runtime with -O0
Execution Time
Did STABILIZER hide the effect?
STABILIZER
STABILIZER
STABILIZER
STABILIZER
Execution Time
Runtime with -O2
Runtime with -O3
Runtime with -O1
Runtime with -O0
Did STABILIZER hide the effect?
STABILIZER
STABILIZER
STABILIZER
STABILIZER
Execution Time
Runtime with -O2
Runtime with -O3
Runtime with -O1
Runtime with -O0
speedups
Did STABILIZER hide the effect?
Runtime with -O2
Runtime with -O3
Runtime with -O1
Runtime with -O0
STABILIZER
STABILIZER
STABILIZER
STABILIZER
Execution Time
speedups
Did STABILIZER hide the effect?
SPECint SPECfp Summary
0%
10%
20%
30%
40%
perlbench gcc gobmk h264ref sjeng astar bzip2 libquantum hmmer mcf
cactusADM zeusmp wrf gromacs sphinx3 lbm milc namd hmean median
% Increase in Execution Time
STABILIZER is fast enough
STABILIZERMemory layout affects performance
STABILIZER eliminates the effect of layout
showed that -O3 does not have a statistically significant effect across our benchmarks
Case Studies
random layout enables sound performance evaluation
makes performance evaluation difficult
STABILIZERMemory layout affects performance
STABILIZER eliminates the effect of layout
showed that -O3 does not have a statistically significant effect across our benchmarks
Case Studies
random layout enables sound performance evaluation
makes performance evaluation difficult
STABILIZERMemory layout affects performance
STABILIZER eliminates the effect of layout
showed that -O3 does not have a statistically significant effect across our benchmarks
Case Studies
random layout enables sound performance evaluation
makes performance evaluation difficult
STABILIZERMemory layout affects performance
STABILIZER eliminates the effect of layout
showed that -O3 does not have a statistically significant effect across our benchmarks
Case Studies
random layout enables sound performance evaluation
makes performance evaluation difficult
available at
stabilizer-tool.org
STABILIZER