Низкоуровневые оптимизации .net-приложений
Post on 13-Jul-2015
2.483 Views
Preview:
TRANSCRIPT
Низкоуровневые оптимизации .NET-приложений
Андрей Акиньшин
Барнаульское сообщество .NET разработчиков
bug.ineta.ru, www.facebook.com/groups/dotnetbarnaul/
Оптимизировать или не оптимизировать?
Premature optimization is the root of all evil. © Donald Ervin Knuth
Memory traffic
• Memory traffic — это плохо.
• Много объектов → GC тяжело.
• Разная цена объектов в Gen0, Gen1, Gen2, LOH.
• Лишний memory traffic: LINQ, упаковка, большие объекты, отсутствие интернирования и пулов объектов и многое другое.
Memory traffic: замыкания
private void Foo(Func<int, int> inc){}
private void Run(){
Program.<>c__DisplayClass1 cDisplayClass1 = new Program.<>c__DisplayClass1();cDisplayClass1.y = 1;this.Foo(new Func<int, int>((object) cDisplayClass1, __methodptr(<>b__0)));
}
[CompilerGenerated]private sealed class <>c__DisplayClass1{
public int y;
public <>c__DisplayClass1(){
base..ctor();}
public int <Run>b__0(int x){
return x + this.y;}
}
Memory traffic: замыкания
void Foo(Func<object> before,Func<object> after)
{
before();
// Some logic
after();
}void Run(){
var a = new object();var b = new object();Foo(() => a, () => b);
}
Memory traffic: замыкания
private void Run(){
Program.<>c__DisplayClass2 cDisplayClass2 = new Program.<>c__DisplayClass2();cDisplayClass2.a = new object();cDisplayClass2.b = new object();this.Foo(new Func<object>((object) cDisplayClass2, __methodptr(<Run>b__0)),
new Func<object>((object) cDisplayClass2, __methodptr(<Run>b__1)));}[CompilerGenerated]private sealed class <>c__DisplayClass2{
public object a;public object b;
public <>c__DisplayClass2(){
base..ctor();}
public object <Run>b__0(){
return this.a;}
public object <Run>b__1(){
return this.b;}
}
Memory traffic: замыкания
void Foo(Func<int, int> inc) { }static int y = 1;static int StaticInc(int x) { return x + y; }
void Run(){
Foo(x => StaticInc(x));Foo(StaticInc);
}
Memory traffic: замыкания
private void Run(){
if (Program.CS$<>9__CachedAnonymousMethodDelegate1 == null){
Program.CS$<>9__CachedAnonymousMethodDelegate1 =new Func<int, int>((object) null, __methodptr(<Run>b__0));
}this.Foo(Program.CS$<>9__CachedAnonymousMethodDelegate1);this.Foo(new Func<int, int>((object) null, __methodptr(StaticInc)));
}[CompilerGenerated]private static int <Run>b__0(int x){
return Program.StaticInc(x);}[CompilerGenerated]private static Func<int, int> CS$<>9__CachedAnonymousMethodDelegate1;
Memory traffic: params
void Foo(params int[] x) { }
void Main()
{
Foo();
// IL_0001: ldarg.0
// IL_0002: ldc.i4.0
// IL_0003: newarr
// IL_0008: call
}
Memory traffic: yield
private IEnumerable<int> Foo(){
Program.<Foo>d__0 fooD0 = new Program.<Foo>d__0(-2);fooD0.<>4__this = this;return (IEnumerable<int>) fooD0;
}
[CompilerGenerated]private sealed class <Foo>d__0 : IEnumerable<int>, IEnumerable,
IEnumerator<int>, IEnumerator, IDisposable{
private int <>2__current;private int <>1__state;private int <>l__initialThreadId;public int <i>5__1;public Program <>4__this;...
Memory traffic: List vs IList
void Foo1(List<int> list){
var start = GC.GetTotalMemory(true);foreach (var i in list)
Console.WriteLine(GC.GetTotalMemory(true) - start);}void Foo2(IList<int> list){
var start = GC.GetTotalMemory(true);foreach (var i in list)
Console.WriteLine(GC.GetTotalMemory(true) - start);}void Run(){
var list = new List<int> { 1 };Foo1(list);Foo2(list);
}
Memory traffic: маленький List
struct SmallList<T> : IList<T>{
private T item1;private T item2;private T item3;private List<T> otherItems;public List<T> ToList() { ... }
}
Микробенчмарки
Требования:
• Получение метрик
• Воспроизводимость
• Объективность
• Отсутствие сайд-эффектов
Как запускать бенчмарк?
• Release mode
• Without debugging
• Прогрев
• Запуск на одном ядре
• Чистое окружение
• Многократный запуск в разных окружениях
Нужно помнить про:
• Dead code elimination
• Inlining
• Folding
• Branch prediction
• 600+ других оптимизаций
Interface implementation
interface IFoo{
int Inc(int x);}class FastFoo : IFoo{
public int Inc(int x){
return x + 1;}
}class SlowFoo : IFoo{
public int Inc(int x){
return 1 + x;}
}void DoIt(IFoo foo){
for (int i = 0; i < 1000000000; i++)foo.Inc(0);
}DoIt(new FastFoo());DoIt(new SlowFoo());
Readonly fields
public struct Int256{
private readonly long bits0, bits1, bits2, bits3;public Int256(long bits0, long bits1, long bits2, long bits3){
this.bits0 = bits0; this.bits1 = bits1; this.bits2 = bits2; this.bits3 = bits3;}public long Bits0 { get { return bits0; } }public long Bits1 { get { return bits1; } }public long Bits2 { get { return bits2; } }public long Bits3 { get { return bits3; } }
}class Test{
private readonly Int256 value; // private Int256 value;
public Test() { value = new Int256(1L, 5L, 10L, 100L); }
public long TotalValue { get { return value.Bits0 + value.Bits1 + value.Bits2 + value.Bits3; } }
public void RunTest(){
var sample = TotalValue;Stopwatch sw = Stopwatch.StartNew();long total = 0;for (int i = 0; i < 1000000000; i++) total += TotalValue;sw.Stop();Console.WriteLine("Total time: {0}ms", sw.ElapsedMilliseconds);
}static void Main() { new Test().RunTest(); }
}
© Jon Skeet, Micro-optimization: the surprising inefficiency of readonly fields
Reflection
static MemoryLeakFixer(){
var fields = typeof(DisposableObject).GetFields(BindingFlags.Instance | BindingFlags.NonPublic | BindingFlags.Public);
fieldInfo = fields.FirstOrDefault(f => f.FieldType == typeof(GCHandle));}
public static void FixAfterRelease(DisposableObject obj){
if (obj.IsDisposed){
var dataHandle = (GCHandle)fieldInfo.GetValue(obj);if (dataHandle.IsAllocated)
dataHandle.Free();}
}
StructLayout
[StructLayout(LayoutKind.Explicit)]struct MyStruct{
[FieldOffset(0)]public Int16 Value;[FieldOffset(0)]public Byte LowByte;
}
var s = new MyStruct();s.Value = 256 + 100;Console.WriteLine(s.LowByte); // 100
Чёрная магия
public class MyObject{
public long X;}public class Pumpkin{
public int Y1;public int Y2;
}public unsafe IntPtr GetAddress(object obj){
var typedReference = __makeref(obj);return *(IntPtr*)(&typedReference);
}public unsafe T Convert<T>(IntPtr address){
var fakeInstance = default(T);var typedReference = __makeref(fakeInstance);*(IntPtr*)(&typedReference) = address;return __refvalue( typedReference,T);
}public void Run(){
var myObject = new MyObject { X = 1 + (2L << 32) };
var pumpkin = Convert<Pumpkin>(GetAddress(myObject));
Console.WriteLine(pumpkin.Y1 + " " + pumpkin.Y2); // 1 2
myObject.X = 3 + (4L << 32);
Console.WriteLine(pumpkin.Y1 + " " + pumpkin.Y2); // 3 4
}
Нужно понимать
// sscli20\clr\src\vm\typehandle.h
// A TypeHandle is the FUNDAMENTAL concept of type identity in the CLR.
// That is two types are equal if and only if their type handles
// are equal. A TypeHandle, is a pointer sized structure that encodes
// everything you need to know to figure out what kind of type you are
// actually dealing with.
// At the present time a TypeHandle can point at two possible things
//
// 1) A MethodTable (Intrinsics, Classes, Value Types and their instantiations)
// 2) A TypeDesc (all other cases: arrays, byrefs, pointer types,
// function pointers, generic type variables)
//
// or with IL stubs, a third thing:
//
// 3) A MethodTable for a native value type.
Нужно понимать
private void Print(Type type){
bool isTypeDesc = ((int)type.TypeHandle.Value & 2) > 0;Console.WriteLine("{0}: {1} => {2}",
type.Name.PadRight(10),type.TypeHandle.Value.ToString("X"),(isTypeDesc ? "TypeDesc" : "MethodTable"));
}private void Run(){
Print(typeof(int));Print(typeof(object));Print(typeof(Stream));Print(typeof(int[]));Print(typeof(int[][]));Print(typeof(object[]));
}
// Int32 : 65C4C480 => MethodTable
// Object : 65C4B060 => MethodTable
// Stream : 65C4D954 => MethodTable
// Int32[] : 65854C8A => TypeDesc
// Int32[][] : 658F6BD6 => TypeDesc
// Object[] : 65854D7A => TypeDesc
Циклы
public int Foo997(){
int sum = 0;for (int i = 0; i < 997; i++)
sum += a[i];return sum;
}public int Foo1000(){
int sum = 0;for (int i = 0; i < 1000; i++)
sum += a[i];return sum;
}
Параллелизм инструкций
int iterationCount = 256 * 1024 * 1024;int[] a = new int[2];for (int i = 0; i < iterationCount; i++){
a[0]++;a[0]++;
}for (int i = 0; i < iterationCount; i++){
a[0]++;a[1]++;
}
Кэш процессора
int[] x = new int[64 * 1024 * 1024];for (int i = 0; i < x.Length; i++)
x[i] *= 3;for (int i = 0; i < x.Length; i += 16)
x[i] *= 3;
Кэш процессора
Умножение матриц:
// Standard
for (k = 0; k < n; k++)
for (i = 0; i < n; i++)for (j = 0; j < n; j++)
c[k][i] = c[k][i] + a[k][j]*b[j][i];
// Optimized
for (k = 0; k < n; k++)
for (i = 0; i < n; i++)for (j = 0; j < n; j++)
c[i][j] = c[i][j] + a[i][k]*b[k][j];
Кэш процессора
False sharing
private const int Step = 1;
private static int[] x = new int[1024];
private void Foo(int p){
for (int j = 0; j < 1000000000; j++)x[p] = x[p] + 3;
}
private void Run(){
var s = Stopwatch.StartNew();var tasks = new Task[4];tasks[0] = Task.Factory.StartNew(() => Foo(0 * Step));tasks[1] = Task.Factory.StartNew(() => Foo(1 * Step));tasks[2] = Task.Factory.StartNew(() => Foo(2 * Step));tasks[3] = Task.Factory.StartNew(() => Foo(3 * Step));Task.WaitAll(tasks);Console.WriteLine(s.ElapsedMilliseconds);
}
А теперь подумайте про CardTable...
top related