我遇到了一个奇怪的性能损失,并已将问题缩小到以下代码:
/// Immutable value-type vector with three single-precision components.
[<Struct>]
type Vector3(x: float32, y: float32, z: float32) =
    /// The first component, as passed to the constructor.
    member v.X = x

    /// The second component, as passed to the constructor.
    member v.Y = y

    /// The third component, as passed to the constructor.
    member v.Z = z
/// Benchmark fixture: appends Vector3 values to a list through two private
/// helpers whose only difference is whether the List.Add call is the final
/// expression of the helper.
type Data(n: int) =
// Backing list that both helpers append to.
let positions = System.Collections.Generic.List<Vector3>()
// List.Add is the last expression here; the IL excerpt in this document shows
// this call emitted with a `tail.` prefix.
let add j = positions.Add (Vector3(j, j, j))
// Same append followed by an explicit `()`, so List.Add is no longer the last
// expression; the IL excerpt shows a plain callvirt without `tail.`.
let add1 j = positions.Add (Vector3(j, j, j)); ()
/// Calls `add` once for each i in 1..n, passing i converted to float32.
member this.UseAdd () = for i = 1 to n do add (float32 i)
/// Calls `add1` once for each i in 1..n, passing i converted to float32.
member this.UseAdd1 () = for i = 1 to n do add1 (float32 i)
/// Runs `f` once and prints "<name>: <elapsed>ms" to standard output, where
/// the elapsed time is the wall-clock duration of the call in whole
/// milliseconds.
let timeIt name (f: unit -> unit) =
    let stopwatch = System.Diagnostics.Stopwatch.StartNew()
    f ()
    // Read the elapsed time right after the call, before any printing work.
    let elapsedMs = int stopwatch.ElapsedMilliseconds
    printfn "%s: %ims" name elapsedMs
/// Runs the benchmark: three timed runs labelled "ADD" using Data.UseAdd,
/// then three timed runs labelled "ADD1" using Data.UseAdd1. Each run builds
/// a fresh Data instance with 1000000 elements.
let test () =
    for _ in 1 .. 3 do
        timeIt "ADD" (fun () -> Data(1000000).UseAdd())

    for _ in 1 .. 3 do
        timeIt "ADD1" (fun () -> Data(1000000).UseAdd1())
/// Program entry point: runs the benchmark and returns exit code 0.
/// The command-line arguments are not used.
[<EntryPoint>]
let main argv =
    do test ()
    // Exit code reported to the host process.
    0
`add` 和 `add1` 之间的区别在于后者末尾多了一个 `()`。
当我在 .NET 4.5.1 上使用 F# 3.1 将其构建为 x64 Release 构建时,我得到以下输出:
ADD: 461ms
ADD: 457ms
ADD: 450ms
ADD1: 25ms
ADD1: 26ms
ADD1: 16ms
由于 `List<T>.Add` 的类型是 `T -> unit`,我原以为 `add` 和 `add1` 的表现应该相同。
使用 ILdasm,我发现 `add` 被编译为(仅包含相关部分):
IL_000a: newobj instance void Program/Vector3::.ctor(float32,
float32,
float32)
IL_000f: tail.
IL_0011: callvirt instance void class [mscorlib]System.Collections.Generic.List`1<valuetype Program/Vector3>::Add(!0)
而 `add1` 被编译为:
IL_000a: newobj instance void Program/Vector3::.ctor(float32,
float32,
float32)
IL_000f: callvirt instance void class [mscorlib]System.Collections.Generic.List`1<valuetype Program/Vector3>::Add(!0)
即没有“尾调用”(`tail.` 前缀)。因此,当我关闭尾调用优化时,`add` 和 `add1` 以相同的速度运行。
为什么 `tail.` 指令会导致函数调用慢这么多?另外,这是一个 bug 还是有意为之的特性?
编辑:这是我最初注意到这种行为的原始代码。当去掉末尾的 `true` 值时,它表现出与上面的代码相同的性能下降。
/// Builds a CifAtom and its position from one atom-site element, appends the
/// atom to ctx.Atoms and the position to ctx.Positions, and returns true.
// NOTE(review): ValueOrFail presumably raises when the field is missing - confirm
// against its definition, which is not shown here.
let makeAtom (ctx: CleanCifContext) (element: CleanCifAtomSiteElement) =
let residue = getResidue ctx element
// Coordinates are converted to float32 before being packed into the vector.
let position =
Vector3(float32 (element.PositionX.ValueOrFail()), float32 (element.PositionY.ValueOrFail()), float32 (element.PositionZ.ValueOrFail()))
// The atom id is the current length of ctx.Atoms, read before the atom is appended below.
let atom =
CifAtom(id = ctx.Atoms.Count, element = element.ElementSymbol.ValueOrFail(),
residue = residue, serialNumber = element.Id.ValueOrFail(),
name = element.Name.ValueOrFail(), authName = element.AuthName.Value(), altLoc = element.AltLoc.Value(),
occupancy = float32 (element.Occupancy.ValueOrFail()), tempFactor = float32 (element.TempFactor.ValueOrFail()))
ctx.Atoms.Add atom
ctx.Positions.Add position
// The trailing value keeps ctx.Positions.Add from being the final expression;
// the author reports that dropping it reproduces the slowdown.
true