diff --git a/src/dense/low_order_rk_addsteps.jl b/src/dense/low_order_rk_addsteps.jl index 2c8c8e7854..6bec99333f 100644 --- a/src/dense/low_order_rk_addsteps.jl +++ b/src/dense/low_order_rk_addsteps.jl @@ -150,7 +150,7 @@ function ode_addsteps!{calcVal,calcVal2,calcVal3}(k,t,uprev,u,dt,f,cache::Tsit5C end f(t+dt,tmp,k6) for i in uidx - u[i] = uprev[i]+dt*(a71*k1[i]+a72*k2[i]+a73*k3[i]+a74*k4[i]+a75*k5[i]+a76*k6[i]) + tmp[i] = uprev[i]+dt*(a71*k1[i]+a72*k2[i]+a73*k3[i]+a74*k4[i]+a75*k5[i]+a76*k6[i]) end f(t+dt,u,k7) copyat_or_push!(k,1,k1) diff --git a/src/integrators/explicit_rk_integrator.jl b/src/integrators/explicit_rk_integrator.jl index 6a84f59d8e..29a8a82355 100644 --- a/src/integrators/explicit_rk_integrator.jl +++ b/src/integrators/explicit_rk_integrator.jl @@ -14,28 +14,28 @@ end for i = 2:stages-1 utilde = zero(kk[1]) for j = 1:i-1 - utilde += A[j,i]*kk[j] + utilde = @muladd utilde + A[j,i]*kk[j] end - kk[i] = f(t+c[i]*dt,uprev+dt*utilde); + kk[i] = f(@muladd(t+c[i]*dt),@muladd(uprev+dt*utilde)); end #Calc Last utilde = zero(kk[1]) for j = 1:stages-1 - utilde += A[j,end]*kk[j] + utilde = @muladd utilde + A[j,end]*kk[j] end - kk[end] = f(t+c[end]*dt,uprev+dt*utilde); integrator.fsallast = kk[end] # Uses fsallast as temp even if not fsal + kk[end] = f(@muladd(t+c[end]*dt),@muladd(uprev+dt*utilde)); integrator.fsallast = kk[end] # Uses fsallast as temp even if not fsal # Accumulate Result utilde = α[1]*kk[1] for i = 2:stages - utilde += α[i]*kk[i] + utilde = @muladd utilde + α[i]*kk[i] end - u = uprev + dt*utilde + u = @muladd uprev + dt*utilde if integrator.opts.adaptive uEEst = αEEst[1]*kk[1] for i = 2:stages - uEEst += αEEst[i]*kk[i] + uEEst = @muladd uEEst + αEEst[i]*kk[i] end - integrator.EEst = integrator.opts.internalnorm( dt*(utilde-uEEst)/(integrator.opts.abstol+max(abs(uprev),abs(u))*integrator.opts.reltol)) + integrator.EEst = integrator.opts.internalnorm( dt*(utilde-uEEst)/@muladd(integrator.opts.abstol+max(abs(uprev),abs(u))*integrator.opts.reltol)) end if isfsal(integrator.alg.tableau) integrator.fsallast = kk[end] @@ -69,13 +69,13 @@ end end for j = 1:i-1 for l in uidx - utilde[l] += A[j,i]*kk[j][l] + utilde[l] = @muladd utilde[l] + A[j,i]*kk[j][l] end end for l in uidx - tmp[l] = uprev[l]+dt*utilde[l] + tmp[l] = @muladd uprev[l]+dt*utilde[l] end - f(t+c[i]*dt,tmp,kk[i]) + f(@muladd(t+c[i]*dt),tmp,kk[i]) end #Last for l in uidx @@ -83,13 +83,13 @@ end end for j = 1:stages-1 for l in uidx - utilde[l] += A[j,end]*kk[j][l] + utilde[l] = @muladd utilde[l] + A[j,end]*kk[j][l] end end for l in uidx - u[l] = uprev[l]+dt*utilde[l] + u[l] = @muladd uprev[l]+dt*utilde[l] end - f(t+c[end]*dt,u,kk[end]) #fsallast is tmp even if not fsal + f(@muladd(t+c[end]*dt),u,kk[end]) #fsallast is tmp even if not fsal #Accumulate if !isfsal(integrator.alg.tableau) for i in uidx @@ -97,11 +97,11 @@ end end for i = 2:stages for l in uidx - utilde[l] += α[i]*kk[i][l] + utilde[l] = @muladd utilde[l] + α[i]*kk[i][l] end end for i in uidx - u[i] = uprev[i] + dt*utilde[i] + u[i] = @muladd uprev[i] + dt*utilde[i] end end if integrator.opts.adaptive @@ -110,11 +110,11 @@ end end for i = 2:stages for j in uidx - uEEst[j] += αEEst[i]*kk[i][j] + uEEst[j] = @muladd uEEst[j] + αEEst[i]*kk[i][j] end end for i in uidx - atmp[i] = (dt*(utilde[i]-uEEst[i])/(integrator.opts.abstol+max(abs(uprev[i]),abs(u[i]))*integrator.opts.reltol)) + atmp[i] = (dt*(utilde[i]-uEEst[i])/@muladd(integrator.opts.abstol+max(abs(uprev[i]),abs(u[i]))*integrator.opts.reltol)) end integrator.EEst = integrator.opts.internalnorm(atmp) end diff --git a/src/integrators/feagin_rk_integrators.jl b/src/integrators/feagin_rk_integrators.jl index 4ba797c6cf..f87937114b 100644 --- a/src/integrators/feagin_rk_integrators.jl +++ b/src/integrators/feagin_rk_integrators.jl @@ -9,7 +9,7 @@ end @unpack adaptiveConst,a0100,a0200,a0201,a0300,a0302,a0400,a0402,a0403,a0500,a0503,a0504,a0600,a0603,a0604,a0605,a0700,a0704,a0705,a0706,a0800,a0805,a0806,a0807,a0900,a0905,a0906,a0907,a0908,a1000,a1005,a1006,a1007,a1008,a1009,a1100,a1105,a1106,a1107,a1108,a1109,a1110,a1200,a1203,a1204,a1205,a1206,a1207,a1208,a1209,a1210,a1211,a1300,a1302,a1303,a1305,a1306,a1307,a1308,a1309,a1310,a1311,a1312,a1400,a1401,a1404,a1406,a1412,a1413,a1500,a1502,a1514,a1600,a1601,a1602,a1604,a1605,a1606,a1607,a1608,a1609,a1610,a1611,a1612,a1613,a1614,a1615,b1,b2,b3,b4,b5,b6,b7,b8,b9,b10,b11,b12,b13,b14,b15,b16,b17,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16 = cache k1 = integrator.fsalfirst k2 = f(t + c1*dt,uprev + dt*(a0100*k1)) - k3 = f(t + c2*dt ,uprev + dt*(a0200*k1 + a0201*k2)) + k3 = f(t + c2*dt,uprev + dt*(a0200*k1 + a0201*k2)) k4 = f(t + c3*dt,uprev + dt*(a0300*k1 + a0302*k3)) k5 = f(t + c4*dt,uprev + dt*(a0400*k1 + a0402*k3 + a0403*k4)) k6 = f(t + c5*dt,uprev + dt*(a0500*k1 + a0503*k4 + a0504*k5)) @@ -24,8 +24,7 @@ end k15 = f(t + c14*dt,uprev + dt*(a1400*k1 + a1401*k2 + a1404*k5 + a1406*k7 + a1412*k13 + a1413*k14)) k16 = f(t + c15*dt,uprev + dt*(a1500*k1 + a1502*k3 + a1514*k15)) k17 = f(t + c16*dt,uprev + dt*(a1600*k1 + a1601*k2 + a1602*k3 + a1604*k5 + a1605*k6 + a1606*k7 + a1607*k8 + a1608*k9 + a1609*k10 + a1610*k11 + a1611*k12 + a1612*k13 + a1613*k14 + a1614*k15 + a1615*k16)) - tmp = dt*((b1*k1 + b2*k2 + b3*k3 + b5*k5) + (b7*k7 + b9*k9 + b10*k10 + b11*k11) + (b12*k12 + b13*k13 + b14*k14 + b15*k15) + (b16*k16 + b17*k17)) - u = uprev + tmp + u = uprev + dt*(b1*k1 + b2*k2 + b3*k3 + b5*k5 + b7*k7 + b9*k9 + b10*k10 + b11*k11 + b12*k12 + b13*k13 + b14*k14 + b15*k15 + b16*k16 + b17*k17) if integrator.opts.adaptive integrator.EEst = integrator.opts.internalnorm((dt*(k2 - k16) * adaptiveConst)./(integrator.opts.abstol+max(abs(uprev),abs(u))*integrator.opts.reltol)) end diff --git a/src/integrators/implicit_integrators.jl b/src/integrators/implicit_integrators.jl index 145fcdc46b..2f7d248058 100644 --- a/src/integrators/implicit_integrators.jl +++ b/src/integrators/implicit_integrators.jl @@ -6,7 +6,7 @@ type RHS_IE_Scalar{F,uType,tType} <: Function end function (p::RHS_IE_Scalar)(u,resid) - resid[1] = u[1] - p.u_old[1] - p.dt*p.f(p.t+p.dt,u)[1] + resid[1] = u[1] - p.u_old[1] - p.dt*p.f(p.t+p.dt,u[1])[1] end @inline function initialize!(integrator,cache::ImplicitEulerConstantCache,f=integrator.f) @@ -103,7 +103,7 @@ function (p::RHS_Trap)(uprev,resid) du1 = get_du(p.dual_cache, eltype(uprev)) p.f(p.t+p.dt,reshape(uprev,p.sizeu),du1) for i in p.uidx - resid[i] = uprev[i] - p.u_old[i] - (p.dt/2)*(du1[i]+p.f_old[i]) + resid[i] = @muladd uprev[i] - p.u_old[i] - (p.dt/2)*(du1[i]+p.f_old[i]) end end @@ -150,7 +150,7 @@ type RHS_Trap_Scalar{F,uType,rateType,tType} <: Function end function (p::RHS_Trap_Scalar)(uprev,resid) - resid[1] = uprev[1] - p.u_old[1] - (p.dt/2)*(p.f_old + p.f(p.t+p.dt,uprev)[1]) + resid[1] = @muladd uprev[1] - p.u_old[1] - (p.dt/2)*(p.f_old + p.f(p.t+p.dt,uprev[1])[1]) end @inline function initialize!(integrator,cache::TrapezoidConstantCache,f=integrator.f) diff --git a/src/integrators/low_order_rk_integrators.jl b/src/integrators/low_order_rk_integrators.jl index 663bcd0317..ee967fc004 100644 --- a/src/integrators/low_order_rk_integrators.jl +++ b/src/integrators/low_order_rk_integrators.jl @@ -8,13 +8,15 @@ end @unpack t,dt,uprev,u,k = integrator @unpack a21,a32,a41,a42,a43,c1,c2,b1,b2,b3,b4 = cache k1 = integrator.fsalfirst - k2 = f(t+c1*dt,uprev+dt*a21*k1) - k3 = f(t+c2*dt,uprev+dt*a32*k2) - u = uprev+dt*(a41*k1+a42*k2+a43*k3) + a1 = dt*a21 + k2 = f(@muladd(t+c1*dt),@muladd(uprev+a1*k1)) + a2 = dt*a32 + k3 = f(@muladd(t+c2*dt),@muladd(uprev+a2*k2)) + u = @muladd uprev+dt*(a41*k1+a42*k2+a43*k3) k4 = f(t+dt,u); integrator.fsallast = k4 if integrator.opts.adaptive - utilde = uprev + dt*(b1*k1 + b2*k2 + b3*k3 + b4*k4) - integrator.EEst = integrator.opts.internalnorm( ((utilde-u)/(integrator.opts.abstol+max(abs(uprev),abs(u))*integrator.opts.reltol))) + utilde = @muladd uprev + dt*(b1*k1 + b2*k2 + b3*k3 + b4*k4) + integrator.EEst = integrator.opts.internalnorm( ((utilde-u)/@muladd(integrator.opts.abstol+max(abs(uprev),abs(u))*integrator.opts.reltol))) end integrator.k[1] = integrator.fsalfirst integrator.k[2] = integrator.fsallast @@ -37,22 +39,24 @@ end @unpack k2,k3,k4,utilde,tmp,atmp = cache k1 = cache.fsalfirst @unpack a21,a32,a41,a42,a43,c1,c2,b1,b2,b3,b4 = cache.tab + a1 = dt*a21 for i in uidx - tmp[i] = uprev[i]+dt*a21*k1[i] + tmp[i] = @muladd uprev[i]+a1*k1[i] end - f(t+c1*dt,tmp,k2) + f(@muladd(t+c1*dt),tmp,k2) + a2 = dt*a32 for i in uidx - tmp[i] = uprev[i]+dt*a32*k2[i] + tmp[i] = @muladd uprev[i]+a2*k2[i] end - f(t+c2*dt,tmp,k3) + f(@muladd(t+c2*dt),tmp,k3) for i in uidx - u[i] = uprev[i]+dt*(a41*k1[i]+a42*k2[i]+a43*k3[i]) + u[i] = @muladd uprev[i]+dt*(a41*k1[i]+a42*k2[i]+a43*k3[i]) end f(t+dt,u,k4) if integrator.opts.adaptive for i in uidx - utilde[i] = uprev[i] + dt*(b1*k1[i] + b2*k2[i] + b3*k3[i] + b4*k4[i]) - atmp[i] = ((utilde[i]-u[i])/(integrator.opts.abstol+max(abs(uprev[i]),abs(u[i]))*integrator.opts.reltol)) + utilde[i] = @muladd uprev[i] + dt*(b1*k1[i] + b2*k2[i] + b3*k3[i] + b4*k4[i]) + atmp[i] = ((utilde[i]-u[i])/@muladd(integrator.opts.abstol+max(abs(uprev[i]),abs(u[i]))*integrator.opts.reltol)) end integrator.EEst = integrator.opts.internalnorm(atmp) end @@ -69,19 +73,20 @@ end @unpack t,dt,uprev,u,k = integrator @unpack c1,c2,c3,c4,c5,a21,a31,a32,a41,a42,a43,a51,a52,a53,a54,a61,a62,a63,a64,a65,a71,a72,a73,a74,a75,a76,a81,a83,a84,a85,a86,a87,bhat1,bhat3,bhat4,bhat5,bhat6,btilde1,btilde2,btilde3,btilde4,btilde5,btilde6,btilde7,btilde8 = cache k1 = integrator.fsalfirst - k2 = f(t+c1*dt,uprev+dt*a21*k1) - k3 = f(t+c2*dt,uprev+dt*(a31*k1+a32*k2)) - k4 = f(t+c3*dt,uprev+dt*(a41*k1+a42*k2+a43*k3)) - k5 = f(t+c4*dt,uprev+dt*(a51*k1+a52*k2+a53*k3+a54*k4)) - k6 = f(t+c5*dt,uprev+dt*(a61*k1+a62*k2+a63*k3+a64*k4+a65*k5)) - k7 = f(t+dt,uprev+dt*(a71*k1+a72*k2+a73*k3+a74*k4+a75*k5+a76*k6)) - u = uprev+dt*(a81*k1+a83*k3+a84*k4+a85*k5+a86*k6+a87*k7) + a = dt*a21 + k2 = f(@muladd(t+c1*dt),@muladd(uprev+a*k1)) + k3 = f(@muladd(t+c2*dt),@muladd(uprev+dt*(a31*k1+a32*k2))) + k4 = f(@muladd(t+c3*dt),@muladd(uprev+dt*(a41*k1+a42*k2+a43*k3))) + k5 = f(@muladd(t+c4*dt),@muladd(uprev+dt*(a51*k1+a52*k2+a53*k3+a54*k4))) + k6 = f(@muladd(t+c5*dt),@muladd(uprev+dt*(a61*k1+a62*k2+a63*k3+a64*k4+a65*k5))) + k7 = f(t+dt,@muladd(uprev+dt*(a71*k1+a72*k2+a73*k3+a74*k4+a75*k5+a76*k6))) + u = @muladd uprev+dt*(a81*k1+a83*k3+a84*k4+a85*k5+a86*k6+a87*k7) integrator.fsallast = f(t+dt,u); k8 = integrator.fsallast if integrator.opts.adaptive - uhat = dt*(bhat1*k1 + bhat3*k3 + bhat4*k4 + bhat5*k5 + bhat6*k6) - utilde = uprev + dt*(btilde1*k1 + btilde2*k2 + btilde3*k3 + btilde4*k4 + btilde5*k5 + btilde6*k6 + btilde7*k7 + btilde8*k8) - EEst1 = integrator.opts.internalnorm( sum(((uhat)./(integrator.opts.abstol+max(abs(uprev),abs(u))*integrator.opts.reltol)))) - EEst2 = integrator.opts.internalnorm( sum(((utilde-u)./(integrator.opts.abstol+max(abs(uprev),abs(u))*integrator.opts.reltol)))) + uhat = dt*@muladd(bhat1*k1 + bhat3*k3 + bhat4*k4 + bhat5*k5 + bhat6*k6) + utilde = @muladd uprev + dt*(btilde1*k1 + btilde2*k2 + btilde3*k3 + btilde4*k4 + btilde5*k5 + btilde6*k6 + btilde7*k7 + btilde8*k8) + EEst1 = integrator.opts.internalnorm( sum(((uhat)./@muladd(integrator.opts.abstol+max(abs(uprev),abs(u))*integrator.opts.reltol)))) + EEst2 = integrator.opts.internalnorm( sum(((utilde-u)./@muladd(integrator.opts.abstol+max(abs(uprev),abs(u))*integrator.opts.reltol)))) integrator.EEst = max(EEst1,EEst2) end integrator.k[1]=k1; integrator.k[2]=k2; integrator.k[3]=k3;integrator.k[4]=k4;integrator.k[5]=k5;integrator.k[6]=k6;integrator.k[7]=k7;integrator.k[8]=k8 @@ -105,40 +110,41 @@ end uidx = eachindex(integrator.uprev) @unpack k1,k2,k3,k4,k5,k6,k7,k8,utilde,uhat,tmp,atmp,atmptilde = cache @unpack c1,c2,c3,c4,c5,a21,a31,a32,a41,a42,a43,a51,a52,a53,a54,a61,a62,a63,a64,a65,a71,a72,a73,a74,a75,a76,a81,a83,a84,a85,a86,a87,bhat1,bhat3,bhat4,bhat5,bhat6,btilde1,btilde2,btilde3,btilde4,btilde5,btilde6,btilde7,btilde8 = cache.tab + a = dt*a21 for i in uidx - tmp[i] = uprev[i]+dt*a21*k1[i] + tmp[i] = @muladd uprev[i]+a*k1[i] end - f(t+c1*dt,tmp,k2) + f(@muladd(t+c1*dt),tmp,k2) for i in uidx - tmp[i] = uprev[i]+dt*(a31*k1[i]+a32*k2[i]) + tmp[i] = @muladd uprev[i]+dt*(a31*k1[i]+a32*k2[i]) end - f(t+c2*dt,tmp,k3) + f(@muladd(t+c2*dt),tmp,k3) for i in uidx - tmp[i] = uprev[i]+dt*(a41*k1[i]+a42*k2[i]+a43*k3[i]) + tmp[i] = @muladd uprev[i]+dt*(a41*k1[i]+a42*k2[i]+a43*k3[i]) end - f(t+c3*dt,tmp,k4) + f(@muladd(t+c3*dt),tmp,k4) for i in uidx - tmp[i] = uprev[i]+dt*(a51*k1[i]+a52*k2[i]+a53*k3[i]+a54*k4[i]) + tmp[i] = @muladd uprev[i]+dt*(a51*k1[i]+a52*k2[i]+a53*k3[i]+a54*k4[i]) end - f(t+c4*dt,tmp,k5) + f(@muladd(t+c4*dt),tmp,k5) for i in uidx - tmp[i] = uprev[i]+dt*(a61*k1[i]+a62*k2[i]+a63*k3[i]+a64*k4[i]+a65*k5[i]) + tmp[i] = @muladd uprev[i]+dt*(a61*k1[i]+a62*k2[i]+a63*k3[i]+a64*k4[i]+a65*k5[i]) end - f(t+c5*dt,tmp,k6) + f(@muladd(t+c5*dt),tmp,k6) for i in uidx - tmp[i] = uprev[i]+dt*(a71*k1[i]+a72*k2[i]+a73*k3[i]+a74*k4[i]+a75*k5[i]+a76*k6[i]) + tmp[i] = @muladd uprev[i]+dt*(a71*k1[i]+a72*k2[i]+a73*k3[i]+a74*k4[i]+a75*k5[i]+a76*k6[i]) end f(t+dt,tmp,k7) for i in uidx - u[i] = uprev[i]+dt*(a81*k1[i]+a83*k3[i]+a84*k4[i]+a85*k5[i]+a86*k6[i]+a87*k7[i]) + u[i] = @muladd uprev[i]+dt*(a81*k1[i]+a83*k3[i]+a84*k4[i]+a85*k5[i]+a86*k6[i]+a87*k7[i]) end f(t+dt,u,k8) if integrator.opts.adaptive for i in uidx - uhat[i] = dt*(bhat1*k1[i] + bhat3*k3[i] + bhat4*k4[i] + bhat5*k5[i] + bhat6*k6[i]) - utilde[i] = uprev[i] + dt*(btilde1*k1[i] + btilde2*k2[i] + btilde3*k3[i] + btilde4*k4[i] + btilde5*k5[i] + btilde6*k6[i] + btilde7*k7[i] + btilde8*k8[i]) - atmp[i] = ((uhat[i])./(integrator.opts.abstol+max(abs(uprev[i]),abs(u[i]))*integrator.opts.reltol)) - atmptilde[i] = ((utilde[i]-u[i])./(integrator.opts.abstol+max(abs(uprev[i]),abs(u[i]))*integrator.opts.reltol)) + uhat[i] = dt*@muladd(bhat1*k1[i] + bhat3*k3[i] + bhat4*k4[i] + bhat5*k5[i] + bhat6*k6[i]) + utilde[i] = @muladd uprev[i] + dt*(btilde1*k1[i] + btilde2*k2[i] + btilde3*k3[i] + btilde4*k4[i] + btilde5*k5[i] + btilde6*k6[i] + btilde7*k7[i] + btilde8*k8[i]) + atmp[i] = ((uhat[i])./@muladd(integrator.opts.abstol+max(abs(uprev[i]),abs(u[i]))*integrator.opts.reltol)) + atmptilde[i] = ((utilde[i]-u[i])./@muladd(integrator.opts.abstol+max(abs(uprev[i]),abs(u[i]))*integrator.opts.reltol)) end EEst1 = integrator.opts.internalnorm(atmp) EEst2 = integrator.opts.internalnorm(atmptilde) @@ -157,16 +163,17 @@ end @unpack t,dt,uprev,u,k = integrator @unpack c1,c2,c3,c4,c5,c6,a21,a31,a32,a41,a42,a43,a51,a52,a53,a54,a61,a62,a63,a64,a65,a71,a72,a73,a74,a75,a76,b1,b2,b3,b4,b5,b6,b7 = cache k1 = integrator.fsalfirst - k2 = f(t+c1*dt,uprev+dt*(a21*k1)) - k3 = f(t+c2*dt,uprev+dt*(a31*k1+a32*k2)) - k4 = f(t+c3*dt,uprev+dt*(a41*k1+a42*k2+a43*k3)) - k5 = f(t+c4*dt,uprev+dt*(a51*k1+a52*k2+a53*k3+a54*k4)) - k6 = f(t+dt,uprev+dt*(a61*k1+a62*k2+a63*k3+a64*k4+a65*k5)) - u = uprev+dt*(a71*k1+a72*k2+a73*k3+a74*k4+a75*k5+a76*k6) + a = dt*a21 + k2 = f(@muladd(t+c1*dt),@muladd(uprev+a*k1)) + k3 = f(@muladd(t+c2*dt),@muladd(uprev+dt*(a31*k1+a32*k2))) + k4 = f(@muladd(t+c3*dt),@muladd(uprev+dt*(a41*k1+a42*k2+a43*k3))) + k5 = f(@muladd(t+c4*dt),@muladd(uprev+dt*(a51*k1+a52*k2+a53*k3+a54*k4))) + k6 = f(t+dt,@muladd(uprev+dt*(a61*k1+a62*k2+a63*k3+a64*k4+a65*k5))) + u = @muladd uprev+dt*(a71*k1+a72*k2+a73*k3+a74*k4+a75*k5+a76*k6) integrator.fsallast = f(t+dt,u); k7 = integrator.fsallast if integrator.opts.adaptive - utilde = uprev + dt*(b1*k1 + b2*k2 + b3*k3 + b4*k4 + b5*k5 + b6*k6 + b7*k7) - integrator.EEst = integrator.opts.internalnorm(((utilde-u)/(integrator.opts.abstol+max(abs(uprev),abs(u))*integrator.opts.reltol))) + utilde = @muladd uprev + dt*(b1*k1 + b2*k2 + b3*k3 + b4*k4 + b5*k5 + b6*k6 + b7*k7) + integrator.EEst = integrator.opts.internalnorm(((utilde-u)/@muladd(integrator.opts.abstol+max(abs(uprev),abs(u))*integrator.opts.reltol))) end integrator.k[1] = k1 integrator.k[2] = k2 @@ -198,34 +205,35 @@ end uidx = eachindex(integrator.uprev) @unpack c1,c2,c3,c4,c5,c6,a21,a31,a32,a41,a42,a43,a51,a52,a53,a54,a61,a62,a63,a64,a65,a71,a72,a73,a74,a75,a76,b1,b2,b3,b4,b5,b6,b7 = cache.tab @unpack k1,k2,k3,k4,k5,k6,k7,utilde,tmp,atmp = cache + a = dt*a21 for i in uidx - tmp[i] = uprev[i]+dt*(a21*k1[i]) + tmp[i] = @muladd uprev[i]+a*k1[i] end - f(t+c1*dt,tmp,k2) + f(@muladd(t+c1*dt),tmp,k2) for i in uidx - tmp[i] = uprev[i]+dt*(a31*k1[i]+a32*k2[i]) + tmp[i] = @muladd uprev[i]+dt*(a31*k1[i]+a32*k2[i]) end - f(t+c2*dt,tmp,k3) + f(@muladd(t+c2*dt),tmp,k3) for i in uidx - tmp[i] = uprev[i]+dt*(a41*k1[i]+a42*k2[i]+a43*k3[i]) + tmp[i] = @muladd uprev[i]+dt*(a41*k1[i]+a42*k2[i]+a43*k3[i]) end - f(t+c3*dt,tmp,k4) + f(@muladd(t+c3*dt),tmp,k4) for i in uidx - tmp[i] = uprev[i]+dt*(a51*k1[i]+a52*k2[i]+a53*k3[i]+a54*k4[i]) + tmp[i] = @muladd uprev[i]+dt*(a51*k1[i]+a52*k2[i]+a53*k3[i]+a54*k4[i]) end - f(t+c4*dt,tmp,k5) + f(@muladd(t+c4*dt),tmp,k5) for i in uidx - tmp[i] = uprev[i]+dt*(a61*k1[i]+a62*k2[i]+a63*k3[i]+a64*k4[i]+a65*k5[i]) + tmp[i] = @muladd uprev[i]+dt*(a61*k1[i]+a62*k2[i]+a63*k3[i]+a64*k4[i]+a65*k5[i]) end f(t+dt,tmp,k6) for i in uidx - u[i] = uprev[i]+dt*(a71*k1[i]+a72*k2[i]+a73*k3[i]+a74*k4[i]+a75*k5[i]+a76*k6[i]) + u[i] = @muladd uprev[i]+dt*(a71*k1[i]+a72*k2[i]+a73*k3[i]+a74*k4[i]+a75*k5[i]+a76*k6[i]) end f(t+dt,u,k7) if integrator.opts.adaptive for i in uidx - utilde[i] = uprev[i] + dt*(b1*k1[i] + b2*k2[i] + b3*k3[i] + b4*k4[i] + b5*k5[i] + b6*k6[i] + b7*k7[i]) - atmp[i] = ((utilde[i]-u[i])./(integrator.opts.abstol+max(abs(uprev[i]),abs(u[i]))*integrator.opts.reltol)) + utilde[i] = @muladd uprev[i] + dt*(b1*k1[i] + b2*k2[i] + b3*k3[i] + b4*k4[i] + b5*k5[i] + b6*k6[i] + b7*k7[i]) + atmp[i] = ((utilde[i]-u[i])./@muladd(integrator.opts.abstol+max(abs(uprev[i]),abs(u[i]))*integrator.opts.reltol)) end integrator.EEst = integrator.opts.internalnorm(atmp) end @@ -243,23 +251,24 @@ end @unpack a21,a31,a32,a41,a42,a43,a51,a52,a53,a54,a61,a62,a63,a64,a65,a71,a73,a74,a75,a76,b1,b3,b4,b5,b6,b7,c1,c2,c3,c4,c5,c6 = cache @unpack d1,d3,d4,d5,d6,d7 = cache k1 = integrator.fsalfirst - k2 = f(t+c1*dt,uprev+dt*(a21*k1)) - k3 = f(t+c2*dt,uprev+dt*(a31*k1+a32*k2)) - k4 = f(t+c3*dt,uprev+dt*(a41*k1+a42*k2+a43*k3)) - k5 = f(t+c4*dt,uprev+dt*(a51*k1+a52*k2+a53*k3+a54*k4)) - k6 = f(t+dt,uprev+dt*(a61*k1+a62*k2+a63*k3+a64*k4+a65*k5)) - update = a71*k1+a73*k3+a74*k4+a75*k5+a76*k6 - u = uprev+dt*update + a = dt*a21 + k2 = f(@muladd(t+c1*dt),@muladd(uprev+a*k1)) + k3 = f(@muladd(t+c2*dt),@muladd(uprev+dt*(a31*k1+a32*k2))) + k4 = f(@muladd(t+c3*dt),@muladd(uprev+dt*(a41*k1+a42*k2+a43*k3))) + k5 = f(@muladd(t+c4*dt),@muladd(uprev+dt*(a51*k1+a52*k2+a53*k3+a54*k4))) + k6 = f(t+dt,@muladd uprev+dt*(a61*k1+a62*k2+a63*k3+a64*k4+a65*k5)) + update = @muladd a71*k1+a73*k3+a74*k4+a75*k5+a76*k6 + u = @muladd uprev+dt*update integrator.fsallast = f(t+dt,u); k7 = integrator.fsallast if integrator.opts.adaptive utilde = uprev + dt*(b1*k1 + b3*k3 + b4*k4 + b5*k5 + b6*k6 + b7*k7) - integrator.EEst = integrator.opts.internalnorm( ((utilde-u)/(integrator.opts.abstol+max(abs(uprev),abs(u))*integrator.opts.reltol))) + integrator.EEst = integrator.opts.internalnorm( ((utilde-u)/@muladd(integrator.opts.abstol+max(abs(uprev),abs(u))*integrator.opts.reltol))) end integrator.k[1] = update bspl = k1 - update integrator.k[2] = bspl integrator.k[3] = update - k7 - bspl - integrator.k[4] = (d1*k1+d3*k3+d4*k4+d5*k5+d6*k6+d7*k7) + integrator.k[4] = @muladd (d1*k1+d3*k3+d4*k4+d5*k5+d6*k6+d7*k7) @pack integrator = t,dt,u,k end @@ -276,42 +285,43 @@ end @unpack a21,a31,a32,a41,a42,a43,a51,a52,a53,a54,a61,a62,a63,a64,a65,a71,a73,a74,a75,a76,b1,b3,b4,b5,b6,b7,c1,c2,c3,c4,c5,c6 = cache.tab @unpack k1,k2,k3,k4,k5,k6,k7,dense_tmp3,dense_tmp4,update,bspl,utilde,tmp,atmp = cache @unpack d1,d3,d4,d5,d6,d7 = cache.tab + a = dt*a21 for i in uidx - tmp[i] = uprev[i]+dt*(a21*k1[i]) + tmp[i] = @muladd uprev[i]+a*k1[i] end - f(t+c1*dt,tmp,k2) + f(@muladd(t+c1*dt),tmp,k2) for i in uidx - tmp[i] = uprev[i]+dt*(a31*k1[i]+a32*k2[i]) + tmp[i] = @muladd uprev[i]+dt*(a31*k1[i]+a32*k2[i]) end - f(t+c2*dt,tmp,k3) + f(@muladd(t+c2*dt),tmp,k3) for i in uidx - tmp[i] = uprev[i]+dt*(a41*k1[i]+a42*k2[i]+a43*k3[i]) + tmp[i] = @muladd uprev[i]+dt*(a41*k1[i]+a42*k2[i]+a43*k3[i]) end - f(t+c3*dt,tmp,k4) + f(@muladd(t+c3*dt),tmp,k4) for i in uidx - tmp[i] =uprev[i]+dt*(a51*k1[i]+a52*k2[i]+a53*k3[i]+a54*k4[i]) + tmp[i] = @muladd uprev[i]+dt*(a51*k1[i]+a52*k2[i]+a53*k3[i]+a54*k4[i]) end - f(t+c4*dt,tmp,k5) + f(@muladd(t+c4*dt),tmp,k5) for i in uidx - tmp[i] = uprev[i]+dt*(a61*k1[i]+a62*k2[i]+a63*k3[i]+a64*k4[i]+a65*k5[i]) + tmp[i] = @muladd uprev[i]+dt*(a61*k1[i]+a62*k2[i]+a63*k3[i]+a64*k4[i]+a65*k5[i]) end f(t+dt,tmp,k6) for i in uidx - update[i] = a71*k1[i]+a73*k3[i]+a74*k4[i]+a75*k5[i]+a76*k6[i] - u[i] = uprev[i]+dt*update[i] + update[i] = @muladd a71*k1[i]+a73*k3[i]+a74*k4[i]+a75*k5[i]+a76*k6[i] + u[i] = @muladd uprev[i]+dt*update[i] end f(t+dt,u,k7); if integrator.opts.adaptive for i in uidx - utilde[i] = uprev[i] + dt*(b1*k1[i] + b3*k3[i] + b4*k4[i] + b5*k5[i] + b6*k6[i] + b7*k7[i]) - atmp[i] = ((utilde[i]-u[i])/(integrator.opts.abstol+max(abs(uprev[i]),abs(u[i]))*integrator.opts.reltol)) + utilde[i] = @muladd uprev[i] + dt*(b1*k1[i] + b3*k3[i] + b4*k4[i] + b5*k5[i] + b6*k6[i] + b7*k7[i]) + atmp[i] = ((utilde[i]-u[i])/@muladd(integrator.opts.abstol+max(abs(uprev[i]),abs(u[i]))*integrator.opts.reltol)) end integrator.EEst = integrator.opts.internalnorm(atmp) end for i in uidx bspl[i] = k1[i] - update[i] integrator.k[3][i] = update[i] - k7[i] - bspl[i] - integrator.k[4][i] = (d1*k1[i]+d3*k3[i]+d4*k4[i]+d5*k5[i]+d6*k6[i]+d7*k7[i]) + integrator.k[4][i] = @muladd(d1*k1[i]+d3*k3[i]+d4*k4[i]+d5*k5[i]+d6*k6[i]+d7*k7[i]) end @pack integrator = t,dt,u,k end diff --git a/src/integrators/threaded_rk_integrators.jl b/src/integrators/threaded_rk_integrators.jl index 6de0df57da..2ddd3284d0 100644 --- a/src/integrators/threaded_rk_integrators.jl +++ b/src/integrators/threaded_rk_integrators.jl @@ -12,13 +12,13 @@ end @unpack k1,k2,k3,k4,k5,k6,k7,dense_tmp3,dense_tmp4,update,bspl,utilde,tmp,atmp = cache @unpack d1,d3,d4,d5,d6,d7 = cache.tab dp5threaded_loop1(dt,tmp,uprev,a21,k1,uidx) - f(t+c1*dt,tmp,k2) + f(@muladd(t+c1*dt),tmp,k2) dp5threaded_loop2(dt,tmp,uprev,a31,k1,a32,k2,uidx) - f(t+c2*dt,tmp,k3) + f(@muladd(t+c2*dt),tmp,k3) dp5threaded_loop3(dt,tmp,uprev,a41,k1,a42,k2,a43,k3,uidx) - f(t+c3*dt,tmp,k4) + f(@muladd(t+c3*dt),tmp,k4) dp5threaded_loop4(dt,tmp,uprev,a51,k1,a52,k2,a53,k3,a54,k4,uidx) - f(t+c4*dt,tmp,k5) + f(@muladd(t+c4*dt),tmp,k5) dp5threaded_loop5(dt,tmp,uprev,a61,k1,a62,k2,a63,k3,a64,k4,a65,k5,uidx) f(t+dt,tmp,k6) dp5threaded_loop6(dt,u,uprev,a71,k1,a73,k3,a74,k4,a75,k5,a76,k6,update,uidx) @@ -40,45 +40,46 @@ end end @noinline function dp5threaded_loop1(dt,tmp,uprev,a21,k1,uidx) + a = dt*a21 Threads.@threads for i in uidx - tmp[i] = uprev[i]+dt*(a21*k1[i]) + tmp[i] = @muladd uprev[i]+a*k1[i] end end @noinline function dp5threaded_loop2(dt,tmp,uprev,a31,k1,a32,k2,uidx) Threads.@threads for i in uidx - tmp[i] = uprev[i]+dt*(a31*k1[i]+a32*k2[i]) + tmp[i] = @muladd uprev[i]+dt*(a31*k1[i]+a32*k2[i]) end end @noinline function dp5threaded_loop3(dt,tmp,uprev,a41,k1,a42,k2,a43,k3,uidx) Threads.@threads for i in uidx - tmp[i] = uprev[i]+dt*(a41*k1[i]+a42*k2[i]+a43*k3[i]) + tmp[i] = @muladd uprev[i]+dt*(a41*k1[i]+a42*k2[i]+a43*k3[i]) end end @noinline function dp5threaded_loop4(dt,tmp,uprev,a51,k1,a52,k2,a53,k3,a54,k4,uidx) Threads.@threads for i in uidx - tmp[i] =uprev[i]+dt*(a51*k1[i]+a52*k2[i]+a53*k3[i]+a54*k4[i]) + tmp[i] = @muladd uprev[i]+dt*(a51*k1[i]+a52*k2[i]+a53*k3[i]+a54*k4[i]) end end @noinline function dp5threaded_loop5(dt,tmp,uprev,a61,k1,a62,k2,a63,k3,a64,k4,a65,k5,uidx) Threads.@threads for i in uidx - tmp[i] = uprev[i]+dt*(a61*k1[i]+a62*k2[i]+a63*k3[i]+a64*k4[i]+a65*k5[i]) + tmp[i] = @muladd uprev[i]+dt*(a61*k1[i]+a62*k2[i]+a63*k3[i]+a64*k4[i]+a65*k5[i]) end end @noinline function dp5threaded_loop6(dt,u,uprev,a71,k1,a73,k3,a74,k4,a75,k5,a76,k6,update,uidx) Threads.@threads for i in uidx - update[i] = a71*k1[i]+a73*k3[i]+a74*k4[i]+a75*k5[i]+a76*k6[i] - u[i] = uprev[i]+dt*update[i] + update[i] = @muladd a71*k1[i]+a73*k3[i]+a74*k4[i]+a75*k5[i]+a76*k6[i] + u[i] = @muladd uprev[i]+dt*update[i] end end @noinline function dp5threaded_adaptiveloop(dt,utilde,uprev,b1,k1,b3,k3,b4,k4,b5,k5,b6,k6,b7,k7,atmp,u,abstol,reltol,uidx) Threads.@threads for i in uidx - utilde[i] = uprev[i] + dt*(b1*k1[i] + b3*k3[i] + b4*k4[i] + b5*k5[i] + b6*k6[i] + b7*k7[i]) - atmp[i] = ((utilde[i]-u[i])/(abstol+max(abs(uprev[i]),abs(u[i]))*reltol)) + utilde[i] = @muladd uprev[i] + dt*(b1*k1[i] + b3*k3[i] + b4*k4[i] + b5*k5[i] + b6*k6[i] + b7*k7[i]) + atmp[i] = ((utilde[i]-u[i])/@muladd(abstol+max(abs(uprev[i]),abs(u[i]))*reltol)) end end diff --git a/src/misc_utils.jl b/src/misc_utils.jl index 897df97da7..765c3c3e85 100644 --- a/src/misc_utils.jl +++ b/src/misc_utils.jl @@ -56,18 +56,35 @@ end function to_muladd(ex) is_add_operation(ex) || return ex - operands = collect(zip( - to_muladd.((x->x.args[2]).(ex.args[2:end])), - to_muladd.((x->x.args[3]).(ex.args[2:end])))) - last_operation = :($(operands[end][1]) * $(operands[end][2])) + all_operands = ex.args[2:end] + mul_operands = filter(is_mul_operation, all_operands) + odd_operands = filter(x->!is_mul_operation(x), all_operands) - foldr(last_operation, operands[1:end-1]) do xs, r + muladd_operands = collect(zip( + to_muladd.((x->x.args[2]).(mul_operands)), + to_muladd.((x->x.args[3]).(mul_operands)))) + + if isempty(odd_operands) + to_be_muladded = muladd_operands[1:end-1] + last_operation = :($(muladd_operands[end][1]) * $(muladd_operands[end][2])) + else + to_be_muladded = muladd_operands + last_operation = make_addition(odd_operands) + end + + foldr(last_operation, to_be_muladded) do xs, r :($(Base.muladd)($(xs[1]), $(xs[2]), $r)) end end -is_add_operation(ex::Expr) = ex.head == :call && !isempty(ex.args) && ex.args[1] == :+ -is_add_operation(ex) = false + +is_operation(ex::Expr, op::Symbol) = ex.head == :call && !isempty(ex.args) && ex.args[1] == op +is_operation(ex, op::Symbol) = false + +is_add_operation(ex) = is_operation(ex, :+) +is_mul_operation(ex) = is_operation(ex, :*) + +make_addition(args) = length(args) == 1 ? args[1] : Expr(:call, :+, args...) realtype{T}(::Type{T}) = T realtype{T}(::Type{Complex{T}}) = T diff --git a/test/ode/ode_convergence_tests.jl b/test/ode/ode_convergence_tests.jl index db0e586d10..5fc650de95 100644 --- a/test/ode/ode_convergence_tests.jl +++ b/test/ode/ode_convergence_tests.jl @@ -1,5 +1,5 @@ # This definitely needs cleaning -using OrdinaryDiffEq, DiffEqDevTools, DiffEqBase, DiffEqProblemLibrary +using OrdinaryDiffEq, DiffEqDevTools, DiffEqBase, DiffEqProblemLibrary, Base.Test probArr = Vector{ODETestProblem}(2) probArr[1] = prob_ode_linear @@ -9,7 +9,6 @@ srand(100) println("Convergence Test on Linear") dts = 1.//2.^(8:-1:4) testTol = 0.2 -superduperbool = Vector{Bool}(2) for i = 1:2 prob = probArr[i]