I have been trying to optimize my to compute array operations in Matlab using parfor, however, when I tried using par for on the outer loop (v loop) the runtime increased significantly due to the overhead.

I tried to use parfor within the for loop that's taking up the most computation time, but by nesting it in another for loop it didn't do anything to help speed up the calculations.

The Loop in Question: Is there any way to vectorize this for loop? How do I know if it's even possible?

```
SS = zeros(1,sz(2),sz(3)); %integral in the steady-state.
i = 1
for j = 1:sz(2)
phii = phi(j);
for d = 1:sz(3)
rprime = r;
phiprime = phi;
s = reshape(tau(i,j,d,:),1,n4);
[R3,PHI3,S3] = meshgrid(rprime,phiprime,s);
%g = 100.*exp(-(10^5)*(R3-(b/2)).^2); %heat source.
%disp('trying my best')
%g = exp(-(10^5)*(R3-(b/2)).^2).*(exp(-(10^5)*(PHI3-2*pi/3).^2)); %stationary point heat source.
g = 1000000*exp(-(10^5)*(R3-(b/maxt)*S3).^2).*(exp(-(10^5)*(PHI3-0).^2)+exp(-(10^5)*(PHI3-2*pi).^2)+exp(-(10^5)*(PHI3-2*pi/3).^2)+exp(-(10^5)*(PHI3-4*pi/3).^2)); %stationary point heat source.
%g = 100.*exp(-(10^5)*(R3-(b/2)).^2).*(exp(-(10^5)*(PHI3-0).^2)+exp(-(10^5)*(PHI3-5*pi/3).^2)+exp(-(10^5)*(PHI3-4*pi/3).^2)+exp(-(10^5)*(PHI3-pi).^2)+exp(-(10^5)*(PHI3-2*pi/3).^2)+exp(-(10^5)*(PHI3-pi/3).^2)); %stationary point heat source.
%g = 100.*exp(-(10^5)*(R3-S3).^2).*(exp(-(10^5)*(PHI3-0).^2)+exp(-(10^5)*(PHI3-5*pi/3).^2)+exp(-(10^5)*(PHI3-4*pi/3).^2)+exp(-(10^5)*(PHI3-pi).^2)+exp(-(10^5)*(PHI3-2*pi/3).^2)+exp(-(10^5)*(PHI3-pi/3).^2)); %heat source.
%g = 100.*exp(-(10^5)*(R3-(S3 - b/2)).^2); %heat source.
y = trapz(phiprime,R3.*((sqrt(2)/b)*(1./(sqrt((H2^2/bm^2) + (1 - (v^2/(bm^2*b^2)))))).*(besselj(v,bm*R3)./besselj(v,bm*b))).*cos(v*(phii-PHI3)).*g,1);
gbarbar = trapz(rprime,y,2); %integral transform of heat source.
gbarbar = reshape(gbarbar,1,n4);
[PHI2,S2] = meshgrid(phiprime,s);
%disp('PHI2 created from mesh grid')
%disp(size(PHI2))
%disp(size(phiprime))
%disp(size(s))
sz2 = size(PHI2);
f = h2*37*ones(sz2(1),sz2(2)); %boundary condition.
fbar = trapz(phiprime,cos(v*(phii-PHI2)).*f,2);
fbar = reshape(fbar,1,n4);
A = (alpha/k)*gbarbar + ((sqrt(2)*alpha)/k2)*(1/(sqrt((H2^2/bm^2) + (1 - (v^2/(bm^2*b^2))))))*fbar;
SS(i,j,d) = trapz(s,exp(-alpha*bm^2*(T(i,j,d)-s)).*A);
end
end
```

Raw code without any parallelization. If there are any ways to speed this up that I'm missing that would be amazing. I have also tried to implement this in python using numpy, numba, scipy, and multiprocessing, but to no avail. Matlab is able to perform better than my python implementation so I just want to try and speed it up.

```
alpha = 1.34*10^-7; %alpha^2 = 1, diffusivity constant. *Ozisik uses alpha, not alpha^2.
b = 0.035; %radius of cylinder (or disk...cylinder with height 1).
k = 0.497; %thermal conductivity.
k2 = 0.497;
h2 = 100;
H2 = h2/k2;
maxt = 600; %maximum time.
n1 = 31; %steps of length.
n2 = 31; %steps of angle.
n3 = 31; %steps of time.
r = linspace(0,b,n1); phi = linspace(0,2*pi,n2); t = linspace(0,maxt,n3);
[R,PHI,T] = meshgrid(r,phi,t);
%exp(-(10^5)*(phi-pi/2).^2)
%return
V = 4; %will use first V-1 orders of Bessel's differential equation. (v = 0,1,2,...,V-1). sum of each order Bessel's diff. eqn.
beta = []; %FOR DEBUGGING.
numelbetas = []; %number of eigenvalues for each Bessel function.
beta2 = zeros(V,50); %hardcoded 50 to ensure big enough matrix for ALL eigenvalues. will trim later in code.
%BETA = beta; %for debugging.
%%
maxX = 1500; %maximum length to find positive roots.
%disp('v is here')
for v = 1:V
vv = v-1; %start v from 0,1,2,...,V-1.
for i = 1:maxX %find roots of equation near 1 through 100.
root = fzero(@(x) (1/2)*x.*(besselj(vv-1,b*x) - besselj(vv+1,b*x)) + H2*besselj(vv,b*x),i);
if root > 0 %keep only positive roots.
beta(v,i) = root;
end
end
end
beta = round(beta,2);
beta2 = zeros(V,100);
numelbeta = [];
for v = 1:V
bvec = [];
for i = 1:maxX
if beta(v,i) > 0.5 && ismember(beta(v,i),bvec)==0 %if nonzero and not a duplicate. *note: all roots will be above 0.5, so use 0.5 to assure no roots NEAR zero.
bvec = [bvec, beta(v,i)];
end
end
beta2(v,1:numel(bvec)) = bvec;
numelbeta(v) = numel(bvec);
end
numelbeta = min(numelbeta);
beta = beta2(:,1:numelbeta);
disp(['eigenvalues found at time' num2str(toc)]);
%%
M = 8;%numel(beta(1,:)); %max number of eigenvalues.
finalu = zeros(n1,n2,n3,V);
FINALU = zeros(n1,n2,n3);
% Creating the tau/tprime.
n4 = 300; %steps of tau.
sz = size(T);
tau = zeros(1, sz(2), sz(3), n4); %integrate from 0 to t. *note: SS is r-independent, so we will repmat later.
for i = 1
for j = 1:sz(2)
for d = 1:sz(3)
tau(i,j,d,:) = linspace(0,T(i,j,d),n4);
end
end
end
for v = 1:V %for each order Bessel's differential equation.
%*note: v=0,1,2,...
betavals = beta(v,:); %eigenvalues for Bessel's differential equation of order v.
v = v-1; %to account for v=0.
u = zeros(n1,n2,n3); %temperature solution for Bessel's differential equation of order v.
U = zeros(n1,n2,n3,M); %temperature responses (m = 1,2,...,M). sum of each eigenfunction
%component for Bessel's differential equation of order v.
for m = 1:M %for each eigenvalue/eigenfunction, for Bessel's differential equation of order v.
bm = betavals(m);
%Finding Fbarbar.
rprime = r;
phiprime = phi;
Fbarbar = [];
[R2,PHI2] = meshgrid(rprime,phiprime);
%disp(size(PHI2));
for i = 1:numel(phi)
phii = phi(i);
F = 37*ones(n1,n2);%sin(pi*R2);
%F = exp(-(10^5)*(R2-(b/2)).^2).*(exp(-(10^5)*(PHI2-0).^2)+exp(-(10^5)*(PHI2-2*pi/3).^2)+exp(-(10^5)*(PHI2-4*pi/3).^2)+exp(-(10^5)*(PHI2-2*pi).^2));
%F = exp(-(10^5)*(R2-(b/2)).^2).*(exp(-(10^5)*(PHI2-pi/4).^2)+exp(-(10^5)*(PHI2-(3*pi/4)).^2)+exp(-(10^5)*(PHI2-(5*pi/4)).^2)+exp(-(10^5)*(PHI2-(7*pi/4)).^2));
%F = exp(-(10^5)*(PHI2-2*pi/3).^2);
%F = exp(-(10^5)*(R2-(b/2)).^2).*(exp(-(10^5)*(PHI2-0).^2)+exp(-(10^5)*(PHI2-pi/3).^2)+exp(-(10^5)*(PHI2-2*pi/3).^2)+exp(-(10^5)*(PHI2-pi).^2)+exp(-(10^5)*(PHI2-4*pi/3).^2)+exp(-(10^5)*(PHI2-5*pi/3).^2)+exp(-(10^5)*(PHI2-2*pi).^2)); %initial condition.
a = trapz(phiprime,R2.*((sqrt(2)/b)*(1./(sqrt((H2^2/bm^2) + (1 - (v^2/(bm^2*b^2)))))).*(besselj(v,bm*R2)./besselj(v,bm*b))).*cos(v*(phii-PHI2)).*F, 1);
Fbarbar(i) = trapz(rprime,a,2);
end
[dumbyR, Fbarbar, dumbyT] = meshgrid(r,Fbarbar,t); %dumbyR and dumbyT are just placeholders to compute Fbarbar in the appropriate dimensions.
SS = zeros(1,sz(2),sz(3)); %integral in the steady-state.
i = 1
for j = 1:sz(2)
phii = phi(j);
for d = 1:sz(3)
rprime = r;
phiprime = phi;
s = reshape(tau(i,j,d,:),1,n4);
[R3,PHI3,S3] = meshgrid(rprime,phiprime,s);
%g = 100.*exp(-(10^5)*(R3-(b/2)).^2); %heat source.
%disp('trying my best')
%g = exp(-(10^5)*(R3-(b/2)).^2).*(exp(-(10^5)*(PHI3-2*pi/3).^2)); %stationary point heat source.
g = 1000000*exp(-(10^5)*(R3-(b/maxt)*S3).^2).*(exp(-(10^5)*(PHI3-0).^2)+exp(-(10^5)*(PHI3-2*pi).^2)+exp(-(10^5)*(PHI3-2*pi/3).^2)+exp(-(10^5)*(PHI3-4*pi/3).^2)); %stationary point heat source.
%g = 100.*exp(-(10^5)*(R3-(b/2)).^2).*(exp(-(10^5)*(PHI3-0).^2)+exp(-(10^5)*(PHI3-5*pi/3).^2)+exp(-(10^5)*(PHI3-4*pi/3).^2)+exp(-(10^5)*(PHI3-pi).^2)+exp(-(10^5)*(PHI3-2*pi/3).^2)+exp(-(10^5)*(PHI3-pi/3).^2)); %stationary point heat source.
%g = 100.*exp(-(10^5)*(R3-S3).^2).*(exp(-(10^5)*(PHI3-0).^2)+exp(-(10^5)*(PHI3-5*pi/3).^2)+exp(-(10^5)*(PHI3-4*pi/3).^2)+exp(-(10^5)*(PHI3-pi).^2)+exp(-(10^5)*(PHI3-2*pi/3).^2)+exp(-(10^5)*(PHI3-pi/3).^2)); %heat source.
%g = 100.*exp(-(10^5)*(R3-(S3 - b/2)).^2); %heat source.
y = trapz(phiprime,R3.*((sqrt(2)/b)*(1./(sqrt((H2^2/bm^2) + (1 - (v^2/(bm^2*b^2)))))).*(besselj(v,bm*R3)./besselj(v,bm*b))).*cos(v*(phii-PHI3)).*g,1);
gbarbar = trapz(rprime,y,2); %integral transform of heat source.
gbarbar = reshape(gbarbar,1,n4);
[PHI2,S2] = meshgrid(phiprime,s);
%disp('PHI2 created from mesh grid')
%disp(size(PHI2))
%disp(size(phiprime))
%disp(size(s))
sz2 = size(PHI2);
f = h2*37*ones(sz2(1),sz2(2)); %boundary condition.
fbar = trapz(phiprime,cos(v*(phii-PHI2)).*f,2);
fbar = reshape(fbar,1,n4);
A = (alpha/k)*gbarbar + ((sqrt(2)*alpha)/k2)*(1/(sqrt((H2^2/bm^2) + (1 - (v^2/(bm^2*b^2))))))*fbar;
SS(i,j,d) = trapz(s,exp(-alpha*bm^2*(T(i,j,d)-s)).*A);
end
end
SS = reshape(SS,n2,1,n3);
%szzz = size(SS);
%disp(szzz)
SS = repmat(SS,1,n1,1);
%SS = repmat(SS,1,n1); %31,31,31
U(:,:,:,m) = ((sqrt(2)/b)*(1./(sqrt((H2^2/bm^2) + (1 - (v^2/(bm^2*b^2)))))).*(besselj(v,bm*R)./besselj(v,bm*b))).*Fbarbar.*exp(-alpha*bm^2*T)...
+ ((sqrt(2)/b)*(1./(sqrt((H2^2/bm^2) + (1 - (v^2/(bm^2*b^2)))))).*(besselj(v,bm*R)./besselj(v,bm*b))).*SS;
disp('big U')
disp(size(U))
disp(['m = ' num2str(m) ', found at time ' num2str(toc)]);
end
u = sum(U,4);
if v == 0
u = (1/(2*pi))*u;
else
u = (1/pi)*u;
end
disp('little u')
disp(size(u))
finalu(:,:,:,v+1) = u; %v+1 because we defined v = v-1 to account for v=0.
disp(['v = ' num2str(v) ', found at time ' num2str(toc)]);
end
FINALU = sum(finalu,4);
disp('FINALU SIZE')
disp(size(FINALU))
[R,PHI] = meshgrid(r,phi);
for i = 1:n3
uu = FINALU(:,:,i);
[X,Y,Z] = pol2cart(PHI,R,uu);
figure(1);clf;surf(X,Y,Z,'edgecolor','none');axis([-b b -b b min(FINALU(:)) max(FINALU(:))]);colormap(gray);
xlabel('x');ylabel('y');zlabel('temperature');title(['Time: ' num2str(t(i)) ' seconds']);
pause(0.5)
%return
end
disp('part 4: ');
toc
return
%}
```