% -*- mode: Noweb; noweb-code-mode: c-mode -*-
@
[[aaStats]] contains a structure and the routines to compute the covariance matrix of the angle--of--arrival.
\index{aaStats}
\section{The files}
\subsection{Header}
<<aaStats.h>>=
#ifndef __AASTATS_H__
#define __AASTATS_H__
#ifndef __UTILITIES_H__
#include "utilities.h"
#endif
#ifndef __ATMOSPHERE_H__
#include "atmosphere.h"
#endif
//#define AASTATS_DEBUG
//#define PASTATS_DEBUG
<<aaStats structure>>
<<paStats stucture>>
@
The structures are :
<<aaStats structure>>=
struct aaStats {
<<aaStats parameters>>
void setup(int N, const atmosphere *atm, float lenslet_pitch, const source *src, int N_SRC);
void cleanup(void);
void info(int kappa, float d);
float variance(void);
void toFile(char *filename);
};
@ and
<<paStats stucture>>=
struct paStats {
<<paStats parameters>>
void setup(int M_, int N_, int osf_, const atmosphere *atm, float lenslet_pitch,
source *phase_src, int N_P_SRC, const source *slopes_src, int N_S_SRC);
void setup(int M_, int N_, int osf_, const atmosphere *atm, float lenslet_pitch,
const source *slopes_src, int N_S_SRC,
float z_radius);
void cleanup(void);
void info(int kappa, float d);
/*
void MVM(float *in_vector, float *out_vector,
float d1, float g1, int N1,
float d2, float g2, int N2);
*/
void MVM(float *out_vector, float *in_vector,
float d1, float g1, int N1,
float d2, int N2,
atmosphere *atm,
source *phase_src, int N_P_SRC,
source *slopes_src, int N_S_SRC) ;
void toFile(char *filename);
void variance(void);
};
#endif // __AASTATS_H__
@
\subsection{Source}
<<aaStats.cu>>=
#include "aaStats.h"
<<sinc>>
<<somb>>
<<subaperture FT>>
<<subaperture PSF>>
<<phase power spectrum>>
<<aaStats power spectrum kernel>>
<<paStats power spectrum kernel>>
<<paStats polar average power spectrum kernel>>
<<paStats power spectrum kernel (single layer)>>
<<covariance extraction>>
<<PA covariance extraction>>
<<aaStats setup>>
<<paStats setup>>
<<save aa covariance to file>>
<<save pa covariance to file>>
<<aaStats cleanup>>
<<paStats cleanup>>
<<aaStats info>>
<<paStats info>>
<<aaStats variance>>
@
\section{The theory}
Shack--Hartmann WFS centroids are a measure of the spatial derivatives of the wavefront averaged on each subaperture.
These derivatives are often referred as angle of arrivals $\vec \alpha=(\alpha_x\alpha_y,)$:
\begin{eqnarray}
\label{eq:1}
\alpha_x &=& {\lambda\over 2\pi d^2} {\partial\varphi\over\partial x} \ast \Pi\left(x\over d\right)\Pi\left(y\over d\right),\\
\alpha_y &=& {\lambda\over 2\pi d^2} {\partial\varphi\over\partial y} \ast \Pi\left(x\over d\right)\Pi\left(y\over d\right),
\end{eqnarray}
where $\lambda$ is the sensing wavelength and $d$ in the size of one subaperture.
The covariance is derived from the power spectrum density of the angle of arrivals $W_{\vec \alpha \cdot\ \vec \beta}(\vec f)$:
\begin{equation}
\label{eq:2}
W_{\vec \alpha\cdot\vec \beta}(\vec f) = \lambda^2\left( \vec f \cdot \vec\alpha \right)\left( \vec f \cdot \vec\beta \right) W_\varphi(\vec f) G^2(\vec f)
\end{equation}
$W_\varphi(f)$ is the wavefront power spectrum density given by
\begin{equation}
\label{eq:3}
W_\varphi(f) = 0.0229 r_0^{-5/3} \left( f^2 + {1\over\mathcal L_0^2} \right)^{-11/6}.
\end{equation}
$G(\vec f)$ is the point spread function of a subaperture of the WFS,
\begin{equation}
\label{eq:4}
G(\vec f) = {\sin(\pi d f_x) \over \pi d f_x }{ \sin(\pi d f_y) \over \pi d f_y } .
\end{equation}
The covariance $C_{\vec \alpha \cdot\ \vec \beta}(\vec\rho)$ is derived from the Wiener--Khinchine theorem:
\begin{equation}
\label{eq:5}
C_{\vec \alpha \cdot\ \vec \beta}(\vec\rho) = \mathcal F^{-1} \left[ W_{\vec \alpha\cdot\vec \beta}(\vec f) \right] (\vec\rho)
\end{equation}
where $\mathcal F^{-1}$ stands for the inverse Fourier transform.
The covariance between $\varphi$ and $\vec \alpha$ is derived from the cross--power spectrum density given by:
\begin{equation}
\label{eq:6}
W_{\varphi\vec \alpha}(\vec f) = -\iota\lambda\left( \vec f \cdot \vec\alpha \right) W_\varphi(\vec f) G(\vec f)
\end{equation}
$W_\varphi(f)$ is the wavefront power spectrum density given by
\begin{equation}
\label{eq:7}
W_\varphi(f) = 0.0229 r_0^{-5/3} \left( f^2 + {1\over\mathcal L_0^2} \right)^{-11/6}.
\end{equation}
$G(\vec f)$ is the point spread function of a subaperture of the WFS,
\begin{equation}
\label{eq:8}
G(\vec f) = {\sin(\pi d f_x) \over \pi d f_x }{ \sin(\pi d f_y) \over \pi d f_y } .
\end{equation}
The covariance $C_{\varphi \vec \alpha}(\vec\rho)$ is derived from the Wiener--Khinchine theorem:
\begin{equation}
\label{eq:9}
C_{\varphi \vec \alpha}(\vec\rho) = \mathcal F^{-1} \left[ W_{\varphi\vec \alpha}(\vec f) \right] (\vec\rho)
\end{equation}
where $\mathcal F^{-1}$ stands for the inverse Fourier transform.
\section{Parameters}
\label{sec:parameters}
The parameters of the [[aaStats]] structure are:
<<aaStats parameters>>=
int N, N2, NU, NU2, NF, NF2, psd_size, cov_size, ind_size;
float2 *d__psd;
float *d__cov, *d__alpha, *d__beta, n_full, n_comp, b_full, b_comp, cov_eval_et, sampling;
cufftHandle plan;
#if defined(AASTATS_DEBUG) || defined(PASTATS_DEBUG)
float2 *psd, *cov;
#endif
int N_SRC2;
@ The parameters of the [[paStats]] structure are:
<<paStats parameters>>=
int osf, M, shift;
int *M_LAYER;
<<aaStats parameters>>
@
\section{Angle--of--arrival covariance matrix functions}
\label{sec:aa-functions}
\index{aaStats!aaStats}
The covariance matrix is computed for a square lenslet array of size $[[N]]\times[[N]]$ and of pitch [[sampling]].
The covariance matrix depends on the atmosphere parameters and altitude profile.
The covariance matrix is computed for a vector of [[N_SRC]] sources.
The covariance matrix is written:
\begin{eqnarray}
\label{eq:10}
\Theta = \left[
\begin{array}{ccc}
\Theta_{11}&\cdots&\Theta_{1[[N_SRC]]} \\
\vdots &\ddots & \vdots \\
\Theta_{[[N_SRC]]1} & \vdots & \Theta_{[[N_SRC]][[N_SRC]]}
\end{array}
\right]
\end{eqnarray}
with
\begin{eqnarray}
\label{eq:11}
\Theta_{ij} = \left[
\begin{array}{cc}
\Theta_{{x_i}{x_j}} & \Theta_{{x_i}{y_j}} \\
\Theta_{{y_i}{x_j}} & \Theta_{{y_i}{y_j}}
\end{array}
\right].
\end{eqnarray}
Each matrix $\Theta_{{x_i}{x_j}}$ is a two--level recursive block Toeplitz (2RBT) $[[N]]^2\times[[N]]^2$ matrix.
Toeplitz matrices \cite{GolubVanLoan} are fully defined with their first row and column.
So the matrix $\Theta_{{x_i}{x_j}}$ is reduced to a $[[NU]]=(2[[N]]-1)^2$ elements vector $t_\Theta^{{x_i}{x_j}}$ and the matrix $\Theta_{ij}$ becomes the matrix
\begin{equation}
\label{eq:12}
T_{ij} = \left[ t_\Theta^{{x_i}{x_j}} t_\Theta^{{x_i}{y_j}} t_\Theta^{{y_i}{x_j}} t_\Theta^{{y_i}{y_j}} \right].
\end{equation}
Finally the Toeplitz compressed version of the matrix $\Theta$ is written:
\begin{equation}
\label{eq:13}
T = \left[ t_\Theta^{{x_1}{x_1}} t_\Theta^{{x_1}{y_1}} \cdots t_\Theta^{{x_1}{x_[[N_SRC]]}} t_\Theta^{{x_1}{y_[[N_SRC]]}}
\cdots t_\Theta^{{y_[[N_SRC]]}{x_1}} t_\Theta^{{y_[[N_SRC]]}{y_1}} \cdots t_\Theta^{{y_[[N_SRC]]}{x_[[N_SRC]]}} t_\Theta^{{y_[[N_SRC]]}{y_[[N_SRC]]}} \right].
\end{equation}
@
\subsection{Setup \& Cleanup}
\label{sec:setup--cleanup}
\index{aaStats!aaStats!setup}
<<aaStats setup>>=
void aaStats::setup(int N_, const atmosphere *atm, float lenslet_pitch,
const source *src, int N_SRC)
{
N_SRC2 = N_SRC*N_SRC;
int BATCH = 1;
<<aaStats setup declarations>>
psd_size = sizeof(float2)*NF2;
<<aaStats setup allocations>>
stopwatch tid;
tid.tic();
<<aaStats evaluate covariance>>
tid.toc(&cov_eval_et,"AA covariance evaluation");
info(kappa, sampling);
}
@ The parameters are initialized in the setup function:
<<aaStats setup declarations>>=
float alpha[2] = {1,0}, beta[2] = {0,1}, kappa;
N = N_;
N2 = N*N;
NU = 2*N-1;
NU2 = NU*NU;
NF = 4096;
NF2 = NF*NF;
kappa = 4;
sampling = lenslet_pitch;
psd_size = sizeof(float2)*NF2*4;
cov_size = sizeof(float)*NU2*4*N_SRC2;
ind_size = sizeof(int)*N2;
@ where the memory is also allocated:
<<aaStats setup allocations>>=
HANDLE_ERROR( cudaMalloc((void**)&d__psd, psd_size ) );
HANDLE_ERROR( cudaMalloc((void**)&d__alpha, 2*sizeof(float) ) );
HANDLE_ERROR( cudaMalloc((void**)&d__beta, 2*sizeof(float) ) );
HANDLE_ERROR( cudaMemcpy( d__alpha, alpha, 2*sizeof(float),
cudaMemcpyHostToDevice) );
HANDLE_ERROR( cudaMemcpy( d__beta, beta, 2*sizeof(float),
cudaMemcpyHostToDevice) );
HANDLE_ERROR( cudaMalloc((void**)&d__cov, cov_size ) );
INFO("@(CEO)>aaStats: Creating 2D covariance FFT plan\n");
int n_DFT[2] = {NF, NF};
int iodist = NF2;
/* Create a 2D FFT plan. */
HANDLE_ERROR_CUFFT(cufftPlanMany(&plan, 2, n_DFT,
NULL, 1, iodist,
NULL, 1, iodist,
CUFFT_C2C,BATCH),
"Unable to create plan");
/* HANDLE_ERROR_CUFFT(cufftSetCompatibilityMode(plan, CUFFT_COMPATIBILITY_NATIVE), */
/* "Unable to set compatibility mode to native"); */
@
The covariance is derived from Eq.~(\ref{eq:5}) for 2 sources in the directions $\vec\theta_1$ and $\vec\theta_2$ propagated through a [[N_LAYER]] atmospheric profile.
It is given by
\begin{equation}
\label{eq:14}
C_{\vec \alpha \cdot\ \vec \beta}^{12}(\vec\rho) = \sum_{l=1}^{[[N]]} C_{\vec \alpha \cdot\ \vec \beta}^l(\vec\rho_l)
\end{equation}
where
\begin{equation}
\label{eq:15}
\vec \rho_l = \gamma_l\vec\rho + h_l(\vec\theta_2-\vec\theta_1)
\end{equation}
with $\gamma_l=1-h_l/z_\ast$, $h_l$ is the atmosphere layer altitude and $z_\ast$ is the sources height.
Invoking the Wiener--Khinchine theorem, the translation and scaling properties of the Fourier transform and using Eq.~(\ref{eq:2}), the covariance for a single atmospheric layer at altitude $h_l$ is written:
\begin{equation}
\label{eq:16}
C_{\vec \alpha \cdot\ \vec \beta}^{12,l}(\vec\rho_l) = \mathcal F^{-1} \left[ \lambda^2 \left( \vec f \cdot \vec\alpha \right)\left( \vec f \cdot \vec\beta \right) W_\varphi(\vec f) G^2(\gamma_l\vec f) \exp\left( 2\iota\pi h_l\vec f \cdot (\vec\theta_2-\vec\theta_1) \right) \right] (\gamma_l\vec\rho)
\end{equation}
Setting
\begin{equation}
\label{eq:17}
W_{\vec \alpha\cdot\vec \beta}^{l,12}(\vec f) = \lambda^2 \left( \vec f \cdot \vec\alpha \right)\left( \vec f \cdot \vec\beta \right) W_\varphi(\vec f) G^2(\gamma_l\vec f) \exp\left( 2\iota\pi h_l\vec f \cdot (\vec\theta_2-\vec\theta_1) \right),
\end{equation}
Eq.~(\ref{eq:14}) is now given by
\begin{equation}
\label{eq:18}
C_{\vec \alpha \cdot\ \vec \beta}^{12}(\vec\rho) = \sum_{l=1}^{[[N]]} \mathcal F^{-1} \left[ W_{\vec \alpha\cdot\vec \beta}^{l,12}(\vec f) \right] (\gamma_l\vec\rho).
\end{equation}
Calling $\delta=d/\kappa$ the sampling step of the vector $\vec\rho$ with the integer $\kappa\geq 1$, $\vec\rho_l$ is given by
\begin{equation}
\label{eq:19}
\vec \rho_l = \gamma_l\delta(k\vec x + l\vec y),\quad\forall l,k\in \left[0,\cdots,[[NF]]-1\right],
\end{equation}
$\vec x$ and $\vec y$ are the unit vectors of the X and Y axis, respectively.
Eq.~(\ref{eq:18}) becomes
\begin{equation}
\label{eq:20}
C_{\vec \alpha \cdot\ \vec \beta}^{12}(k,l) = \sum_{l=1}^{[[N]]} \iint {\mathrm d^2}\vec f W_{\vec \alpha\cdot\vec \beta}^{l,12}(\vec f) \exp\left( 2\iota\pi \gamma_l \delta (k f_x + l f_y) \right)
\end{equation}
Discretizing Eq.~(\ref{eq:20}) with
\begin{equation}
\label{eq:21}
\gamma_l\delta f_x = n/[[NF]],\quad \gamma_l\delta f_y = m/[[NF]],\quad \forall n,m\in \left[0,\cdots,[[NF]]-1\right],
\end{equation}
Eq.~(\ref{eq:20}) is transformed into
\begin{eqnarray}
\label{eq:22}
C_{\vec \alpha \cdot\ \vec \beta}^{12}(k,l) &=& \sum_{l=1}^{[[N]]} \sum_{n=0}^{[[NF]]-1} \sum_{m=0}^{[[NF]]-1} \left( 1\over\gamma_l\delta [[NF]]\right)^2 W_{\vec \alpha\cdot\vec \beta}^{l,12}\left({n\over\gamma_l\delta [[NF]]},{m\over\gamma_l\delta [[NF]]}\right) \exp\left( 2\iota\pi {(k n + l m)\over [[NF]]} \right) \\
&=& \sum_{n=0}^{[[NF]]-1} \sum_{m=0}^{[[NF]]-1} W_{\vec \alpha\cdot\vec \beta}^{12}(n,m) \exp\left( 2\iota\pi {(k n + l m)\over [[NF]]} \right)
\end{eqnarray}
with
\begin{equation}
\label{eq:23}
W_{\vec \alpha\cdot\vec \beta}^{12}(n,m) = \sum_{l=1}^{[[N]]} \left( 1\over\gamma_l\delta [[NF]]\right)^2 W_{\vec \alpha\cdot\vec \beta}^{l,12}\left({n\over\gamma_l\delta [[NF]]},{m\over\gamma_l\delta [[NF]]}\right)
\end{equation}
<<aaStats evaluate covariance>>=
dim3 blockDimPsd(16,16);
dim3 gridDimPsd( 1+NF/16 , 1+NF/16 );
dim3 blockDimCov(16,16);
dim3 gridDimCov( 1+NU/16 , 1+NU/16 );
int i_SRC, j_SRC, offset0, offset;
INFO("Covariance %dx%d block:\n",N_SRC,N_SRC);
for (i_SRC=0;i_SRC<N_SRC;i_SRC++) {
offset0 = 4*N_SRC*i_SRC;
for (j_SRC=0;j_SRC<N_SRC;j_SRC++) {
INFO(" [%d,%d]",i_SRC,j_SRC);
offset = offset0 + 2*j_SRC;
//INFO("(%2d",offset);
aaPowerSpectrum LLL gridDimPsd,blockDimPsd RRR (
d__psd, NF, lenslet_pitch,
atm->wavelength, atm->r0, atm->turbulence.L0,
atm->d__layers, atm->N_LAYER,
(src->dev_ptr) + i_SRC, (src->dev_ptr) + j_SRC,
d__alpha, d__alpha, kappa);
<<AA covariance with Wiener-Khinchin>>
float2 *b;
b = (float2*)malloc(psd_size);
HANDLE_ERROR( cudaMemcpy( b, d__psd, psd_size, cudaMemcpyDeviceToHost) );
/*
for (int i=0;i<100;i++) {
INFO("|(%3d) [%+6.6g;%+6.6g]\n",i,b[i].x,b[i].y);
}
*/
free(b);
<<AA covariance reduction>>
offset = offset0 + (2*j_SRC+1);
//INFO(",%2d",offset);
aaPowerSpectrum LLL gridDimPsd,blockDimPsd RRR (
d__psd, NF, lenslet_pitch,
atm->wavelength, atm->r0, atm->turbulence.L0,
atm->d__layers, atm->N_LAYER,
(src->dev_ptr) + i_SRC, (src->dev_ptr) + j_SRC,
d__alpha, d__beta, kappa);
<<AA covariance with Wiener-Khinchin>>
<<AA covariance reduction>>
offset = offset0 + 2*(N_SRC+j_SRC);
//INFO(",%2d",offset);
aaPowerSpectrum LLL gridDimPsd,blockDimPsd RRR (
d__psd, NF, lenslet_pitch,
atm->wavelength, atm->r0, atm->turbulence.L0,
atm->d__layers, atm->N_LAYER,
(src->dev_ptr) + i_SRC, (src->dev_ptr) + j_SRC,
d__beta, d__alpha, kappa);
<<AA covariance with Wiener-Khinchin>>
<<AA covariance reduction>>
offset = offset0 + (2*(N_SRC+j_SRC)+1);
//INFO(",%2d)",offset);
aaPowerSpectrum LLL gridDimPsd,blockDimPsd RRR (
d__psd, NF, lenslet_pitch,
atm->wavelength, atm->r0, atm->turbulence.L0,
atm->d__layers, atm->N_LAYER,
(src->dev_ptr) + i_SRC, (src->dev_ptr) + j_SRC,
d__beta, d__beta, kappa);
<<AA covariance with Wiener-Khinchin>>
<<AA covariance reduction>>
}
INFO("\n");
}
@
The Wiener-Khinchin theorem is applied on the power spectrum with the following:
<<AA covariance with Wiener-Khinchin>>=
HANDLE_ERROR_CUFFT(cufftExecC2C(plan, d__psd, d__psd, CUFFT_FORWARD),
"Unable to execute plan");
//HANDLE_ERROR(cudaThreadSynchronize());
@
The power spectrum, given by Eq.~(\ref{eq:23}) and Eq.~(\ref{eq:17}), must be sampled such as its Fourier transform gives unbiased values of the covariance.
The largest spatial frequency is $f_{max}=\kappa/2\gamma_l\delta$ with $\kappa\ge 1$.
The numyber of sample is given by $[[N_F]]\ge [[N_SIDE_LENSLET]]$.
The spatial frequencies are given by
\begin{eqnarray}
\label{eq:24}
f_{x,y} &=& (i,j){2\over [[NF]] }{\kappa\over 2\gamma_l\delta}, (i,j) \in \left[ 0,\dots,[[NF]]/2-1\right] \\
f_{x,y} &=& (i,j){2\over [[NF]] }{\kappa\over 2\gamma_l\delta}, (i,j) \in \left[ [[NF]]/2,\dots,[[NF]]-1\right]
\end{eqnarray}
Note that [[NF]] must be even.
<<aaStats power spectrum kernel>>=
__global__ void aaPowerSpectrum(float2 *d__psd, int NF, float d,
float wavelength, float r0, float L0,
layer *turb, int N_LAYER,
source *src1, source *src2,
float *alpha, float *beta, float kappa)
{
int i, j, k;
float fx, fy, f_square, fs, gl, red, c_red, s_red;
i = blockIdx.x * blockDim.x + threadIdx.x;
j = blockIdx.y * blockDim.y + threadIdx.y;
k = i*NF + j;
if ( (i<NF) && (j<(NF)) ) {
k = i*NF + j;
wavelength *= wavelength;
d__psd[k].x = 0;
d__psd[k].y = 0;
for (int k_LAYER=0;k_LAYER<N_LAYER;k_LAYER++) {
gl = (1-turb[k_LAYER].altitude/src1->height);
red = 2*PI*turb[k_LAYER].altitude;
fs = (2.0/NF)*(kappa/(2*d*gl));
fx = (float) i;
fy = (float) j;
if (i>=(NF/2))
fx = fx - NF;
if (j>=(NF/2))
fy = fy - NF;
fx *= fs;
fy *= fs;
f_square = fx*fx + fy*fy;
fs *= fs;
red *= fx*(src2->theta_x - src1->theta_x) + fy*(src2->theta_y - src1->theta_y);
sincosf( red, &s_red, &c_red);
red = (k==0) ? 0 : turb[k_LAYER].xi0*
fs*wavelength*(fx*alpha[0] + fy*alpha[1])*(fx*beta[0] + fy*beta[1])*
powf(r0,-5.0/3.0)*
phasePowerSpectrum(f_square, 1.0/L0)*
gatePSF(gl*fx*d,gl*fy*d);
d__psd[k].x += red*c_red;
d__psd[k].y += red*s_red;
}
}
}
@
The covariance derived from the power spectrum is sampled every $\delta=d/\kappa$.
The covariance derived from the slopes of the lenslet array is sampled every $d$.
Consequently the lenslet--array--slope--covariance is extracted from the power--spectrum--covariance at $(i\kappa,j\kappa)$.
Due to the symmetry of the Fourier transform output, the subscripts in the power--spectrum--covariance are in fact given by
\begin{eqnarray}
\label{eq:25}
(i,j)\kappa, (i,j) \in \left[ 0,\dots,[[N]]-1 \right] \\
\left[(i,j)-[[NU]]\right]\kappa + [[NF]], (i,j) \in \left[ [[N]],\dots,[[NU]]-1 \right]
\end{eqnarray}
The lenslet--array--slope--covariance is shifted such as the baseline coordinate $(0,0$) is a the center of the array i.e.
\begin{equation}
\label{eq:26}
(i,j) \leftarrow \left[ (i,j) + (2[[N]]-2)/2 \right] \mod ( 2[[N]]-1 )
\end{equation}
<<AA covariance reduction>>=
covariance_extraction LLL gridDimCov,blockDimCov RRR( d__cov + offset*NU2, NU,
d__psd, NF, kappa);
@ The extraction of the covariance is done with the kernel:
<<covariance extraction>>=
__global__ void covariance_extraction(float *cov_out, int NC_out,
float2 *cov_in, int NC_in, float kappa)
{
int i, j, k_out, k_in, h;
i = blockIdx.x * blockDim.x + threadIdx.x;
j = blockIdx.y * blockDim.y + threadIdx.y;
if ( (i<NC_out) && (j<(NC_out)) ) {
h = (NC_out-1)/2;
k_out = (NC_out - 1 - (i+h)%NC_out)*NC_out + (NC_out - 1 - (j+h)%NC_out);
if (i>=((NC_out+1)/2))
i = kappa*(i - NC_out) + NC_in;
else
i *= kappa;
if (j>=(NC_out+1)/2)
j = kappa*(j - NC_out) + NC_in;
else
j *= kappa;
k_in = i*NC_in + j;
cov_out[k_out] = cov_in[k_in].x;
}
}
@ The memory is freed with the following routine:
\index{aaStats!aaStats!cleanup}
<<aaStats cleanup>>=
void aaStats::cleanup(void)
{
INFO("@(CEO)>aaStats: freeing memory!\n");
<<cleanup contents>>
}
<<cleanup contents>>=
cufftDestroy(plan);
HANDLE_ERROR( cudaFree( d__psd ) );
HANDLE_ERROR( cudaFree( d__alpha ) );
HANDLE_ERROR( cudaFree( d__beta ) );
HANDLE_ERROR( cudaFree( d__cov ) );
#ifdef AASTATS_DEBUG
free(psd);
free(cov);
#endif
@
\subsection{Input/Output}
\label{sec:inputoutput}
The main parameters of [[aaStats]] are displayed with the [[info]] routine:
\index{aaStats!aaStats!info}
<<aaStats info>>=
void aaStats::info(int kappa, float d)
{
INFO("\n\x1B[1;42m@(CEO)>aaStats:\x1B[;42m\n");
<<info content>>
INFO("----------------------------------------------------\x1B[0m\n");
}
<<info content>>=
INFO(" . sampling [m] : %.2f\n",d);
INFO(" . size of covariance matrix block: %dX%d\n",NU,NU);
INFO(" . spectrum resolution : %d\n",NF);
INFO(" . covariance oversampling factor : %d\n",kappa);
INFO(" . spectrum highest frequency : %4.2E\n",(2.0/NF)*(kappa/(2*d)));
@ The covariance can be written to a file with:
\index{aaStats!aaStats!toFile}
<<save aa covariance to file>>=
void aaStats::toFile(char *filename)
{
dev2file(filename, d__cov, cov_size/sizeof(float));
}
@
\subsection{Miscellaneous}
\label{sec:miscellaneous}
From the power spectrum, once can derive the variance:
\index{aaStats!aaStats!variance}
<<aaStats variance>>=
float aaStats::variance(void) {
float res;
HANDLE_ERROR( cudaMemcpy( &res, d__cov + (N-1)*(NU+1),
sizeof(float), cudaMemcpyDeviceToHost) );
return res;
}
@
\section{Phase and Angle--of--arrival covariance matrix functions}
\label{sec:pa-functions}
\index{aaStats!paStats}
The covariance matrix is defined for a square lenslet array of size $N_L\times N_L$ and of pitch [[sampling]].
%The covariance sampling is set with the over--sampling factor [[osf]].
The covariance matrix depends on the atmosphere parameters and altitude profile.
The covariance matrix is computed for vectors of [[N_P_SRC]] estimation sources and [[N_S_SRC]] guide star sources.
The covariance matrix is written:
\begin{eqnarray}
\label{eq:27}
\Xi = \left[
\begin{array}{ccc}
\Xi_{11}&\cdots&\Xi_{1[[N__S_SRC]]} \\
\vdots &\ddots & \vdots \\
\Xi_{[[N_P_SRC]]1} & \vdots & \Xi_{[[N_P_SRC]][[N_S_SRC]]}
\end{array}
\right]
\end{eqnarray}
with
\begin{eqnarray}
\label{eq:28}
\Xi_{ij} = \left[
\begin{array}{cc}
\Xi_{{\phi_i}{x_j}} & \Xi_{{\phi_i}{y_j}}
\end{array}
\right].
\end{eqnarray}
Each matrix $\Xi_{{\phi_i}{x_j}}$ is a two--level recursive block Toeplitz (2RBT) $[[M]]^2\times[[N]]^2$ matrix.% with $[[N]]=N_L$ if $[[osf]]=1$ or $[[N]]=[[osf]] \times N_L+1$ if $[[osf]]\ge 1$.
Toeplitz matrices \cite{GolubVanLoan} are fully defined with their first row and column.
So the matrix $\Xi_{{phi_i}{x_j}}$ is reduced to a $[[NU]]=([[M]][[N]]-1)^2$ elements vector $t_\Xi^{{\phi_i}{x_j}}$ and the matrix $\Xi_{ij}$ becomes the matrix
\begin{equation}
\label{eq:29}
T_{ij} = \left[ t_\Xi^{{\phi_i}{x_j}} t_\Xi^{{\phi_i}{y_j}} \right].
\end{equation}
Finally the Toeplitz compressed version of the matrix $\Xi$ is written:
\begin{equation}
\label{eq:30}
T = \left[ t_\Xi^{{\phi_1}{x_1}} t_\Xi^{{\phi_1}{y_1}} \cdots t_\Xi^{{\phi_1}{x_[[N_S_SRC]]}} t_\Xi^{{\phi_1}{y_[[N_S_SRC]]}}
\cdots t_\Xi^{{\phi_[[N_P_SRC]]}{x_1}} t_\Xi^{{\phi_[[N_P_SRC]]}{y_1}} \cdots t_\Xi^{{\phi_[[N_P_SRC]]}{x_[[N_S_SRC]]}} t_\Xi^{{\phi_[[N_P_SRC]]}{y_[[N_S_SRC]]}} \right].
\end{equation}
@
\subsection{Setup \& Cleanup}
\label{sec:setup--cleanup-1}
\index{aaStats!paStats!setup}
<<paStats setup>>=
void paStats::setup(int M_, int N_, int osf_,
const atmosphere *atm, float lenslet_pitch,
source *phase_src_in, int N_P_SRC,
const source *slopes_src, int N_S_SRC)
{
N_SRC2 = N_P_SRC*N_S_SRC;
float alpha[2] = {1,0}, beta[2] = {0,1}, kappa;
osf = osf_;
N = N_;
M = M_;
shift = (N==M) ? 0 : 1;
N2 = M*N;
NU = M+N-1;
NU2 = NU*NU;
NF = 4096;
NF2 = NF*NF;
kappa = 4;
sampling = lenslet_pitch/osf;
source *phase_src;
if (phase_src_in[0].height==slopes_src[0].height) {
phase_src = phase_src_in;
M_LAYER = (int *)malloc(sizeof(int));
M_LAYER[0] = M;
psd_size = sizeof(float2)*NF2;
cov_size = sizeof(float)*NU2*2*N_SRC2;
ind_size = sizeof(int)*N2;
int BATCH = 1;
<<paStats setup allocation>>
stopwatch tid;
tid.tic();
<<paStats power spectrum with sources (same heights,rect.)>>
tid.toc(&cov_eval_et,"PA covariance evaluation");
} else {
<<paStats setup (multilayers)>>
}
info(kappa,sampling);
}
<<paStats setup>>=
void paStats::setup(int M_, int N_, int osf_,
const atmosphere *atm, float lenslet_pitch,
const source *slopes_src, int N_S_SRC,
float z_radius)
{
int N_P_SRC = 1;
N_SRC2 = N_P_SRC*N_S_SRC;
float alpha[2] = {1,0}, beta[2] = {0,1}, kappa;
osf = osf_;
N = N_;
M = M_;
shift = (N==M) ? 0 : 1;
N2 = M*N;
NU = M+N-1;
NU2 = NU*NU;
NF = 4096;
NF2 = NF*NF;
kappa = 4;
sampling = lenslet_pitch/osf;
M_LAYER = (int *)malloc(sizeof(int));
M_LAYER[0] = M;
psd_size = sizeof(float2)*NF2;
cov_size = sizeof(float)*NU2*2*N_SRC2;
ind_size = sizeof(int)*N2;
int BATCH = 1;
<<paStats setup allocation>>
stopwatch tid;
tid.tic();
<<paStats polar average power spectrum with sources (same heights,rect.)>>
tid.toc(&cov_eval_et,"PA covariance evaluation");
info(kappa,sampling);
}
@ The phase source is duplicated and set to the same height than the slope source:
<<phase source duplicate>>=
float *zenith, *azimuth;
zenith = (float*)malloc(sizeof(float)*N_P_SRC);
azimuth = (float*)malloc(sizeof(float)*N_P_SRC);
source __src;
for (int k_P_SRC=0;k_P_SRC<N_P_SRC;k_P_SRC++) {
HANDLE_ERROR( cudaMemcpy( &__src, phase_src_in->dev_ptr + k_P_SRC,
sizeof(source), cudaMemcpyDeviceToHost ) );
zenith[k_P_SRC] = __src.zenith;
azimuth[k_P_SRC] = __src.azimuth;
}
source clone_phase_src;
clone_phase_src.setup(phase_src_in[0].photometric_band,
zenith, azimuth,
slopes_src[0].height,N_P_SRC,0);
free(zenith);
free(azimuth);
@ The covariance is computed for each layers independently in the following:
<<paStats setup (multilayers)>>=
/*
source clone_phase_src;
clone_phase_src.setup(phase_src_in[0].photometric_band,
phase_src_in[0].zenith,
phase_src_in[0].azimuth,
slopes_src[0].height,0);
*/
<<phase source duplicate>>
phase_src = &clone_phase_src;
float g;
int o;
NU2 = 0;
M_LAYER = (int *)malloc(sizeof(int)*atm->N_LAYER);
for (int k_LAYER=0;k_LAYER<atm->N_LAYER;k_LAYER++) {
g = 1 - atm->turbulence.altitude[k_LAYER]/slopes_src[0].height;
o = (int) ceilf( osf*0.5*N*(1-g)/g );
M_LAYER[k_LAYER] = M + o*2;
NU = M_LAYER[k_LAYER]+N-1;
NU2 += NU*NU;
// printf("@(CEO)>paStats: layer #%d - NU=%d\n",k_LAYER,NU);
}
psd_size = sizeof(float2)*NF2;
cov_size = sizeof(float)*NU2*2*N_SRC2;
int BATCH = 1;
<<paStats setup allocation>>
stopwatch tid;
tid.tic();
<<paStats power spectrum with sources (same heights,rect.,multilayers)>>
tid.toc(&cov_eval_et,"PA covariance evaluation");
clone_phase_src.cleanup();
<<covariance sampling>>=
int covariance_sampling(float h, float z, int osf, int M, int N) {
float g;
int o, M_LAYER, NU;
g = 1 - h/z;
o = (int) ceilf( osf*0.5*N_SIDE_LENSLET*(1-g)/g );
M_LAYER = M + o*2;
NU = M_LAYER+N-1;
return NU*NU;
}
@ Memory is allocated with:
<<paStats setup allocation>>=
HANDLE_ERROR( cudaMalloc((void**)&d__psd, psd_size ) );
HANDLE_ERROR( cudaMalloc((void**)&d__alpha, 2*sizeof(float) ) );
HANDLE_ERROR( cudaMalloc((void**)&d__beta, 2*sizeof(float) ) );
HANDLE_ERROR( cudaMemcpy( d__alpha, alpha, 2*sizeof(float), cudaMemcpyHostToDevice) );
HANDLE_ERROR( cudaMemcpy( d__beta, beta, 2*sizeof(float), cudaMemcpyHostToDevice) );
HANDLE_ERROR( cudaMalloc((void**)&d__cov, cov_size ) );
INFO("\n@(CEO)>paStats: Creating 2D covariance FFT plan\n");
int n_DFT[2] = {NF, NF};
int iodist = NF2;
/* Create a 2D FFT plan. */
HANDLE_ERROR_CUFFT( cufftPlanMany(&plan, 2, n_DFT,
NULL, 1, iodist,
NULL, 1, iodist,
CUFFT_C2C,BATCH),
"Unable to create plan" );
/* HANDLE_ERROR_CUFFT( cufftSetCompatibilityMode(plan, CUFFT_COMPATIBILITY_NATIVE), */
/* "Unable to set compatibility mode to native" ); */
@
The covariance is derived from Eq.~(\ref{eq:5}) for 2 sources in the directions $\vec\theta_1$ and $\vec\theta_2$ propagated through a [[N_LAYER]] atmospheric profile.
It is given by
\begin{equation}
\label{eq:31}
C_{\varphi \vec \alpha}^{12}(\vec\rho) = \sum_{l=1}^{[[N]]} C_{\varphi \vec \alpha}^l(\vec\rho_l)
\end{equation}
where
\begin{equation}
\label{eq:32}
\vec \rho_l = \gamma_l\vec\rho + h_l(\vec\theta_2-\vec\theta_1)
\end{equation}
with $\gamma_l=1-h_l/z_\ast$, $h_l$ is the atmosphere layer altitude and $z_\ast$ is the sources height.
Invoking the Wiener--Khinchine theorem, the translation and scaling properties of the Fourier transform and using Eq.~(\ref{eq:2}), the covariance for a single atmospheric layer at altitude $h_l$ is written:
\begin{equation}
\label{eq:33}
C_{\varphi \vec \alpha}^{12,l}(\vec\rho_l) = \mathcal F^{-1} \left[ -\iota\lambda\left( \vec f \cdot \vec\alpha \right) W_\varphi(\vec f) G(\gamma_l \vec f) \exp\left( 2\iota\pi h_l\vec f \cdot (\vec\theta_2-\vec\theta_1) \right) \right] (\gamma_l\vec\rho)
\end{equation}
Setting
\begin{equation}
\label{eq:34}
W_{\varphi \vec \alpha}^{l,12}(\vec f) = -\iota\lambda\left( \vec f \cdot \vec\alpha \right) W_\varphi(\vec f) G(\gamma_l \vec f) \exp\left( 2\iota\pi h_l\vec f \cdot (\vec\theta_2-\vec\theta_1) \right),
\end{equation}
Eq.~(\ref{eq:31}) is now given by
\begin{equation}
\label{eq:35}
C_{\varphi \vec \alpha}^{12}(\vec\rho) = \sum_{l=1}^{[[N]]} \mathcal F^{-1} \left[ W_{\varphi \vec \alpha}^{l,12}(\vec f) \right] (\gamma_l\vec\rho).
\end{equation}
Calling $\delta=d/\kappa$ the sampling step of the vector $\vec\rho$ with the integer $\kappa\geq 1$, $\vec\rho_l$ is given by
\begin{equation}
\label{eq:36}
\vec \rho_l = \gamma_l\delta(k\vec x + l\vec y),\quad\forall l,k\in \left[0,\cdots,[[NF]]-1\right],
\end{equation}
$\vec x$ and $\vec y$ are the unit vectors of the X and Y axis, respectively.
Eq.~(\ref{eq:35}) becomes
\begin{equation}
\label{eq:37}
C_{\varphi \vec \alpha}^{12}(k,l) = \sum_{l=1}^{[[N]]} \iint {\mathrm d^2}\vec f W_{\varphi \vec \alpha}^{l,12}(\vec f) \exp\left( 2\iota\pi \gamma_l \delta (k f_x + l f_y) \right)
\end{equation}
Discretizing Eq.~(\ref{eq:37}) with
\begin{equation}
\label{eq:38}
\gamma_l\delta f_x = n/[[NF]],\quad \gamma_l\delta f_y = m/[[NF]],\quad \forall n,m\in \left[0,\cdots,[[NF]]-1\right],
\end{equation}
Eq.~(\ref{eq:37}) is transformed into
\begin{eqnarray}
\label{eq:39}
C_{\varphi \vec \alpha}^{12}(k,l) &=& \sum_{l=1}^{[[N]]} \sum_{n=0}^{[[NF]]-1} \sum_{m=0}^{[[NF]]-1} \left( 1\over\gamma_l\delta [[NF]]\right)^2 W_{\varphi \vec \alpha}^{l,12}\left({n\over\gamma_l\delta [[NF]]},{m\over\gamma_l\delta [[NF]]}\right) \exp\left( 2\iota\pi {(k n + l m)\over [[NF]]} \right) \\
&=& \sum_{n=0}^{[[NF]]-1} \sum_{m=0}^{[[NF]]-1} W_{\varphi \vec \alpha}^{12}(n,m) \exp\left( 2\iota\pi {(k n + l m)\over [[NF]]} \right)
\end{eqnarray}
with
\begin{equation}
\label{eq:40}
W_{\varphi \vec \alpha}^{12}(n,m) = \sum_{l=1}^{[[N]]} \left( 1\over\gamma_l\delta [[NF]]\right)^2 W_{\varphi \vec \alpha}^{l,12}\left({n\over\gamma_l\delta [[NF]]},{m\over\gamma_l\delta [[NF]]}\right)
\end{equation}
%% Rectangular 2RBT
<<paStats power spectrum with sources (same heights,rect.)>>=
dim3 blockDimPsd(16,16);
dim3 gridDimPsd( 1+NF/16 , 1+NF/16 );
dim3 blockDimCov(16,16);
dim3 gridDimCov( 1+NU/16 , 1+NU/16 );
int i_P_SRC, j_S_SRC, offset0, offset;
INFO("Covariance %dx%d block:\n",N_P_SRC,N_S_SRC);
for (i_P_SRC=0;i_P_SRC<N_P_SRC;i_P_SRC++) {
offset0 = 2*N_S_SRC*i_P_SRC;
for (j_S_SRC=0;j_S_SRC<N_S_SRC;j_S_SRC++) {
INFO(" [%d,%d]",i_P_SRC,j_S_SRC);
offset = offset0 + 2*j_S_SRC;
//INFO("(%2d",offset);
paPowerSpectrum LLL gridDimPsd,blockDimPsd RRR (d__psd, NF, osf, lenslet_pitch,
atm->wavelength, atm->r0,
atm->turbulence.L0, atm->d__layers, atm->N_LAYER,
(phase_src->dev_ptr) + i_P_SRC,
(slopes_src->dev_ptr) + j_S_SRC,
d__alpha, kappa, shift);
<<AA covariance with Wiener-Khinchin>>
<<PA covariance reduction>>
offset = offset0 + (2*j_S_SRC+1);
//INFO(",%2d)",offset);
paPowerSpectrum LLL gridDimPsd,blockDimPsd RRR (d__psd, NF, osf, lenslet_pitch,
atm->wavelength, atm->r0,
atm->turbulence.L0, atm->d__layers, atm->N_LAYER,
(phase_src->dev_ptr) + i_P_SRC,
(slopes_src->dev_ptr) + j_S_SRC,
d__beta, kappa, shift);
<<AA covariance with Wiener-Khinchin>>
<<PA covariance reduction>>
}
INFO("\n");
}
@
The power spectrum of the average of the covariance over a continous sample of sources within a circle of angular radius $\zeta$ is given by:
\begin{equation}
\label{eq:45}
W_{\varphi \vec \alpha}^{l,\zeta 2}(\vec f) = {1\over \pi\zeta^2} \int_0^\zeta {\mathrm d}z z \int_0^{2\pi}{\mathrm d} \theta -\iota\lambda\left( \vec f \cdot \vec\alpha \right) W_\varphi(\vec f) G(\gamma_l \vec f) \exp\left( 2\iota\pi h_l\vec f \cdot (\vec\theta_2-\vec\theta) \right),
\end{equation}
Performing the integration leads to:
\begin{equation}
\label{eq:45}
W_{\varphi \vec \alpha}^{l,\zeta 2}(\vec f) = -\iota\lambda\left( \vec f \cdot \vec\alpha \right) W_\varphi(\vec f) G(\gamma_l \vec f) {2J_1(2\pi h_lf\zeta)\over 2\pi h_lf\zeta} \exp\left( 2\iota\pi h_l\vec f \cdot \vec\theta_2 \right),
\end{equation}
<<paStats polar average power spectrum with sources (same heights,rect.)>>=
dim3 blockDimPsd(16,16);
dim3 gridDimPsd( 1+NF/16 , 1+NF/16 );
dim3 blockDimCov(16,16);
dim3 gridDimCov( 1+NU/16 , 1+NU/16 );
int i_P_SRC, j_S_SRC, offset0, offset;
INFO("Polar average covariance %dx%d block:\n",N_P_SRC,N_S_SRC);
for (i_P_SRC=0;i_P_SRC<N_P_SRC;i_P_SRC++) {
offset0 = 2*N_S_SRC*i_P_SRC;
for (j_S_SRC=0;j_S_SRC<N_S_SRC;j_S_SRC++) {
INFO(" [%d,%d]",i_P_SRC,j_S_SRC);
offset = offset0 + 2*j_S_SRC;
//INFO("(%2d",offset);
paPolarPowerSpectrum LLL gridDimPsd,blockDimPsd RRR (d__psd, NF, osf, lenslet_pitch,
atm->wavelength, atm->r0,
atm->turbulence.L0, atm->d__layers, atm->N_LAYER,
(slopes_src->dev_ptr) + j_S_SRC,
d__alpha, kappa, shift, z_radius);
<<AA covariance with Wiener-Khinchin>>
<<PA covariance reduction>>
offset = offset0 + (2*j_S_SRC+1);
//INFO(",%2d)",offset);
paPolarPowerSpectrum LLL gridDimPsd,blockDimPsd RRR (d__psd, NF, osf, lenslet_pitch,
atm->wavelength, atm->r0,
atm->turbulence.L0, atm->d__layers, atm->N_LAYER,
(slopes_src->dev_ptr) + j_S_SRC,
d__beta, kappa, shift, z_radius);
<<AA covariance with Wiener-Khinchin>>
<<PA covariance reduction>>
}
INFO("\n");
}
@ The power spectrum is computed with the following kernels:
<<paStats power spectrum kernel>>=
__global__ void paPowerSpectrum(float2 *d__psd, int NF, int osf, float d,
float wavelength, float r0, float L0,
layer *turb, int N_LAYER,
source *src1, source *src2,
float *alpha, float kappa, int shift)
{
int i, j, k;
float fx, fy, f_square, fs, gl, red, c_red, s_red, wavenumber, src_x, src_y;
i = blockIdx.x * blockDim.x + threadIdx.x;
j = blockIdx.y * blockDim.y + threadIdx.y;
k = i*NF + j;
if ( (i<NF) && (j<(NF)) ) {
wavenumber = wavelength*0.5/PI;
k = i*NF + j;
d__psd[k].x = 0;
d__psd[k].y = 0;
for (int k_LAYER=0;k_LAYER<N_LAYER;k_LAYER++) {
gl = (1-turb[k_LAYER].altitude/src1->height);
red = 2*PI;
src_x = src2->theta_x - src1->theta_x;
src_x *= turb[k_LAYER].altitude;
src_y = src2->theta_y - src1->theta_y;
src_y *= turb[k_LAYER].altitude;
fs = osf*(2.0/NF)*(kappa/(2*d*gl));
fx = (float) i;
fy = (float) j;
if (i>=(NF/2))
fx = fx - NF;
if (j>=(NF/2))
fy = fy - NF;
fx *= fs;
fy *= fs;
f_square = fx*fx + fy*fy;
fs *= fs;
red *= fx*(src_x + 0.5*d*shift) + fy*(src_y + 0.5*d*shift);
sincosf( red, &s_red, &c_red);
red = -wavenumber*turb[k_LAYER].xi0*fs*wavelength*(fx*alpha[0] + fy*alpha[1])*
powf(r0,-5.0/3.0)*
phasePowerSpectrum(f_square, 1.0/L0)*
gateAmp(gl*fx*d,gl*fy*d);
d__psd[k].x += -red*s_red;
d__psd[k].y += red*c_red;
}
}
}
@
The average of the power spectrum on an angular disc of radius $[z_radius]$ is
<<paStats polar average power spectrum kernel>>=
__global__ void paPolarPowerSpectrum(float2 *d__psd, int NF, int osf, float d,
float wavelength, float r0, float L0,
layer *turb, int N_LAYER,
source *src2,
float *alpha, float kappa, int shift,
float z_radius)
{
int i, j, k;
float fx, fy, f_square, f, fs, gl, red, c_red, s_red, wavenumber, src_x, src_y, b;
i = blockIdx.x * blockDim.x + threadIdx.x;
j = blockIdx.y * blockDim.y + threadIdx.y;
k = i*NF + j;
if ( (i<NF) && (j<(NF)) ) {
wavenumber = wavelength*0.5/PI;
k = i*NF + j;
d__psd[k].x = 0;
d__psd[k].y = 0;
for (int k_LAYER=0;k_LAYER<N_LAYER;k_LAYER++) {
gl = 1.0;
red = 2*PI;
src_x = src2->theta_x;
src_x *= turb[k_LAYER].altitude;
src_y = src2->theta_y;
src_y *= turb[k_LAYER].altitude;
fs = osf*(2.0/NF)*(kappa/(2*d*gl));
fx = (float) i;
fy = (float) j;
if (i>=(NF/2))
fx = fx - NF;
if (j>=(NF/2))
fy = fy - NF;
fx *= fs;
fy *= fs;
f = hypot(fx,fy);
f_square = f*f;
fs *= fs;
b = 2*turb[k_LAYER].altitude*f*z_radius;
red *= fx*(src_x + 0.5*d*shift) + fy*(src_y + 0.5*d*shift);
sincosf( red, &s_red, &c_red);
red = -wavenumber*turb[k_LAYER].xi0*fs*wavelength*(fx*alpha[0] + fy*alpha[1])*
powf(r0,-5.0/3.0)*
phasePowerSpectrum(f_square, 1.0/L0)*
gateAmp(gl*fx*d,gl*fy*d)*
somb(b);
d__psd[k].x += -red*s_red;
d__psd[k].y += red*c_red;
}
}
}
@
<<PA covariance reduction>>=
pa_covariance_extraction LLL gridDimCov,blockDimCov RRR( d__cov + offset*NU2, NU,
d__psd, NF, kappa);
@ The extraction of the covariance is done with the kernel:
<<PA covariance extraction>>=
__global__ void pa_covariance_extraction(float *cov_out, int NC_out,
float2 *cov_in, int NC_in, float kappa)
{
int i, j, k_out, k_in, h;
i = blockIdx.x * blockDim.x + threadIdx.x;
j = blockIdx.y * blockDim.y + threadIdx.y;
if ( (i<NC_out) && (j<(NC_out)) ) {
h = (NC_out-1)/2;
k_out = (NC_out - 1 - (i+h)%NC_out)*NC_out + (NC_out - 1 - (j+h)%NC_out);
// k_out = ((i+h)%NC_out)*NC_out + ((j+h)%NC_out);
if (i>(NC_out/2))
i = kappa*(i - NC_out) + NC_in;
else
i *= kappa;
if (j>(NC_out/2))
j = kappa*(j - NC_out) + NC_in;
else
j *= kappa;
k_in = i*NC_in + j;
cov_out[k_out] = cov_in[k_in].x;
}
}
@
%% Rectangular 2RBT
<<paStats power spectrum with sources (same heights,rect.,multilayers)>>=
dim3 blockDimPsd(16,16);
dim3 gridDimPsd( 1+NF/16 , 1+NF/16 );
dim3 blockDimCov(16,16);
dim3 gridDimCov( 1+NU/16 , 1+NU/16 );
int i_P_SRC, j_S_SRC, k_LAYER;
INFO("Covariance %dx%d block:\n",N_P_SRC*atm->N_LAYER,N_S_SRC);
int CNU2 = 0;
for (i_P_SRC=0;i_P_SRC<N_P_SRC;i_P_SRC++) {
for (k_LAYER=0;k_LAYER<atm->N_LAYER;k_LAYER++) {
NU = M_LAYER[k_LAYER]+N-1;
for (j_S_SRC=0;j_S_SRC<N_S_SRC;j_S_SRC++) {
INFO(" [%d,%d]",i_P_SRC+k_LAYER*N_P_SRC,j_S_SRC);
//INFO("(%2d",CNU2);
paPowerSpectrum LLL gridDimPsd,blockDimPsd RRR (
d__psd, NF, osf, lenslet_pitch,
atm->wavelength, atm->r0, atm->turbulence.L0,
atm->d__layers, atm->N_LAYER, k_LAYER,
(phase_src->dev_ptr) + i_P_SRC,
(slopes_src->dev_ptr) + j_S_SRC,
d__alpha, kappa,shift);
<<AA covariance with Wiener-Khinchin>>
pa_covariance_extraction LLL gridDimCov,blockDimCov RRR( d__cov + CNU2, NU,
d__psd, NF, kappa);
CNU2 += NU*NU;
//INFO(",%2d)",CNU2);
paPowerSpectrum LLL gridDimPsd,blockDimPsd RRR (
d__psd, NF, osf, lenslet_pitch,
atm->wavelength, atm->r0, atm->turbulence.L0,
atm->d__layers, atm->N_LAYER, k_LAYER,
(phase_src->dev_ptr) + i_P_SRC,
(slopes_src->dev_ptr) + j_S_SRC,
d__beta, kappa,shift);
<<AA covariance with Wiener-Khinchin>>
pa_covariance_extraction LLL gridDimCov,blockDimCov RRR( d__cov + CNU2, NU,
d__psd, NF, kappa);
CNU2 += NU*NU;
}
INFO("\n");
}
}
<<paStats power spectrum kernel (single layer)>>=
__global__ void paPowerSpectrum(float2 *d__psd, int NF, int osf, float d,
float wavelength, float r0, float L0,
layer *turb, int N_LAYER, int k_LAYER,
source *src1, source *src2,
float *alpha, float kappa, int shift)
{
int i, j, k;
float fx, fy, f_square, fs, gl, red, c_red, s_red, wavenumber, src_x, src_y;
i = blockIdx.x * blockDim.x + threadIdx.x;
j = blockIdx.y * blockDim.y + threadIdx.y;
k = i*NF + j;
if ( (i<NF) && (j<(NF)) ) {
wavenumber = wavelength*0.5/PI;
k = i*NF + j;
d__psd[k].x = 0;
d__psd[k].y = 0;
gl = (1-turb[k_LAYER].altitude/src1->height);
red = 2*PI;
src_x = src2->theta_x - src1->theta_x;
src_x *= turb[k_LAYER].altitude;
src_y = src2->theta_y - src1->theta_y;
src_y *= turb[k_LAYER].altitude;
fs = osf*(2.0/NF)*(kappa/(2*d*gl));
fx = (float) i;
fy = (float) j;
if (i>=(NF/2))
fx = fx - NF;
if (j>=(NF/2))
fy = fy - NF;
fx *= fs;
fy *= fs;
f_square = fx*fx + fy*fy;
fs *= fs;
red *= fx*(src_x + 0.5*d*shift) + fy*(src_y + 0.5*d*shift);
sincosf( red, &s_red, &c_red);
red = -wavenumber*turb[k_LAYER].xi0*fs*wavelength*(fx*alpha[0] + fy*alpha[1])*
powf(r0,-5.0/3.0)*
phasePowerSpectrum(f_square, 1.0/L0)*
gateAmp(gl*fx*d,gl*fy*d);
d__psd[k].x += -red*s_red;
d__psd[k].y += red*c_red;
}
}
@
The memory is freed with the following routine:
\index{aaStats!paStats!cleanup}
<<paStats cleanup>>=
void paStats::cleanup(void)
{
INFO("@(CEO)>paStats: freeing memory!\n");
free(M_LAYER);
<<cleanup contents>>
}
@
\subsection{Input/Output}
\label{sec:inputoutput-1}
The main parameters of [[paStats]] are displayed with the info routine:
\index{aaStats!paStats!info}
<<paStats info>>=
void paStats::info(int kappa, float d)
{
INFO("\n\x1B[1;42m@(CEO)>paStats:\x1B[;42m\n");
<<info content>>
INFO(" . phase oversampling factor : %d\n",osf);
INFO("----------------------------------------------------\x1B[0m\n");
}
@
The [[paStats]] covariance can be written to a file with:
\index{aaStats!paStats!toFile}
<<save pa covariance to file>>=
void paStats::toFile(char *filename)
{
dev2file(filename, d__cov, cov_size/sizeof(float));
}
@
\subsection{Miscellaneous}
\label{sec:miscellaneous-1}
The phase power spectrum is given by:
<<phase power spectrum>>=
__device__ float phasePowerSpectrum(float f_square, float f0) {
return 0.022895587108555*powf(f_square + f0*f0,-11.0/6.0);
}
@
The subaperture Fourier transform is given by
\begin{equation}
\label{eq:41}
{\mathrm sinc}(f_x){\mathrm sinc}(f_y)
\end{equation}
and written in
<<subaperture FT>>=
__device__ float gateAmp(float fx, float fy) {
float out;
out = sinc(fx)*sinc(fy);
return out;
}
@
The subaperture PSF is given by
\begin{equation}
\label{eq:42}
{\mathrm sinc}^2(f_x){\mathrm sinc}^2(f_y)
\end{equation}
and written in
<<subaperture PSF>>=
__device__ float gatePSF(float fx, float fy) {
float out;
out = sinc(fx)*sinc(fy);
return out*out;
}
@ that depends on the $\mathrm sinc$ function:
\begin{equation}
\label{eq:43}
{\sin(\pi x)\over \pi x}
\end{equation}
<<sinc>>=
__device__ float sinc(float x) {
return (x==0) ? 1.0 : sinf( PI*x) / (PI*x) ;
}
@
The Sombrero function is defined as:
\begin{equation}
\label{eq:44}
{\mathrm somb}(x) = {2J_1(x)\over x}
\end{equation}
<<somb>>=
__device__ float somb(float x) {
return (x==0) ? 1.0 : 2 * j1(PI*x) / (PI*x);
}
@
\section{Tests}
\label{sec:tests}
The test routine is written:
<<aaStats.bin>>=
#include <iostream>
#include <fstream>
#include <iomanip>
using namespace std;
#include "cublas_v2.h"
#include "h"
#include "aaStats.h"
#include "BTBT.h"
using namespace std;
#define N_RUN 5
#define N_SAMPLE 100
__global__ void fill(float *A, int m_a, int n_a) {
int i, j, k;
float n;
i = blockIdx.x * blockDim.x + threadIdx.x;
j = blockIdx.y * blockDim.y + threadIdx.y;
k = i*m_a +j;
n = (float) m_a*n_a;
if (k<n)
A[k] = ((float)k)/n;
}
int main( void) {
atmosphere atm;
source src;
aaStats aa;
paStats pa;
BTBT C, S;
int k, data_size, M, N, NX, NY, NA, MA, k_N, k_SAMPLE, osf;
float altitude[] = {0},
xi0[] = {1},
wind_speed[] = {10},
wind_direction[] = {0};
float *x, *y, *d__x, *d__y, *d__A;
stopwatch tid;
cublasHandle_t handle;
cublasCreate(&handle);
cublasStatus_t status;
atm.setup(0.15,30,altitude,xi0,wind_speed,wind_direction);
src.setup(0,0,INFINITY);
int N_[N_RUN] = {20,40,64,84,150};
float aa_full_mvm_et[N_RUN], pa_full_mvm_et[N_RUN], aa_comp_mvm_et[N_RUN], pa_comp_mvm_et[N_RUN],
aa_comp_eval_et[N_RUN], pa_comp_eval_et[N_RUN];
dim3 blockDim(16,16);
dim3 gridDim;
float alpha, beta;
alpha = 1;
beta = 0;
osf = 1;
std::ofstream aa_fid, pa_fid;
aa_fid.open ("aa_runtime.txt");
aa_fid.setf(ios::fixed, ios::floatfield);
aa_fid.precision(3);
aa_fid << "\nAASTATS:\n";
aa_fid << " N FULL MVM COMP MVM COMP EVAL\n";
pa_fid.open ("pa_runtime.txt");
pa_fid.setf(ios::fixed, ios::floatfield);
pa_fid.precision(3);
pa_fid << "\nPASTATS:\n";
pa_fid << " N FULL MVM COMP MVM COMP EVAL\n";
for (k_SAMPLE=0;k_SAMPLE<N_SAMPLE;k_SAMPLE++) {
//AASTATS TESTS
for (k_N=0;k_N<N_RUN;k_N++) {
printf("\n_______ AASTATS TESTS: SAMPLE #%d - RUN #%d _______\n",k_SAMPLE,k_N);
N = N_[k_N];
NX = N*N*2;
data_size = sizeof(float)*NX;
x = (float *)malloc( data_size );
y = (float *)malloc( data_size );
HANDLE_ERROR( cudaMalloc((void**)&d__x, data_size ) );
HANDLE_ERROR( cudaMalloc((void**)&d__y, data_size ) );
for (k=0;k<NX;k++) {
x[k] = (float)k;
}
HANDLE_ERROR( cudaMemcpy( d__x, x, data_size, cudaMemcpyHostToDevice) );
aa.setup(N,&atm,0.1,&src,1);
C.setup(2,2,N,aa.d__cov);
aa_comp_eval_et[k_N] = aa.cov_eval_et;
//printf("\n AA VARIANCE: %.4E rd^2\n",aa.variance(0.1, &atm, 4, 2048));
tid.tic();
C.MVM(d__y,d__x);
tid.toc(aa_comp_mvm_et + k_N,"Covariance MVM");
HANDLE_ERROR( cudaMemcpy( y, d__y, data_size, cudaMemcpyDeviceToHost) );
/* printf("\nMVM:\n"); */
/* for (k=0;k<NX;k++) { */
/* printf("(%2d) %4.2E\n",k,y[k]); */
/* } */
FILE *aa_fid;
char filename[100];
sprintf(filename,"aa_mvm%03d.bin",N);
aa_fid = fopen(filename,"wb");
fwrite(y,sizeof(float),NX,aa_fid);
fclose(aa_fid);
if (N<=100) {
M = 2*N*N;
data_size = sizeof(float)*M*M;
HANDLE_ERROR( cudaMalloc((void**)&d__A, data_size ) );
gridDim = dim3(1+M/16,1+M/16);
tid.tic();
fill LLL gridDim,blockDim RRR (d__A, M, M);
tid.toc("Filling of A");
tid.tic();
status = cublasSgemv(handle, CUBLAS_OP_N, M, M, &alpha, d__A, M, d__x, 1, &beta, d__y, 1);
tid.toc(aa_full_mvm_et + k_N,"A MVM");
if (status!=CUBLAS_STATUS_SUCCESS)
printf(">>> ERROR! CUBLAS FAILED! <<<\n");
HANDLE_ERROR( cudaFree( d__A ) );
} else aa_full_mvm_et[k_N]=0;
aa.cleanup();
C.cleanup();
HANDLE_ERROR( cudaFree( d__x ) );
HANDLE_ERROR( cudaFree( d__y ) );
free(x);
free(y);
}
for (k_N=0;k_N<N_RUN;k_N++) {
aa_fid << setw(3) << N_[k_N] << " " << setw(6) << aa_full_mvm_et[k_N] << " " << aa_comp_mvm_et[k_N] << " " << aa_comp_eval_et[k_N] << "\n";
}
//PASTATS TESTS
for (k_N=0;k_N<N_RUN;k_N++) {
printf("\n_______ PASTATS TESTS: SAMPLE #%d - RUN #%d _______\n",k_SAMPLE,k_N);
// N = 2*N_[k_N]+1;
NX = N_[k_N];
NX *= NX*2;
NY = N_[k_N] + 1;
NY *= NY;
data_size = sizeof(float);
x = (float *)malloc( data_size*NX );
y = (float *)malloc( data_size*NY );
HANDLE_ERROR( cudaMalloc((void**)&d__x, data_size*NX ) );
HANDLE_ERROR( cudaMalloc((void**)&d__y, data_size*NY ) );
for (k=0;k<NX;k++) {
x[k] = (float)k;
}
HANDLE_ERROR( cudaMemcpy( d__x, x, data_size*NX, cudaMemcpyHostToDevice) );
pa.setup(N_[k_N]+1,N_[k_N],osf,&atm,0.1,&src,1,&src,1);
S.setup(1,2,N_[k_N]+1,N_[k_N],pa.d__cov);
pa_comp_eval_et[k_N] = pa.cov_eval_et;
tid.tic();
S.MVM(d__y,d__x);
tid.toc(pa_comp_mvm_et + k_N,"Covariance MVM");
HANDLE_ERROR( cudaMemcpy( y, d__y, data_size*NY, cudaMemcpyDeviceToHost) );
/* printf("\nMVM:\n"); */
/* for (k=0;k<NY;k++) { */
/* printf("(%3d) %+6.4E\n",k,y[k]); */
/* } */
FILE *aa_fid;
char filename[100];
sprintf(filename,"pa_mvm%03d.bin",N);
aa_fid = fopen(filename,"wb");
fwrite(y,sizeof(float),NY,aa_fid);
fclose(aa_fid);
if (N_[k_N]<=100) {
MA = NY;
NA = NX;
data_size = sizeof(float)*MA*NA;
HANDLE_ERROR( cudaMalloc((void**)&d__A, data_size ) );
gridDim = dim3(1+MA/16,1+NA/16);
tid.tic();
fill LLL gridDim,blockDim RRR (d__A, MA , NA);
tid.toc("Filling of A");
tid.tic();
status = cublasSgemv(handle, CUBLAS_OP_N, MA, NA, &alpha, d__A, MA, d__x, 1, &beta, d__y, 1);
tid.toc(pa_full_mvm_et + k_N,"A MVM");
if (status!=CUBLAS_STATUS_SUCCESS)
printf(">>> ERROR! CUBLAS FAILED! <<<\n");
HANDLE_ERROR( cudaFree( d__A ) );
} else pa_full_mvm_et[k_N]=0;
pa.cleanup();
S.cleanup();
HANDLE_ERROR( cudaFree( d__x ) );
HANDLE_ERROR( cudaFree( d__y ) );
free(x);
free(y);
}
for (k_N=0;k_N<N_RUN;k_N++) {
pa_fid << setw(3) << N_[k_N] << " " << setw(6) << pa_full_mvm_et[k_N] << " " << pa_comp_mvm_et[k_N] << " " << pa_comp_eval_et[k_N] << "\n";
}
} // SAMPLE LOOP
atm.cleanup();
cublasDestroy(handle);
aa_fid.close();
pa_fid.close();
}
@ The following is the Matlab script to test the [[MVM_kernel]]:
<<MVM kernel test>>=
kern = parallel.gpu.CUDAKernel('aaStats.ptx','aaStats.cu','MVM_kernel');
kern.ThreadBlockSize = [16,16];
kern.GridSize = [2,2];
d1 = 2/3;
d2 = 2/3;
NU = 7;
N1 = 4;
N2 = 4;
g1 = 1;
g2 = 1;
[x,y] = meshgrid( linspace(-1,1,4) );
z = x(:) + 1i*y(:);
c = abs( bsxfun( @minus, z , z.') );
idx = [13 9 5 1 2 3 4];
cs = mat2cell( c , ones(1,4)*4,ones(1,4)*4);
cov = cell2mat(cellfun( @(x) x(idx)', cs(idx) , 'UniformOutput', false));
vout = gpuArray.zeros(16^2,1,'single');
vin = gpuArray.ones(16^2,1,'single');
sampling = 2/3;