Re: [LAD] vectorization

From: Jens M Andreasen <jens.andreasen@email-addr-hidden>
Date: Mon May 05 2008 - 20:18:39 EEST

Jussi!

Could you try this out with your proposed compiler options on your own
hardware?
Admittedly, the recycled PIII here is very unrepresentative, outdated
and old-skool (although it seems to shine when paired up with icc :)

--8<-----------------------------

// include everything just in case we need it ...

#include <unistd.h>
#include <stdio.h>
#include <sched.h>
#include <time.h>
#include <stdlib.h>

#define N 1024   /* number of complex elements in every buffer below */

#include <complex.h>

/* Layout 1: interleaved re/im pairs stored as plain float[N][2]. */
float ffta[N][2] __attribute__ ((aligned(16)));
float fftb[N][2] __attribute__ ((aligned(16)));
float data[N][2] __attribute__ ((aligned(16)));

/* Layout 2: the compiler's built-in _Complex float type. */
_Complex float cxA[N] __attribute__ ((aligned(16)));
_Complex float cxB[N] __attribute__ ((aligned(16)));
_Complex float cxD[N] __attribute__ ((aligned(16)));

/* Layout 3: split storage, with all real parts in one array and all
   imaginary parts in another. */
typedef struct
{
   float r[N] __attribute__ ((aligned(16)));
   float i[N] __attribute__ ((aligned(16)));
} cvec_t;

cvec_t cA;
cvec_t cB;
cvec_t cD;

int main()
{
   int n = 1000000;
   int i,j;
   char* s;

   clock_t clk = clock();
   s = "(_Complex)";

   for (j = 0; j < n; ++j)
      for (i = 0;i < N; ++i)
         cxD[i]+= cxA[i]*cxB[i];
   
   fprintf (stderr,"> clock: %d ms %s\n",(clock()-clk)/1000,s);

   s = "(cvec_t)";
   clk = clock();

   for (j = 0; j < n; ++j)
      for (i = 0;i < N; ++i)
      {
         cD.r[i] += cA.r[i] * cB.r[i] - cA.i[i] * cB.i[i];
         cD.i[i] += cA.r[i] * cB.i[i] + cA.i[i] * cB.r[i];
      }

   fprintf (stderr,"> clock: %d ms %s\n",(clock()-clk)/1000,s);

   s = "(original float array[N][2])";
   clk = clock();
   for (j = 0; j < n ; ++j)
      for (i = 0; i <N; ++i)
      {
         data [i][0] += ffta [i][0] * fftb [i][0] - ffta [i][1] * fftb [i][1];
         data [i][1] += ffta [i][0] * fftb [i][1] + ffta [i][1] * fftb [i][0];
      }
   fprintf (stderr,"> clock: %d ms %s\n",(clock()-clk)/1000,s);

   return 0;
}

On Mon, 2008-05-05 at 19:15 +0300, Jussi Laako wrote:
> Jussi Laako wrote:
> > I would propose something like "-march=prescott -O3 -ftree-vectorize" or
> > "-O3 -sse3 -ftree-vectorize".
>
> Sorry, typo, "-O3 -msse3 -ftree-vectorize" of course...
>
>
> - Jussi

-- 
_______________________________________________
Linux-audio-dev mailing list
Linux-audio-dev@email-addr-hidden
http://lists.linuxaudio.org/mailman/listinfo/linux-audio-dev
Received on Tue May 6 00:15:02 2008

This archive was generated by hypermail 2.1.8 : Tue May 06 2008 - 00:15:02 EEST