
// memspeed test

#include <stdlib.h>
#include <string.h>

#include "types.h"

// Size multipliers: 2^20 and 2^10.
#define MB (1024*1024)
#define KB (1024)

// Each test grows its run count until one timed pass lasts at least this long.
#define MIN_TIME 10.0 // minimum test time in seconds

// Defined elsewhere.  Two dtime() results are subtracted and the difference
// is compared against MIN_TIME, so it presumably returns a time stamp in
// seconds -- confirm against its definition.
extern double dtime(void);
// Defined elsewhere; called with a message when a test cannot proceed.
// NOTE(review): whether err() returns to its caller is not visible here.
extern void err(char* s);

// Forward declarations of the helpers defined further down in this file.
// _Inline is not defined in this file; presumably it comes from types.h.
_Inline void touchf(long* p1, long n);
_Inline void touchb(long* p1, long n);
_Inline long readmem(long* p1, long n);
_Inline void writemem(long* p1, long n);
_Inline int memcpy2(long* p1, long* p2, long n);
// Sink for values produced by the tests.  It is volatile, so accesses to it
// cannot be removed by the optimizer (see the "fool optimizer" lines).
volatile long bluttanbla;

double 
pmb_memspeed(int bytes) {
  char* p;
  char* p1;
  char* p2;
  s32 i,j, runs;
  double t2, t1;
  s32 mem2;

  mem2 = bytes/2;
  p = (char*)malloc(bytes);
  if (!p) {
    err("memspeed test: can't allocate enough memory to do test");
  }

  /* First, touch all the memory to activate it */
  touchf((long*)p, bytes);
  touchb((long*)(p+bytes), bytes);
  touchf((long*)p, bytes);
  touchb((long*)(p+bytes), bytes);

  runs = 1;
  p1 = p;
  p2 = p+mem2;
  while (1) { 
    t1 = dtime();
    for (i = 0; i < runs; i++) {
      memcpy(p2, p1, mem2); // copy forwards
      memcpy(p1, p2, mem2); // copy backwards
    }
    t2 = dtime();
    if ((t2-t1) < MIN_TIME) {
      if ((t2-t1) < 0.1) {
        runs *= MIN_TIME*10;
      } else {
        runs = (MIN_TIME*1.2)/(t2-t1)*runs;
      }
    } else {
      bluttanbla = p[(rand() * rand()) % bytes]; // fool optimizer
      free(p);
      return runs*bytes/(t2-t1);
    }
  }
  // we won't get here
  err("memspeed: internal error 1");
  return -1.0;
}

double 
pmb_memspeedr(int bytes) {

  char* p;
  s32 i,j, runs;
  double t2, t1;

  p = (char*)malloc(bytes);
  if (!p) {
    err("memspeed test: can't allocate enough memory to do test");
  }

  /* First, touch all the memory to activate it */
  touchf((long*)p, bytes);
  touchb((long*)(p+bytes), bytes);

  runs = 1;
  while (1) { 
    t1 = dtime();
    for (i = 0; i < runs; i++) {
      bluttanbla += readmem((long*)p, bytes);
    }
    t2 = dtime();
    if ((t2-t1) < MIN_TIME) {
      if ((t2-t1) < 0.1) {
        runs *= MIN_TIME*10;
      } else {
        runs = (MIN_TIME*1.2)/(t2-t1)*runs;
      }
    } else {
      bluttanbla += p[(rand() * rand()) % bytes]; // fool optimizer
      free(p);
      return runs*bytes/(t2-t1);
    }
  }
  // we won't get here
  err("memspeed: internal error 1");
  return -1.0;
}

double 
pmb_memspeedw(int bytes) {

  char* p;
  s32 i,j, runs;
  double t2, t1;

  p = (char*)malloc(bytes);
  if (!p) {
    err("memspeed test: can't allocate enough memory to do test");
  }

  /* First, touch all the memory to activate it */
  touchf((long*)p, bytes);
  touchb((long*)(p+bytes), bytes);

  runs = 1;
  while (1) {
    t1 = dtime();
    for (i = 0; i < runs; i++) {
      writemem((long*)p, bytes);
    }
    t2 = dtime();
    if ((t2-t1) < MIN_TIME) {
      if ((t2-t1) < 0.1) {
        runs *= MIN_TIME*10;
      } else {
        runs = (MIN_TIME*1.2)/(t2-t1)*runs;
      }
    } else {
      bluttanbla = p[(rand() * rand()) % bytes]; // fool optimizer
      free(p);
      return runs*bytes/(t2-t1);
    }
  }
  // we won't get here
  err("memspeed: internal error 1");
  return -1.0;
}

// not used:
/*
 * memcpy2 - copy from p2 to p1 one long at a time, 16 longs per iteration.
 *
 * p1: destination, p2: source, n: number of bytes to copy.
 *
 * Only whole groups of 16 longs are copied; any remaining tail of n is left
 * untouched.  The group size is derived from sizeof(long) so the copy never
 * runs past n bytes, whatever the width of long.
 *
 * Always returns 0.
 */
_Inline int
memcpy2(long* p1, long* p2, long n) {
  long groups = n / (long)(16 * sizeof(long));
  long i;
  for (i = 0; i < groups; i++) {
    *p1++ = *p2++;
    *p1++ = *p2++;
    *p1++ = *p2++;
    *p1++ = *p2++;
    *p1++ = *p2++;
    *p1++ = *p2++;
    *p1++ = *p2++;
    *p1++ = *p2++;

    *p1++ = *p2++;
    *p1++ = *p2++;
    *p1++ = *p2++;
    *p1++ = *p2++;
    *p1++ = *p2++;
    *p1++ = *p2++;
    *p1++ = *p2++;
    *p1++ = *p2++;
  }
  return 0; // the function is declared int; give callers a defined value
}

/*
 * touchf - touch a buffer front to back.
 *
 * p1: start of the buffer, n: its size in bytes.
 *
 * For each whole long in the first n bytes, the old value is added to
 * bluttanbla and the long is overwritten with rand().  The element count is
 * derived from sizeof(long) so the walk stays inside n bytes, whatever the
 * width of long.
 */
_Inline void
touchf(long* p1, long n) {
  long count = n / (long)sizeof(long);
  long i;
  for (i = 0; i < count; i++) {
    bluttanbla += *p1;
    *p1++ = rand();
  }
}

/*
 * touchb - touch a buffer back to front.
 *
 * p1: address just past the end of the buffer, n: the buffer size in bytes.
 *
 * Walks downwards from p1: each whole long in the n bytes below p1 is
 * overwritten with rand() and the stored value is added to bluttanbla.  The
 * element count is derived from sizeof(long) so the walk never goes more
 * than n bytes below p1, whatever the width of long.
 */
_Inline void  
touchb(long* p1, long n) {
  long count = n / (long)sizeof(long);
  long i;
  for (i = 0; i < count; i++) {
    *(--p1)= rand();
    bluttanbla += *p1;
  }
}

/*
 * writemem - store zeros over a buffer, 32 longs per loop iteration.
 *
 * p1: start of the buffer, n: its size in bytes.
 *
 * Only whole groups of 32 longs are written; any remaining tail of n is left
 * untouched.  The group size is derived from sizeof(long) so the stores
 * never run past n bytes, whatever the width of long.
 */
_Inline void
writemem(long* p1, long n) {
  long groups = n / (long)(32 * sizeof(long));
  long i;
  for (i = 0; i < groups; i++) {
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;

    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;

    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;

    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
    *p1++ = 0;
  }

}

/*
 * readmem - read a buffer sequentially, 32 longs per loop iteration.
 *
 * p1: start of the buffer, n: its size in bytes.
 *
 * Only whole groups of 32 longs are read; any remaining tail of n is
 * skipped.  The group size is derived from sizeof(long) so the reads never
 * run past n bytes, whatever the width of long.
 *
 * Returns the sum of the longs read.  The sum is accumulated in unsigned
 * arithmetic, which wraps instead of overflowing, and converted back to long
 * on return.
 */
_Inline long
readmem(long* p1, long n) {
  long groups = n / (long)(32 * sizeof(long));
  long i;
  unsigned long t = 0;
  for (i = 0; i < groups; i++) {
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);

    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);

    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);

    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
    t += (unsigned long)(*p1++);
  }
  return (long)t;
}

