/*
 * cutils - Color Conversion Utilities - implementation
 * Copyright(c) 2002-2004 of wave++ (Yuri D'Elia) <wavexx@thregr.org>
 * Distributed under GNU LGPL without ANY warranty.
 *
 * For each color conversion, a set of 4 functions is made (char, float, vector
 * char and vector float) in a design similar to OpenGL, for a total of 8
 * functions per color transform. This leaves us room for future optimizations.
 */

/* interface and headers */
#include "cutils.h"
#include <math.h>


/*
 * Helpers, macros and constants
 */

/*
 * Clamp v to the closed range [a, b].
 * Every argument is parenthesized so that expressions built from
 * low-precedence operators (?:, &, |, ...) are not re-associated by the
 * expansion. Arguments may be evaluated more than once: do not pass
 * expressions with side effects.
 */
#define CU_CLAMP(v, a, b)	(((v) < (a))? (a): (((v) > (b))? (b): (v)))


/*
 * Linear operations
 */

/*
 * v = Ma, generic unrolled version.
 *
 * v: receives the 3-component product
 * M: 3x3 matrix, row-major (M[row][column])
 * a: 3-component input vector
 *
 * The function is static: a plain file-scope `inline' in C99 and later is
 * only an inline definition, so any call the compiler decides not to inline
 * would be left unresolved at link time.
 *
 * The input components are read before any output is stored, so v and a may
 * refer to the same array.
 */
static inline void cu_vMa3fv(float v[3], const float M[3][3], const float a[3])
{
  const float a0 = a[0];
  const float a1 = a[1];
  const float a2 = a[2];

  v[0] = M[0][0] * a0 + M[0][1] * a1 + M[0][2] * a2;
  v[1] = M[1][0] * a0 + M[1][1] * a1 + M[1][2] * a2;
  v[2] = M[2][0] * a0 + M[2][1] * a1 + M[2][2] * a2;
}


/*
 * v = a+b, generic unrolled version.
 *
 * v: receives the 3-component sum
 * a, b: 3-component operands
 *
 * The function is static: a plain file-scope `inline' in C99 and later is
 * only an inline definition, so any call the compiler decides not to inline
 * would be left unresolved at link time.
 *
 * Each output component depends only on the matching input components, so v
 * may be the same array as a or b (the 8bit conversions rely on this).
 */
static inline void cu_vab3fv(float v[3], const float a[3], const float b[3])
{
  v[0] = a[0] + b[0];
  v[1] = a[1] + b[1];
  v[2] = a[2] + b[2];
}


/*
 * YUV conversion routines.
 *
 * YUV/YCC tristimulus as from CCIR 601-1. Sadly, the 8bit unsigned conversion
 * needs both shifting and clamping to be in-range, so the conversion is not
 * particularly fast.
 *
 * A possible optimization could be normalizing the UV factors to remove the
 * clamps, at the expense of a non-standard conversion, or shifting them and
 * replacing CU_CLAMP with a toward-zero truncation.
 */

/*
 * Forward matrix, used as yuv = RGB2YUV * rgb by cuRgbYuv3fv.
 * Row i holds the weights applied to (R, G, B) to produce output component
 * i, in the order Y, U, V. The Y weights sum to one and each chroma row sums
 * to zero, so equal R, G and B give U = V = 0 (before any 8bit offset).
 */
static const float RGB2YUV[3][3] =
{
  { 0.29900,  0.58700,  0.11400},
  {-0.16874, -0.33126,  0.50000},
  { 0.50000, -0.41869, -0.08131}
};

/*
 * Inverse matrix, used as rgb = YUV2RGB * yuv by cuYuvRgb3fv.
 * Row i holds the weights applied to (Y, U, V) to produce output component
 * i, in the order R, G, B: Y contributes with weight one to every channel,
 * R takes no U term and B takes no V term.
 */
static const float YUV2RGB[3][3] =
{
  { 1.00000,  0.00000,  1.40200},
  { 1.00000, -0.34414, -0.71414},
  { 1.00000,  1.77200,  0.00000}
};

/*
 * 8bit value offsets, in (Y, U, V) order.
 * RGB2YUV_C is added to the result of the forward transform by cuRgbYuv3cv,
 * moving U and V up by 128 before they are clamped to 0..255; YUV2RGB_C is
 * added to the input of the inverse transform by cuYuvRgb3cv to undo that
 * shift. Y is left untouched in both directions.
 */
static const float RGB2YUV_C[3] = {0.,  128.,  128.};
static const float YUV2RGB_C[3] = {0., -128., -128.};


/*
 * Scalar float RGB -> YUV.
 * Packs r, g, b into a temporary vector, delegates to cuRgbYuv3fv and stores
 * the three resulting components through y, u and v.
 */
void cuRgbYuv3f(float r, float g, float b, float* y, float* u, float* v)
{
  float src[3];
  float dst[3];

  src[0] = r;
  src[1] = g;
  src[2] = b;

  cuRgbYuv3fv(src, dst);

  *y = dst[0];
  *u = dst[1];
  *v = dst[2];
}


/*
 * Scalar float YUV -> RGB.
 * Packs y, u, v into a temporary vector, delegates to cuYuvRgb3fv and stores
 * the three resulting components through r, g and b.
 */
void cuYuvRgb3f(float y, float u, float v, float* r, float* g, float* b)
{
  float src[3];
  float dst[3];

  src[0] = y;
  src[1] = u;
  src[2] = v;

  cuYuvRgb3fv(src, dst);

  *r = dst[0];
  *g = dst[1];
  *b = dst[2];
}


/*
 * Scalar 8bit RGB -> YUV.
 * Packs r, g, b into a temporary vector, delegates to cuRgbYuv3cv and stores
 * the three resulting bytes through y, u and v.
 */
void cuRgbYuv3c(uint8_t r, uint8_t g, uint8_t b,
    uint8_t* y, uint8_t* u, uint8_t* v)
{
  uint8_t src[3];
  uint8_t dst[3];

  src[0] = r;
  src[1] = g;
  src[2] = b;

  cuRgbYuv3cv(src, dst);

  *y = dst[0];
  *u = dst[1];
  *v = dst[2];
}


/*
 * Scalar 8bit YUV -> RGB.
 * Packs y, u, v into a temporary vector, delegates to cuYuvRgb3cv and stores
 * the three resulting bytes through r, g and b.
 */
void cuYuvRgb3c(uint8_t y, uint8_t u, uint8_t v,
    uint8_t* r, uint8_t* g, uint8_t* b)
{
  uint8_t src[3];
  uint8_t dst[3];

  src[0] = y;
  src[1] = u;
  src[2] = v;

  cuYuvRgb3cv(src, dst);

  *r = dst[0];
  *g = dst[1];
  *b = dst[2];
}


/*
 * 8bit RGB -> YUV, vector form.
 *
 * rgb: three input bytes (R, G, B)
 * yuv: receives three output bytes (Y, U, V)
 *
 * The bytes are widened to float, run through the float transform, shifted
 * by RGB2YUV_C so that U and V are centred on 128, clamped to 0..255 and
 * rounded to the nearest integer.
 */
void cuRgbYuv3cv(const uint8_t rgb[3], uint8_t yuv[3])
{
  float in[3] = {rgb[0], rgb[1], rgb[2]};
  float out[3];

  cuRgbYuv3fv(in, out);
  cu_vab3fv(out, RGB2YUV_C, out);

  /* Clamp first so the conversion below is always in range, then add one
   * half: the float -> integer conversion truncates, so this rounds to
   * nearest instead of biasing every component downwards (a result that is
   * a hair below an integer because of float error would otherwise lose a
   * whole step). */
  yuv[0] = (uint8_t)(CU_CLAMP(out[0], 0.f, 255.f) + 0.5f);
  yuv[1] = (uint8_t)(CU_CLAMP(out[1], 0.f, 255.f) + 0.5f);
  yuv[2] = (uint8_t)(CU_CLAMP(out[2], 0.f, 255.f) + 0.5f);
}


/*
 * 8bit YUV -> RGB, vector form.
 *
 * yuv: three input bytes (Y, U, V)
 * rgb: receives three output bytes (R, G, B)
 *
 * The bytes are widened to float, shifted by YUV2RGB_C so that U and V are
 * centred on zero, run through the float transform, clamped to 0..255 and
 * rounded to the nearest integer.
 */
void cuYuvRgb3cv(const uint8_t yuv[3], uint8_t rgb[3])
{
  float in[3] = {yuv[0], yuv[1], yuv[2]};
  float out[3];

  cu_vab3fv(in, YUV2RGB_C, in);
  cuYuvRgb3fv(in, out);

  /* Clamp first so the conversion below is always in range, then add one
   * half: the float -> integer conversion truncates, so this rounds to
   * nearest instead of biasing every component downwards. */
  rgb[0] = (uint8_t)(CU_CLAMP(out[0], 0.f, 255.f) + 0.5f);
  rgb[1] = (uint8_t)(CU_CLAMP(out[1], 0.f, 255.f) + 0.5f);
  rgb[2] = (uint8_t)(CU_CLAMP(out[2], 0.f, 255.f) + 0.5f);
}


/*
 * Float RGB -> YUV, vector form: yuv = RGB2YUV * rgb.
 * Only the matrix product is computed here; no offset is added and nothing
 * is clamped (the 8bit variant, cuRgbYuv3cv, does both on top of this call).
 */
void cuRgbYuv3fv(const float rgb[3], float yuv[3])
{
  cu_vMa3fv(yuv, RGB2YUV, rgb);
}


/*
 * Float YUV -> RGB, vector form: rgb = YUV2RGB * yuv.
 * Only the matrix product is computed here; no offset is removed and nothing
 * is clamped (the 8bit variant, cuYuvRgb3cv, does both around this call).
 */
void cuYuvRgb3fv(const float yuv[3], float rgb[3])
{
  cu_vMa3fv(rgb, YUV2RGB, yuv);
}