blob: 512404d4101cff452e00eca25ad1abf61731e78b [file] [log] [blame]
/*
* Copyright 2009-2015 Freescale Semiconductor, Inc. All Rights Reserved.
*/
/*
* The code contained herein is licensed under the GNU General Public
* License. You may obtain a copy of the GNU General Public License
* Version 2 or later at the following locations:
*
* http://www.opensource.org/licenses/gpl-license.html
* http://www.gnu.org/copyleft/gpl.html
*/
/*
* @file ipu_calc_stripes_sizes.c
*
* @brief IPU IC functions
*
* @ingroup IPU
*/
#include <linux/ipu-v3.h>
#include <linux/module.h>
#include <linux/math64.h>
#define BPP_32 0
#define BPP_16 3
#define BPP_8 5
#define BPP_24 1
#define BPP_12 4
#define BPP_18 2
static u32 truncate(u32 up, /* 0: down; else: up */
u64 a, /* must be non-negative */
u32 b)
{
u32 d;
u64 div;
div = div_u64(a, b);
d = b * (div >> 32);
if (up && (a > (((u64)d) << 32)))
return d+b;
else
return d;
}
static unsigned int f_calc(unsigned int pfs, unsigned int bpp, unsigned int *write)
{/* return input_f */
unsigned int f_calculated = 0;
switch (pfs) {
case IPU_PIX_FMT_YVU422P:
case IPU_PIX_FMT_YUV422P:
case IPU_PIX_FMT_YUV420P2:
case IPU_PIX_FMT_YUV420P:
case IPU_PIX_FMT_YVU420P:
case IPU_PIX_FMT_YUV444P:
f_calculated = 16;
break;
case IPU_PIX_FMT_RGB565:
case IPU_PIX_FMT_YUYV:
case IPU_PIX_FMT_UYVY:
f_calculated = 8;
break;
case IPU_PIX_FMT_NV12:
f_calculated = 8;
break;
default:
f_calculated = 0;
break;
}
if (!f_calculated) {
switch (bpp) {
case BPP_32:
f_calculated = 2;
break;
case BPP_16:
f_calculated = 4;
break;
case BPP_8:
case BPP_24:
f_calculated = 8;
break;
case BPP_12:
f_calculated = 16;
break;
case BPP_18:
f_calculated = 32;
break;
default:
f_calculated = 0;
break;
}
}
return f_calculated;
}
static unsigned int m_calc(unsigned int pfs)
{
unsigned int m_calculated = 0;
switch (pfs) {
case IPU_PIX_FMT_YUV420P2:
case IPU_PIX_FMT_YUV420P:
case IPU_PIX_FMT_YVU422P:
case IPU_PIX_FMT_YUV422P:
case IPU_PIX_FMT_YVU420P:
case IPU_PIX_FMT_YUV444P:
m_calculated = 16;
break;
case IPU_PIX_FMT_NV12:
case IPU_PIX_FMT_YUYV:
case IPU_PIX_FMT_UYVY:
m_calculated = 8;
break;
default:
m_calculated = 8;
break;
}
return m_calculated;
}
static int calc_split_resize_coeffs(unsigned int inSize, unsigned int outSize,
unsigned int *resizeCoeff,
unsigned int *downsizeCoeff)
{
uint32_t tempSize;
uint32_t tempDownsize;
if (inSize > 4096) {
pr_debug("IC input size(%d) cannot exceed 4096\n",
inSize);
return -EINVAL;
}
if (outSize > 1024) {
pr_debug("IC output size(%d) cannot exceed 1024\n",
outSize);
return -EINVAL;
}
if ((outSize << 3) < inSize) {
pr_debug("IC cannot downsize more than 8:1\n");
return -EINVAL;
}
/* Compute downsizing coefficient */
/* Output of downsizing unit cannot be more than 1024 */
tempDownsize = 0;
tempSize = inSize;
while (((tempSize > 1024) || (tempSize >= outSize * 2)) &&
(tempDownsize < 2)) {
tempSize >>= 1;
tempDownsize++;
}
*downsizeCoeff = tempDownsize;
/* compute resizing coefficient using the following equation:
resizeCoeff = M*(SI -1)/(SO - 1)
where M = 2^13, SI - input size, SO - output size */
*resizeCoeff = (8192L * (tempSize - 1)) / (outSize - 1);
if (*resizeCoeff >= 16384L) {
pr_debug("Overflow on IC resize coefficient.\n");
return -EINVAL;
}
pr_debug("resizing from %u -> %u pixels, "
"downsize=%u, resize=%u.%lu (reg=%u)\n", inSize, outSize,
*downsizeCoeff, (*resizeCoeff >= 8192L) ? 1 : 0,
((*resizeCoeff & 0x1FFF) * 10000L) / 8192L, *resizeCoeff);
return 0;
}
/* Stripe parameters calculator */
/**************************************************************************
Notes:
MSW = the maximal width allowed for a stripe
i.MX31: 720, i.MX35: 800, i.MX37/51/53: 1024
cirr = the maximal inverse resizing ratio for which overlap in the input
is requested; typically cirr~2
flags
bit 0 - equal_stripes
0 each stripe is allowed to have independent parameters
for maximal image quality
1 the stripes are requested to have identical parameters
(except the base address), for maximal performance
bit 1 - vertical/horizontal
0 horizontal
1 vertical
If performance is the top priority (above image quality)
Avoid overlap, by setting CIRR = 0
This will also force effectively identical_stripes = 1
Choose IF & OF that corresponds to the same IOX/SX for both stripes
Choose IFW & OFW such that
IFW/IM, IFW/IF, OFW/OM, OFW/OF are even integers
The function returns an error status:
0: no error
1: invalid input parameters -> aborted without result
Valid parameters should satisfy the following conditions
IFW <= OFW, otherwise downsizing is required
- which is not supported yet
4 <= IFW,OFW, so some interpolation may be needed even without overlap
IM, OM, IF, OF should not vanish
2*IF <= IFW
so the frame can be split to two equal stripes, even without overlap
2*(OF+IF/irr_opt) <= OFW
so a valid positive INW exists even for equal stripes
OF <= MSW, otherwise, the left stripe cannot be sufficiently large
MSW < OFW, so splitting to stripes is required
OFW <= 2*MSW, so two stripes are sufficient
(this also implies that 2<=MSW)
2: OF is not a multiple of OM - not fully-supported yet
Output is produced but OW is not guaranited to be a multiple of OM
4: OFW reduced to be a multiple of OM
8: CIRR > 1: truncated to 1
Overlap is not supported (and not needed) y for upsizing)
**************************************************************************/
int ipu_calc_stripes_sizes(const unsigned int input_frame_width,
/* input frame width;>1 */
unsigned int output_frame_width, /* output frame width; >1 */
const unsigned int maximal_stripe_width,
/* the maximal width allowed for a stripe */
const unsigned long long cirr, /* see above */
const unsigned int flags, /* see above */
u32 input_pixelformat,/* pixel format after of read channel*/
u32 output_pixelformat,/* pixel format after of write channel*/
struct stripe_param *left,
struct stripe_param *right)
{
const unsigned int irr_frac_bits = 13;
const unsigned long irr_steps = 1 << irr_frac_bits;
const u64 dirr = ((u64)1) << (32 - 2);
/* The maximum relative difference allowed between the irrs */
const u64 cr = ((u64)4) << 32;
/* The importance ratio between the two terms in the cost function below */
unsigned int status;
unsigned int temp;
unsigned int onw_min;
unsigned int inw = 0, onw = 0, inw_best = 0;
/* number of pixels in the left stripe NOT hidden by the right stripe */
u64 irr_opt; /* the optimal inverse resizing ratio */
u64 rr_opt; /* the optimal resizing ratio = 1/irr_opt*/
u64 dinw; /* the misalignment between the stripes */
/* (measured in units of input columns) */
u64 difwl, difwr = 0;
/* The number of input columns not reflected in the output */
/* the resizing ratio used for the right stripe is */
/* left->irr and right->irr respectively */
u64 cost, cost_min;
u64 div; /* result of division */
bool equal_stripes = (flags & 0x1) != 0;
bool vertical = (flags & 0x2) != 0;
unsigned int input_m, input_f, output_m, output_f; /* parameters for upsizing by stripes */
unsigned int resize_coeff;
unsigned int downsize_coeff;
status = 0;
if (vertical) {
input_f = 2;
input_m = 8;
output_f = 8;
output_m = 2;
} else {
input_f = f_calc(input_pixelformat, 0, NULL);
input_m = m_calc(input_pixelformat);
output_f = input_m;
output_m = m_calc(output_pixelformat);
}
if ((input_frame_width < 4) || (output_frame_width < 4))
return 1;
irr_opt = div_u64((((u64)(input_frame_width - 1)) << 32),
(output_frame_width - 1));
rr_opt = div_u64((((u64)(output_frame_width - 1)) << 32),
(input_frame_width - 1));
if ((input_m == 0) || (output_m == 0) || (input_f == 0) || (output_f == 0)
|| (input_frame_width < (2 * input_f))
|| ((((u64)output_frame_width) << 32) <
(2 * ((((u64)output_f) << 32) + (input_f * rr_opt))))
|| (maximal_stripe_width < output_f)
|| ((output_frame_width <= maximal_stripe_width)
&& (equal_stripes == 0))
|| ((2 * maximal_stripe_width) < output_frame_width))
return 1;
if (output_f % output_m)
status += 2;
temp = truncate(0, (((u64)output_frame_width) << 32), output_m);
if (temp < output_frame_width) {
output_frame_width = temp;
status += 4;
}
pr_debug("---------------->\n"
"if = %d\n"
"im = %d\n"
"of = %d\n"
"om = %d\n"
"irr_opt = %llu\n"
"rr_opt = %llu\n"
"cirr = %llu\n"
"pixel in = %08x\n"
"pixel out = %08x\n"
"ifw = %d\n"
"ofwidth = %d\n",
input_f,
input_m,
output_f,
output_m,
irr_opt,
rr_opt,
cirr,
input_pixelformat,
output_pixelformat,
input_frame_width,
output_frame_width
);
if (equal_stripes) {
if ((irr_opt > cirr) /* overlap in the input is not requested */
&& ((input_frame_width % (input_m << 1)) == 0)
&& ((input_frame_width % (input_f << 1)) == 0)
&& ((output_frame_width % (output_m << 1)) == 0)
&& ((output_frame_width % (output_f << 1)) == 0)) {
/* without overlap */
left->input_width = right->input_width = right->input_column =
input_frame_width >> 1;
left->output_width = right->output_width = right->output_column =
output_frame_width >> 1;
left->input_column = 0;
left->output_column = 0;
div = div_u64(((((u64)irr_steps) << 32) *
(right->input_width - 1)), (right->output_width - 1));
left->irr = right->irr = truncate(0, div, 1);
} else { /* with overlap */
onw = truncate(0, (((u64)output_frame_width - 1) << 32) >> 1,
output_f);
inw = truncate(0, onw * irr_opt, input_f);
/* this is the maximal inw which allows the same resizing ratio */
/* in both stripes */
onw = truncate(1, (inw * rr_opt), output_f);
div = div_u64((((u64)(irr_steps * inw)) <<
32), onw);
left->irr = right->irr = truncate(0, div, 1);
left->output_width = right->output_width =
output_frame_width - onw;
/* These are valid assignments for output_width, */
/* assuming output_f is a multiple of output_m */
div = (((u64)(left->output_width-1) * (left->irr)) << 32);
div = (((u64)1) << 32) + div_u64(div, irr_steps);
left->input_width = right->input_width = truncate(1, div, input_m);
div = div_u64((((u64)((right->output_width - 1) * right->irr)) <<
32), irr_steps);
difwr = (((u64)(input_frame_width - 1 - inw)) << 32) - div;
div = div_u64((difwr + (((u64)input_f) << 32)), 2);
left->input_column = truncate(0, div, input_f);
/* This splits the truncated input columns evenly */
/* between the left and right margins */
right->input_column = left->input_column + inw;
left->output_column = 0;
right->output_column = onw;
}
if (left->input_width > left->output_width) {
if (calc_split_resize_coeffs(left->input_width,
left->output_width,
&resize_coeff,
&downsize_coeff) < 0)
return -EINVAL;
if (downsize_coeff > 0) {
left->irr = right->irr =
(downsize_coeff << 14) | resize_coeff;
}
}
pr_debug("inw %d, onw %d, ilw %d, ilc %d, olw %d,"
" irw %d, irc %d, orw %d, orc %d, "
"difwr %llu, lirr %u\n",
inw, onw, left->input_width,
left->input_column, left->output_width,
right->input_width, right->input_column,
right->output_width,
right->output_column, difwr, left->irr);
} else { /* independent stripes */
onw_min = output_frame_width - maximal_stripe_width;
/* onw is a multiple of output_f, in the range */
/* [max(output_f,output_frame_width-maximal_stripe_width),*/
/*min(output_frame_width-2,maximal_stripe_width)] */
/* definitely beyond the cost of any valid setting */
cost_min = (((u64)input_frame_width) << 32) + cr;
onw = truncate(0, ((u64)maximal_stripe_width), output_f);
if (output_frame_width - onw == 1)
onw -= output_f; /* => onw and output_frame_width-1-onw are positive */
inw = truncate(0, onw * irr_opt, input_f);
/* this is the maximal inw which allows the same resizing ratio */
/* in both stripes */
onw = truncate(1, inw * rr_opt, output_f);
do {
div = div_u64((((u64)(irr_steps * inw)) << 32), onw);
left->irr = truncate(0, div, 1);
div = div_u64((((u64)(onw * left->irr)) << 32),
irr_steps);
dinw = (((u64)inw) << 32) - div;
div = div_u64((((u64)((output_frame_width - 1 - onw) * left->irr)) <<
32), irr_steps);
difwl = (((u64)(input_frame_width - 1 - inw)) << 32) - div;
cost = difwl + (((u64)(cr * dinw)) >> 32);
if (cost < cost_min) {
inw_best = inw;
cost_min = cost;
}
inw -= input_f;
onw = truncate(1, inw * rr_opt, output_f);
/* This is the minimal onw which allows the same resizing ratio */
/* in both stripes */
} while (onw >= onw_min);
inw = inw_best;
onw = truncate(1, inw * rr_opt, output_f);
div = div_u64((((u64)(irr_steps * inw)) << 32), onw);
left->irr = truncate(0, div, 1);
left->output_width = onw;
right->output_width = output_frame_width - onw;
/* These are valid assignments for output_width, */
/* assuming output_f is a multiple of output_m */
left->input_width = truncate(1, ((u64)(inw + 1)) << 32, input_m);
right->input_width = truncate(1, ((u64)(input_frame_width - inw)) <<
32, input_m);
div = div_u64((((u64)(irr_steps * (input_frame_width - 1 - inw))) <<
32), (right->output_width - 1));
right->irr = truncate(0, div, 1);
temp = truncate(0, ((u64)left->irr) * ((((u64)1) << 32) + dirr), 1);
if (temp < right->irr)
right->irr = temp;
div = div_u64(((u64)((right->output_width - 1) * right->irr) <<
32), irr_steps);
difwr = (u64)(input_frame_width - 1 - inw) - div;
div = div_u64((difwr + (((u64)input_f) << 32)), 2);
left->input_column = truncate(0, div, input_f);
/* This splits the truncated input columns evenly */
/* between the left and right margins */
right->input_column = left->input_column + inw;
left->output_column = 0;
right->output_column = onw;
if (left->input_width > left->output_width) {
if (calc_split_resize_coeffs(left->input_width,
left->output_width,
&resize_coeff,
&downsize_coeff) < 0)
return -EINVAL;
left->irr = (downsize_coeff << 14) | resize_coeff;
}
if (right->input_width > right->output_width) {
if (calc_split_resize_coeffs(right->input_width,
right->output_width,
&resize_coeff,
&downsize_coeff) < 0)
return -EINVAL;
right->irr = (downsize_coeff << 14) | resize_coeff;
}
}
return status;
}
EXPORT_SYMBOL(ipu_calc_stripes_sizes);