/* NVClock 0.8 - Linux overclocker for NVIDIA cards
 *
 * site: http://nvclock.sourceforge.net
 *
 * Copyright(C) 2001-2004 Roderick Colenbrander
 *
 * Thanks to Erik Waling for doing Smartdimmer coding/testing. (his code isn't the one in NVClock)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 */

/* This source file uses some clock calculation code from nvidia's xfree86 driver.
   To keep Nvidia happy I have added their copyright. The way they interpret it (see linux kernel riva_hw.h)
   is that you need to add the disclaimer and copyright and when that's done
   you can basically do what you want.
*/

 /***************************************************************************\
|*                                                                           *|
|*       Copyright 1993-2003 NVIDIA, Corporation.  All rights reserved.      *|
|*                                                                           *|
|*     NOTICE TO USER:   The source code  is copyrighted under  U.S. and     *|
|*     international laws.  Users and possessors of this source code are     *|
|*     hereby granted a nonexclusive,  royalty-free copyright license to     *|
|*     use this code in individual and commercial software.                  *|
|*                                                                           *|
|*     Any use of this source code must include,  in the user documenta-     *|
|*     tion and  internal comments to the code,  notices to the end user     *|
|*     as follows:                                                           *|
|*                                                                           *|
|*       Copyright 1993-2003 NVIDIA, Corporation.  All rights reserved.      *|
|*                                                                           *|
|*     NVIDIA, CORPORATION MAKES NO REPRESENTATION ABOUT THE SUITABILITY     *|
|*     OF  THIS SOURCE  CODE  FOR ANY PURPOSE.  IT IS  PROVIDED  "AS IS"     *|
|*     WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.  NVIDIA, CORPOR-     *|
|*     ATION DISCLAIMS ALL WARRANTIES  WITH REGARD  TO THIS SOURCE CODE,     *|
|*     INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, NONINFRINGE-     *|
|*     MENT,  AND FITNESS  FOR A PARTICULAR PURPOSE.   IN NO EVENT SHALL     *|
|*     NVIDIA, CORPORATION  BE LIABLE FOR ANY SPECIAL,  INDIRECT,  INCI-     *|
|*     DENTAL, OR CONSEQUENTIAL DAMAGES,  OR ANY DAMAGES  WHATSOEVER RE-     *|
|*     SULTING FROM LOSS OF USE,  DATA OR PROFITS,  WHETHER IN AN ACTION     *|
|*     OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,  ARISING OUT OF     *|
|*     OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOURCE CODE.     *|
|*                                                                           *|
|*     U.S. Government  End  Users.   This source code  is a "commercial     *|
|*     item,"  as that  term is  defined at  48 C.F.R. 2.101 (OCT 1995),     *|
|*     consisting  of "commercial  computer  software"  and  "commercial     *|
|*     computer  software  documentation,"  as such  terms  are  used in     *|
|*     48 C.F.R. 12.212 (SEPT 1995)  and is provided to the U.S. Govern-     *|
|*     ment only as  a commercial end item.   Consistent with  48 C.F.R.     *|
|*     12.212 and  48 C.F.R. 227.7202-1 through  227.7202-4 (JUNE 1995),     *|
|*     all U.S. Government End Users  acquire the source code  with only     *|
|*     those rights set forth herein.                                        *|
|*                                                                           *|
 \***************************************************************************/

#include <stdio.h>
#include "nvclock.h"
#include "backend.h"

/*
/ The original NV40 gpu was used as the base for 6800LE/6800NU/6800GT/6800Ultra
/ GPUs. The difference between all these models lie in the amount of enabled
/ pixel/vertex pipelines and clockspeeds. For instance the 6800LE ships with 8
/ pixel pipelines while the 6800GT ships with 16 of those. Right after production
/ all GPUs are tested, if all pipelines work and they run at high clocks they
/ are called Ultra or if pipes are broken they are called 6800NU(12p) or 6800LE(8p).
/ Further in some cases 'good' GPUs can be rebranded too if there's a shortage of
/ 6800NU/6800LE GPUs. The key to this rebranding is register 0x1540 which contains
/ the enabled pixel/vertex pipelines. Depending on the GPU architecture a bit can
/ correspond to a single vertex shader or to a block containing two or four
/ pixel pipelines.
/
/ We now define some words coming from Rivatuner as people are familiar with those.
/ A 'good' GPU for which pipelines are disabled just to get enough lowend models
/ is said to contain 'Software masked units'. In this case the videobios initializes
/ 0x1540 with a value that locks out some units.
/ GPUs which didn't pass the hardware quality testing contain 'Hardware masked units'.
/ In this case the bios initializes 0x1540 with a value that enables all pipelines.
/ A certain (read-only) register (0xc010) contains a mask of units to disable by default.
/ The bios loads this value into another register (0xc020) at startup. The value from
/ 0xc020 is then used by the drivers to disable units in 0x1540. For example, by clearing this
/ register before the drivers are loaded, you can prevent the masked units from being disabled.

/ 1540 units_cfg (rw) second byte contains the vertex configuration and first byte the pixel pipeline configuration
/ c010 default_mask (r) pixel pipelines start at +22, while vertex start at +16 (is this correct for all cards?)
/ c020 active_mask (rw)
/ c024/c028/c02c are being set to 0, why? (they correspond to c014/c018/c01c)
/
/ Below are supported pipeline configurations on various types of cards. Not sure
/ if everything is fully correct though:
/ - NV40 0x3f0f 6800 cards
/ - NV41 0x1f07 6800 pci-e (is this correct?)
/ - NV43 0x0703 6200/6600 cards
/ - NV44 0x0703 6200(Go)/Turbocache cards
/ - NV47 0xff3f 7800 cards (Does it support modding??)
*/

/* Return the mask of pixel/vertex units supported by the current architecture
/  (nv_card.arch). The low byte holds the pixel bits and the second byte the
/  vertex bits, in the same layout as register 0x1540.
/  For an architecture that is not listed below 0 is returned, instead of an
/  uninitialized value as before.
*/
int nv40_get_default_mask()
{
    int mask = 0;

    switch(nv_card.arch)
    {
	case NV40:
	    mask = 0x3f0f;
	    break;
	case NV41:
	    mask = 0x1f07;
	    break;
	case NV43:
	case NV44:
	    mask = 0x703;
	    break;
	case NV47:
	    mask = 0xff3f;
	    break;
	default:
	    /* Unknown architecture: report no known units */
	    break;
    }
    return mask;
}

/* Render a unit mask of an nv4x card as a string of '0'/'1' characters.
/  The number of digits equals the number of bits set in hw_default; the
/  digits are taken from the lowest bits of mask, most significant digit
/  first. buf receives the digits followed by a terminating 0, so it must
/  have room for up to 9 bytes.
*/
static void nv40_unit_mask_to_binary(unsigned char mask, unsigned char hw_default, char *buf)
{
    int width = 0;
    int bit;
    char *out = buf;

    /* Every bit set in hw_default stands for one unit on the card */
    for(bit = 0; bit < 8; bit++)
    {
	if((hw_default >> bit) & 0x1)
	    width++;
    }

    /* Walk from bit (width-1) down to bit 0 so the string reads like a binary number */
    for(bit = width - 1; bit >= 0; bit--)
	*out++ = ((mask >> bit) & 0x1) ? '1' : '0';

    *out = 0;
}

/* Look for hardware masked units using the default mask in register 0xc010.
/  Returns 1 when at least one supported unit is masked and 0 otherwise; in
/  the latter case pmask/vmask are left untouched.
/  When byte is non-zero a single raw byte is stored in *pmask (pixel) and
/  *vmask (vertex); else both receive a 0-terminated binary string.
*/
int nv40_get_hw_masked_units(char *pmask, char *vmask, int byte)
{
    /* We show the units that are hw masked by default, not the currently masked ones; the cli code wants to have this info */
    unsigned int hw_mask = nv_card.PMC[0xc010/4];
    unsigned int supported = nv40_get_default_mask();

    /* Vertex bits start at +16 and are moved to the second byte, pixel bits start at +22
    /  and are moved to the first byte. Only units the architecture supports are kept.
    /  What to do with NV47 which has 8 vertex units?
    */
    unsigned int vertex_bits = (hw_mask & 0x3f0000) >> 8;
    unsigned int pixel_bits = hw_mask >> 22;
    unsigned int masked_units = (vertex_bits | pixel_bits) & supported;

    if(masked_units == 0)
	return 0;

    if(byte)
    {
	*pmask = masked_units & 0xff; /* pixel */
	*vmask = (masked_units >> 8) & 0xff; /* vertex */
    }
    else
    {
	nv40_unit_mask_to_binary(masked_units & 0xff, supported & 0xff, pmask);
	nv40_unit_mask_to_binary((masked_units >> 8) & 0xff, (supported >> 8) & 0xff, vmask);
    }

    return 1;
}

/* Look for software masked units by comparing the pipeline configuration
/  with the full set of units the architecture supports.
/  Returns 1 when the configuration differs from the full set and 0 when it
/  matches or when no configuration is available; on 0 pmask/vmask are left
/  untouched.
/  When byte is non-zero a single raw byte is stored in *pmask (pixel) and
/  *vmask (vertex); else both receive a 0-terminated binary string. A set
/  bit marks a supported unit that is not enabled in the configuration.
*/
int nv40_get_sw_masked_units(char *pmask, char *vmask, int byte)
{
    unsigned int mask = nv40_get_default_mask();
    unsigned int pipe_cfg;
    unsigned int masked_units;

    /* When a biosdump is present read the default value from there
    /  else we use the current pipe_cfg as an alternative. This isn't correct
    /  but on some cards we just can't dump the bios (for instance on 6200go cards).
    /  The fallback used to be computed without being stored, which left
    /  pipe_cfg uninitialized.
    */
    if(nv_card.bios)
	pipe_cfg = nv_card.bios->pipe_cfg;
    else
	pipe_cfg = nv_card.PMC[0x1540/4] & mask;

    if(!pipe_cfg)
	return 0;

    /* When the lower 16 bits equal the most optimal value for this type
    /  of card nothing is sw masked.
    */
    if((pipe_cfg & 0xffff) == mask)
	return 0;

    /* Supported units that are missing from the configuration.
    /  NOTE(review): the old code used (pipe_cfg & ~mask), which selects bits
    /  outside the supported set and is always 0 for the register fallback;
    /  confirm against the cli output that (mask & ~pipe_cfg) is what is wanted.
    */
    masked_units = mask & ~pipe_cfg;

    if(byte)
    {
	*pmask = masked_units & 0xff; /* pixel */
	/* The vertex bits live in the second byte and must be shifted down to fit in a char */
	*vmask = (masked_units >> 8) & 0xff; /* vertex */
    }
    else
    {
	nv40_unit_mask_to_binary(masked_units & 0xff, mask & 0xff, pmask);
	nv40_unit_mask_to_binary((masked_units >> 8) & 0xff, (mask >> 8) & 0xff, vmask);
    }
    return 1;
}

/* Return the number of enabled pixel pipelines and store the mask of
/  active pipeline blocks. When byte is non-zero the raw low byte of
/  register 0x1540 is stored in *mask, else mask receives a 0-terminated
/  binary string.
*/
int nv40_get_pixel_pipelines(char *mask, int byte)
{
    unsigned char enabled = nv_card.PMC[0x1540/4] & 0xff;
    /* A bit stands for 2 pipelines on NV44 and for 4 pipelines on everything else */
    int pipes_per_bit = (nv_card.arch == NV44) ? 2 : 4;
    int blocks = 0;
    int bit;

    /* Count the bits that are set in the first byte */
    for(bit = 0; bit < 8; bit++)
	blocks += (enabled >> bit) & 0x1;

    if(!byte)
	nv40_unit_mask_to_binary(enabled, nv40_get_default_mask() & 0xff, mask);
    else
	*mask = enabled;

    return blocks * pipes_per_bit;
}

/* Return the number of enabled vertex pipelines and store the mask of
/  active pipelines. When byte is non-zero the raw value is stored in
/  *mask, else mask receives a 0-terminated binary string.
*/
int nv40_get_vertex_pipelines(char *mask, int byte)
{
    /* The vertex configuration sits in the second byte of 0x1540; only the lowest 6 bits are used.
    /  NOTE(review): the default mask for NV47 (0xff3f) lists 8 vertex bits, so two
    /  of them are dropped here -- confirm whether 0x3f is intended for NV47.
    */
    unsigned char enabled = (nv_card.PMC[0x1540/4] >> 8) & 0x3f;
    int count = 0;
    int bit;

    /* A single bit corresponds to 1 vertex pipeline */
    for(bit = 0; bit < 8; bit++)
	count += (enabled >> bit) & 0x1;

    if(!byte)
	nv40_unit_mask_to_binary(enabled, (nv40_get_default_mask() >> 8) & 0xff, mask);
    else
	*mask = enabled;

    return count;
}

/* Replace the pixel pipeline byte (lowest byte) of register 0x1540 with
/  mask and keep all other bits of the register as they were. Registers
/  0xc020/0xc024/0xc028/0xc02c are cleared before the new value is written.
*/
void nv40_set_pixel_pipelines(unsigned char mask)
{
    int current = nv_card.PMC[0x1540/4];
    /* Drop the old pixel byte and insert the new one */
    int updated = (current & ~0xff) | mask;

    /* Why do 0xc024/0xc028/0xc02c need to be reset? What do they contain? */
    nv_card.PMC[0xc020/4] = nv_card.PMC[0xc024/4] = nv_card.PMC[0xc028/4] = nv_card.PMC[0xc02c/4] = 0;

    nv_card.PMC[0x1540/4] = updated;
}

/* Replace the vertex pipeline byte (second byte) of register 0x1540 with
/  mask and keep all other bits of the register as they were. Registers
/  0xc020/0xc024/0xc028/0xc02c are cleared before the new value is written.
*/
void nv40_set_vertex_pipelines(unsigned char mask)
{
    int current = nv_card.PMC[0x1540/4];
    /* Drop the old vertex byte and insert the new one */
    int updated = (current & ~0xff00) | (mask<<8);

    /* Why do 0xc024/0xc028/0xc02c need to be reset? What do they contain? */
    nv_card.PMC[0xc020/4] = nv_card.PMC[0xc024/4] = nv_card.PMC[0xc028/4] = nv_card.PMC[0xc02c/4] = 0;

    nv_card.PMC[0x1540/4] = updated;
}

/* Fanspeed code for Geforce6800 hardware.
/  Returns the fan duty cycle in percent, with a resolution of 0.1%.
*/
float nv40_get_fanspeed()
{
    /* Bits 25-16 of register 0x10f0 hold the duty cycle of the pwm signal generator
    /  that is connected to the fan, in units of 0.1%. By changing the value in the
    /  register the duty cycle can be controlled so that the fan turns slower or faster.
    /  The value stored in the register needs to be inverted, so a value of 10% means 90% and so on.
    */
    unsigned int raw = (nv_card.PMC[0x10f0/4] >> 16) & 0x3ff;

    /* Do the maths in floating point; the former integer division threw the tenths away */
    return (1000.0f - (float)raw) / 10.0f;
}

/* Set the fan duty cycle of Geforce6800 hardware to speed percent.
/  Values below 10 or above 100 are ignored.
*/
void nv40_set_fanspeed(float speed)
{
    int duty, value;

    /* For safety reasons the fan can never be set below 10%; this also rules out negative values */
    if(speed < 10 || speed > 100)
	return;

    /* The register stores the inverted duty cycle in units of 0.1% */
    duty = (int)(1000 - speed * 10) & 0x3ff;

    /* Keep bits 31-26 and 15-0 of 0x10f0 and only replace the duty cycle field */
    value = (nv_card.PMC[0x10f0/4] & 0xfc000000) + (duty << 16) + (nv_card.PMC[0x10f0/4] & 0xffff);
    nv_card.PMC[0x10f0/4] = value;
}

/* Fanspeed code for Geforce6600 hardware (does this work for 6200 cards too??).
/  Returns the fan duty cycle in percent.
*/
float nv43_get_fanspeed()
{
    /* The first 8 bits of register 0x15f4 control the pwm signal generator in case
    /  of Geforce6600(GT) hardware. By changing the value in the register the duty cycle of the pwm signal
    /  can be controlled so that the fan turns slower or faster.
    /  The value stored in the register needs to be inverted, so a value of 10% means 90% and so on. (0xff means off, 0 means on)
    */
    unsigned int raw = nv_card.PMC[0x15f4/4] & 0xff;

    /* Scale in floating point; the former integer arithmetic truncated the percentage */
    return (float)(0xff - raw) * 100.0f / 0xff;
}

/* Set the fan duty cycle of Geforce6600 hardware to speed percent.
/  Values below 10 or above 100 are ignored.
*/
void nv43_set_fanspeed(float speed)
{
    int duty, reg_value;

    /* For safety reasons the fan can never be set below 10%; this also rules out negative values */
    if(speed < 10 || speed > 100)
	return;

    /* The register stores the inverted duty cycle scaled to 0-0xff */
    duty = (int)((100 - speed) * 0xff/100);

    /* The highest bit is always set together with the duty cycle */
    reg_value = 0x80000000 + duty;
    nv_card.PMC[0x15f4/4] = reg_value;
}

/* Read the internal temperature sensor that is present on NV43 hardware and higher.
/  The sensor argument is not used; it only exists so that
/  nv_card.get_gpu_temp can be shared between I2C and low-level code.
/  Returns the first byte of register 0x15b4 plus a fixed offset of 28.
*/
int nv43_get_gpu_temp(void *sensor)
{
    /* Some offset needs to be added to the raw temperature. Under normal
    /  circumstances this seems to be 28C or 33C for (different) 6600GT boards.
    /  When running 3d applications this offset was 28C instead of 33C on one of
    /  the tested cards. Likely the offset is variable as a non-linearity correction.
    /  We need to figure out where the offset is stored or else we need to figure
    /  out what offsets all cards use. For now use 28C.
    /
    /  FIXME: offset
    */
    const int temp_offset = 28;

    /* A raw temperature of 0 is taken as a sign that the sensor is disabled */
    if(!(nv_card.PMC[0x15b4/4] & 0xff))
    {
	/* Initialize the sensor through 0x15b0 and 0x15b8; not sure what both
	/  registers fully do. At least we noticed that the most significant bits
	/  of 0x15b8 can do some unknown things. Somehow by messing with the
	/  highest 4 bits it seems to be possible to show the slowdown threshold
	/  and more but not sure what everything is. The values below work
	/  correctly on most cards.
	*/
	nv_card.PMC[0x15b0/4] = 0x100000a1;
	nv_card.PMC[0x15b8/4] = 0x14800000;

	/* Wait a moment before the temperature is read again.
	/  NOTE(review): usleep() is declared in <unistd.h>, which this file does not
	/  include itself -- confirm that one of the project headers provides it.
	*/
	usleep(500);
    }

    /* The register is read again as it may just have been initialized */
    return (nv_card.PMC[0x15b4/4] & 0xff) + temp_offset;
}

/* Get the current backpanel brightness level on laptops.
/  The 5 bit level stored in bits 20-16 of register 0x15f0 is converted
/  to a percentage by 5*(level-1).
*/
int nv44_mobile_get_smartdimmer()
{
    int raw_level = (nv_card.PMC[0x15f0/4] >> 16) & 0x1f;

    return 5 * (raw_level - 1);
}

/* Adjust the backpanel brightness on laptops. level is a percentage;
/  values below 15 or above 100 are ignored.
*/
void nv44_mobile_set_smartdimmer(int level)
{
    int raw_level;

    if(level < 15 || level > 100)
	return;

    /* Convert the percentage to a Smartdimmer value; on Windows a value between 4 and 21 works fine although 0-31 should work.
    /  For the accepted range of 15-100 the result lies between 4 and 21.
    */
    raw_level = level/5 + 1;

    /* Only replace bits 20-16 (the smartdimmer part) and keep the rest of the register the same */
    nv_card.PMC[0x15f0/4] = (nv_card.PMC[0x15f0/4] & 0xffe0ffff) | (raw_level << 16);
}

/* Decode the two PLL registers of an NV4x clock and return the speed,
/  which is the result of CalcSpeed_nv30 divided by 1000.
/  The mpll lives at 0x4020 and 0x4024; the nvpll at 0x4000 and 0x4004.
/  pll provides the post divider (bits 17-16), pll2 the four
/  multiplier/divider bytes.
*/
float GetClock_nv40(int base_freq, unsigned int pll, unsigned int pll2)
{
    int post_div = (pll >> 16) & 0x03;
    int m1 = pll2 & 0xFF;
    int n1 = (pll2 >> 8) & 0xFF;
    int m2 = (pll2 >> 16) & 0xFF;
    int n2 = (pll2 >> 24) & 0xFF;

    if(nv_card.debug)
	printf("m1=%d m2=%d n1=%d n2=%d p=%d\n", m1, m2, n1, n2, post_div);

    return (float)CalcSpeed_nv30(base_freq, m1, m2, n1, n2, post_div)/1000;
}

/* Search PLL coefficients for the clock clockIn (in kHz, using a fixed
/  27000 kHz reference).
/  On return *pllOut holds pllIn with bits 17-16 replaced by the post divider
/  and *pllBOut holds m + (n<<8) + (m2<<16) + (n2<<24) of the best candidate.
/  The search stops at the first candidate that is at most 0.1% off.
/  When clockIn lies outside 75000-1200000 no search is done and both
/  outputs are set to 0, so that callers can test *pllOut to see whether
/  a speed was found (the outputs used to be left untouched in that case).
*/
void ClockSelect_nv40(int clockIn, unsigned int pllIn, unsigned int *pllOut, unsigned int *pllBOut)
{
    unsigned diff, diffOld;
    unsigned VClk, Freq;
    unsigned m, m2, n, n2, p = 0;
    int base_freq = 27000;

    /* Make sure the outputs are defined even when nothing is found */
    *pllOut = 0;
    *pllBOut = 0;

    diffOld = 0xFFFFFFFF;

    /* NOTE(review): clockIn is in kHz according to the range check below, so
    /  for every accepted clock these thresholds select p = 0. Presumably they
    /  were meant to be in MHz -- confirm on hardware before changing them.
    */
    if(clockIn < 125)
	p = 3;
    else if(clockIn < 250)
	p = 2;
    else if(clockIn < 500)
	p = 1;
    else
	p = 0;

    VClk = (unsigned)clockIn;

    /* The algorithm doesn't allow too low or too high speeds */
    if((VClk < 75000) || (VClk > 1200000))
	return;

    for(m = 1; m <= 4; m++)
    {
	for(m2 = 1; m2 <= 4; m2++)
	{
	    for(n = 1; n <= 31; n++)
	    {
		/* The n2 that brings this m/m2/n combination closest to the requested clock */
		n2 = (int)((float)((VClk << p) * m * m2) / (float)(base_freq * n)+.5);

		if(!((n2 < 24) && (n >= n2) && (m >= m2)))
		    continue;

		Freq = ((base_freq * n * n2) / (m * m2)) >> p;
		if(Freq > VClk)
		    diff = Freq - VClk;
		else
		    diff = VClk - Freq;

		/* When the difference is 0 or at most 0.1% accept the speed */
		if((diff == 0) || ((float)diff/(float)clockIn <= 0.001))
		{
		    /* What do the 0x1c and 0xe mean? further there is some bit in pllOut that is sometimes 1 */
		    *pllOut = (pllIn & 0xfffcffff) + (p << 16);
		    *pllBOut = m + (n<<8) + (m2<<16) + (n2 << 24);
		    return;
		}

		/* Else remember the best candidate so far */
		if(diff < diffOld)
		{
		    *pllOut = (pllIn & 0xfffcffff) + (p << 16);
		    *pllBOut = m + (n<<8) + (m2<<16) + (n2 << 24);
		    diffOld = diff;
		}
	    }
	}
    }
}

/* Return the current gpu speed as decoded by GetClock_nv40 from the
/  nvpll registers 0x4000 and 0x4004. Both raw register values are
/  printed when nv_card.debug equals 1.
*/
float nv40_get_gpu_speed()
{
    int coeff = nv_card.PMC[0x4000/4];
    int coeff2 = nv_card.PMC[0x4004/4];

    if(nv_card.debug == 1)
    {
	printf("NVPLL_COEFF=%08x\n", coeff);
	printf("NVPLL2_COEFF=%08x\n", coeff2);
    }

    return GetClock_nv40(nv_card.base_freq, coeff, coeff2);
}

/* Set the gpu speed. nvclk is multiplied by 1000 before it is handed to
/  ClockSelect_nv40; the registers 0x4000/0x4004 are only written when a
/  non-zero PLL value comes back.
*/
void nv40_set_gpu_speed(unsigned int nvclk)
{
    /* Start from 0: ClockSelect_nv40 may return without storing anything,
    /  in which case the check below would read an uninitialized value.
    */
    unsigned int PLL = 0, PLL2 = 0;

    nvclk *= 1000;

    ClockSelect_nv40(nvclk, nv_card.PMC[0x4000/4], &PLL, &PLL2);

    /* When no speed is found, don't change the PLL */
    /* The algorithm doesn't allow too low speeds */
    if(!PLL)
	return;

    if(nv_card.debug)
    {
	printf("NVPLL_COEFF: %08x\n", PLL);
	printf("NVPLL2_COEFF: %08x\n", PLL2);
    }

    nv_card.PMC[0x4000/4] = PLL;
    nv_card.PMC[0x4004/4] = PLL2;
}

/* Return the current memory speed as decoded by GetClock_nv40 from the
/  mpll registers 0x4020 and 0x4024. Both raw register values are
/  printed when nv_card.debug equals 1.
*/
float nv40_get_memory_speed()
{
    int coeff = nv_card.PMC[0x4020/4];
    int coeff2 = nv_card.PMC[0x4024/4];

    if(nv_card.debug == 1)
    {
	printf("MPLL_COEFF=%08x\n", coeff);
	printf("MPLL2_COEFF=%08x\n", coeff2);
    }

    return GetClock_nv40(nv_card.base_freq, coeff, coeff2);
}

/* Set the memory speed. memclk is multiplied by 1000 before it is handed
/  to ClockSelect_nv40; the memory PLL registers are only written when a
/  non-zero PLL value comes back.
*/
void nv40_set_memory_speed(unsigned int memclk)
{
    /* Start from 0: ClockSelect_nv40 may return without storing anything,
    /  in which case the check below would read an uninitialized value.
    */
    unsigned int PLL = 0, PLL2 = 0;

    memclk *= 1000;

    ClockSelect_nv40(memclk, nv_card.PMC[0x4020/4], &PLL, &PLL2);

    /* When no speed is found, don't change the PLL */
    /* The algorithm doesn't allow too low speeds */
    if(!PLL)
	return;

    if(nv_card.debug)
    {
	printf("MPLL_COEFF: %08x\n", PLL);
	printf("MPLL2_COEFF: %08x\n", PLL2);
    }

    /* It seems that different NV4X GPUs contain multiple memory clocks.
    /  A 6800 card has 4 of them, a 6600GT 2 of them and a NV44 (6200) 1.
    /  Very likely this is related to the width of the memory bus, which
    /  is 256bit on the 6800, 128bit on the 6600GT (NV43) and 64bit on the NV44.
    /
    /  The switch below sets the extra clocks. The cases fall through on
    /  purpose: NV40/NV41/NV47 write all four register pairs, NV43 the last
    /  two and NV44 only 0x4020/0x4024. Other architectures write nothing.
    */
    switch(nv_card.arch)
    {
	case NV40:
	case NV41:
	case NV47:
	    nv_card.PMC[0x402c/4] = PLL;
	    nv_card.PMC[0x4030/4] = PLL2;
	    nv_card.PMC[0x4044/4] = PLL;
	    nv_card.PMC[0x4048/4] = PLL2;
	    /* fall through */
	case NV43:
	    nv_card.PMC[0x4038/4] = PLL;
	    nv_card.PMC[0x403c/4] = PLL2;
	    /* fall through */
	case NV44:
	    nv_card.PMC[0x4020/4] = PLL;
	    nv_card.PMC[0x4024/4] = PLL2;
	    break;
	default:
	    break;
    }
}

/* Write the stored values nv_card.nvpll and nv_card.nvpll2 back to the
/  gpu PLL registers 0x4000 and 0x4004.
/  NOTE(review): presumably nvpll/nvpll2 hold the values saved at startup -- confirm against the init code.
*/
void nv40_reset_gpu_speed()
{
    /* Set the gpu speed */
    nv_card.PMC[0x4000/4] = nv_card.nvpll;
    nv_card.PMC[0x4004/4] = nv_card.nvpll2;
}

/* Write the stored values nv_card.mpll and nv_card.mpll2 back to the
/  memory PLL registers. Register 0x4024 is written first for every
/  architecture; after that the switch writes the register pairs that
/  belong to the architecture.
/  NOTE(review): presumably mpll/mpll2 hold the values saved at startup -- confirm against the init code.
*/
void nv40_reset_memory_speed()
{
    /* Set the memory speed */
    nv_card.PMC[0x4024/4] = nv_card.mpll2;

    /* The cases fall through on purpose: NV40/NV41/NV47 write all four
    /  register pairs, NV43 the last two and NV44 only 0x4020/0x4024.
    */
    switch(nv_card.arch)
    {
	case NV40:
	case NV41:
	case NV47:
	    nv_card.PMC[0x402c/4] = nv_card.mpll;
	    nv_card.PMC[0x4030/4] = nv_card.mpll2;
	    nv_card.PMC[0x4044/4] = nv_card.mpll;
	    nv_card.PMC[0x4048/4] = nv_card.mpll2;
	    /* fall through */
	case NV43:
	    nv_card.PMC[0x4038/4] = nv_card.mpll;
    	    nv_card.PMC[0x403c/4] = nv_card.mpll2;
	    /* fall through */
	case NV44:
	    nv_card.PMC[0x4020/4] = nv_card.mpll;
	    nv_card.PMC[0x4024/4] = nv_card.mpll2;
    }
}
