/* This code continuously compensates for the DDS crystal oscillator error by measuring the value of a known
 * reference frequency, REF_FREQ, being generated with CLK1. It then uses this correction to produce any
 * frequency on CLK0 to the same relative accuracy.
 *
 * The DDS CLK1 is set to generate the REF_FREQ. This is connected to D5 (T1) so it causes Timer1 to
 * count continuously from 0 to 65535 then overflow and repeat. Each overflow triggers an interrupt
 * which increments n_overflows. Meanwhile, the GPS 1 PPS is connected to D8 (ICP1). Each falling
 * edge copies Timer1 (TCNT1) to the Input Capture Register (ICR1) and triggers an interrupt.
 * The first PPS interrupt after a measurement is started saves ICR1 in icr_0. After m_secs more PPS
 * interrupts the ICR1 is saved in icr_1. Thus the total counts during m_secs is the remaining Timer1
 * counts until the first overflow, plus the number of counts in the next full n_overflows periods, plus
 * the counts until the final PPS. 
 *
 *
 *
 *                 *            *            *            *            *            *        ^ 65535
 *               * *          * *          * *          * *          * *          * *        |
 *             *   *        *   *        *   *        *   *        *   *        * | *        | Timer1 
 *           *     *      *     *      *     *      *     *      *     *      *   | *        | counting
 *         *       *    *       *    *       *    *       *    *       *    *     | *    *   | up CLK1
 *       * |       *  *         *  *         *  *         *  *         *  *       | *  *     |
 *     *   |       *            *            *            *            *          | *        | 0
 *         |                                                                      | 
 *         |                                                                      |
 *   PPS   ^                         ^                      ^                     ^
 *         |                                                                      |
 *         |<-----><---------------------------------------------------><-------->|
 *         |       1            2            3            4            5          |
 *         icr_0                       n_overflows                                icr_1
 *
 *
 *
 *
 * The measured frequency can then be computed in Hz as:
 *
 *    measured_frequency = (65536-icr_0 + 65536*(n_overflows-1) + icr_1)/m_secs, Hz
 *
 * The Si5351 architecture connects the crystal oscillator to a PLL which multiplies it to a larger frequency.
 * It then divides this PLL frequency to produce the desired clock output frequency. The Si5351 doc calls
 * these multipliers and dividers "multisynths". Both are represented as fixed point numbers a + b/c where
 * each term is 20 bits of precision, or about 6 decimal digits. For maximum precision we always set c
 * to ((1<<20)-1) and search down from the max PLL frequency for the combination with least quantization
 * error.
 *
 * Notes:
 *
 *   [ ] The precision is independent of the reference frequency and depends only on the measurement period
 *       such that err = 1/m_secs Hz
 *   [ ] The precision to which the DDS may be set is about 0.01 Hz, or 1 cHz. Thus the optimal measurement
 *       period is 1/0.01 = 100 seconds. If this is shortened, then the measurement accuracy will be less
 *       than the commandable precision but the system will respond faster to external disturbances. We
 *       choose a compromise and apply 1/10 the measured error every 10 secs.
 *   [ ] The measurement accuracy depends only on the PPS jitter and the Timer1 capture mechanism; it is not
 *       affected by user program interrupt latency or processing time.
 *   [ ] For more information about Timer1 read chapter 20 of the Atmega328 data sheet:
 *       http://www.atmel.com/Images/Atmel-42735-8-bit-AVR-Microcontroller-ATmega328-328P_Datasheet.pdf
 */

#include "GPSReference.h"

// user-adjustable values

#define REF_FREQ        500000000UL	// reference frequency generated on CLK1, cHz (5 MHz); must be < sys clock / 2.5 (pg 187)
#define M_SECS          10              // measurement duration in PPS periods (secs); longer gives higher accuracy but slower reaction
#define	ERR_PROP	0.10		// fraction of each measured error that is applied; use about M_SECS/100

// these values likely need never change

#define	XTAL		2500000000ULL	// SI crystal freq, cHz (25 MHz); some use 27 MHz
#define	DDS_I2C_ADDR	0x60		// DDS I2C address
#define TIM1_ROLL       65536ULL        // counts per roll of Timer1 (16 bit counter)
#define MIN_UFREQ       50000000UL      // min user freq, cHz (500 kHz); determined by max 900/1800 PLL divisor
#define MAX_UFREQ       4294967295UL    // max user freq, cHz (about 42.9 MHz); determined by limit of uint32_t
#define	MIN_PLL		60000000000ULL	// min allowed si5351 pll freq, cHz (600 MHz)
#define	MAX_PLL		90000000000ULL	// max allowed si5351 pll freq, cHz (900 MHz)
#define	WILD_CHZ	50000		// crazy measurement threshold, cHz (500 Hz); larger errors are ignored
#define	PIN_CLK1	5		// CLK1 counter pin, D5 (T1)
#define	PIN_PPS		8		// PPS trigger pin, D8 (ICP1)

/* set up to control the DDS with initial value for user's desired frequency, in Hz.
 * the request is converted to cHz and clamped to MIN_UFREQ .. MAX_UFREQ, the same limits that
 * setUserFreq() enforces, so an out-of-range or zero request can not wrap or divide by zero.
 * N.B. this programs the Si5351 via writeSi() and enables the Timer1 capture and overflow interrupts.
 */
DDSControl::DDSControl(uint32_t initial_hz)
{
    // configure Si5351
    reg16 = 0x0F;       // power up CLK0, fraction mode, use PLL A, non-inverted, use multisynth 0, 8 mA
    writeSi (15, 0x00); // use XTAL as input for PLL A and B
    writeSi (16, reg16);
    writeSi (17, 0x2F); // power up CLK1, fraction mode, use PLL B, non-inverted, use multisynth 1, 8 mA
    writeSi (18, 0x80); // power down CLK2
    writeSi (3, ~0x03); // enable only outputs 0 and 1

    // init user frequency on CLK0.
    // convert Hz to cHz in 64 bits so a large request saturates rather than wraps, then clamp
    uint64_t want_chz = (uint64_t)initial_hz * 100U;
    if (want_chz > MAX_UFREQ)
        user_freq = MAX_UFREQ;
    else if (want_chz < MIN_UFREQ)
        user_freq = MIN_UFREQ;
    else
        user_freq = (uint32_t)want_chz;
    (void) setSiFreq(CLK0, user_freq);

    // init commanded frequency on CLK1
    ctrl_freq = REF_FREQ;
    (void) setSiFreq(CLK1, ctrl_freq);

    // define input pins
    pinMode (PIN_CLK1, INPUT);
    pinMode (PIN_PPS, INPUT);

    // init Timer1
    cli();                              // no interrupts during setup
    TCCR1A = 0x00;                      // normal mode
    TCCR1B = 0x06;                      // no noise filter, capture and external inputs use falling edges
    TCCR1C = 0x00;                      // no forced output compare
    TIMSK1 = 0x21;                      // enable input capture and overflow interrupts
    TIFR1 = 0x27;                       // clear stale flags: AVR flags are cleared by writing 1, writing 0 is a no-op
    sei();                              // allow interrupts
}

/* start next incremental measurement, or start all over if full.
 * full: when true also reset all measurement state and arrange for the next result to be applied
 *       in full by update(); when false just arm the next measurement.
 * N.B. start_freq is set last: it is the flag the capture ISR tests to treat the next PPS as the
 *      beginning of a measurement.
 */
void DDSControl::startMeasurement(bool full)
{
    if (full) {
	m_secs = M_SECS;		// number of PPS periods the capture ISR waits for
	init_err = true;		// update() applies the whole error on the next result
	n_overflows = 0;		// running Timer1 overflow count
	n_ovf_capture = 0;		// overflow count latched at the final PPS
	icr_0 = 0;			// Timer1 value at the first PPS
	icr_1 = 0;			// Timer1 value at the final PPS
	n_pps = 0;			// PPS periods seen so far
    }

    m_t0 = millis();			// record measurement start time, used by update() to detect missing PPS
    freq_ready = false;			// set when finished
    start_freq = true;			// start measurement
}

/* update the DDS correction when it becomes available then start another measurement
 */
void DDSControl::update()
{
    // confirm PPS is running allowing for a lot of Arduino slop
    uint32_t now = millis();
    uint32_t run_secs = (now - m_t0)/1000U;
    if (run_secs > 4 && n_pps < run_secs - 4) {
	// Serial.print(F("NO PPS ")); Serial.print(n_pps); Serial.print(' '); Serial.println(run_secs);
	dpy->drawMessage("No PPS");
	startMeasurement(true);
	return;
    }

    // process when new measurement is ready
    if (freq_ready) {

	// validate expected number of overflows
	if (n_ovf_capture < (REF_FREQ/100U)/TIM1_ROLL - 10) {	// expect one roll per TIM1_ROLL Hz
	    // Serial.print(F("NO CLK1 ")); Serial.println(n_ovf_capture);
	    dpy->drawMessage("No CLK1");
	    startMeasurement(true);
	    return;
	}

	// compute raw difference from REF_FREQ
	int32_t m_cyc = (TIM1_ROLL-icr_0) + (TIM1_ROLL*(n_ovf_capture-1)) + icr_1;  // measured cycles/m_secs
	int32_t f_err = 100LL*m_cyc/m_secs - REF_FREQ;			            // total error in cHz

	// ignore wild errors
	if (f_err < WILD_CHZ && -f_err < WILD_CHZ) {

	    // apply corrections
	    if (init_err) {
		// apply full correction if first measurement
		// Serial.print(F("Initial err: ")); Serial.print(f_err);
		// Serial.print(F("\tm_cyc: ")); Serial.print(m_cyc);
		// Serial.print(F("\trolls: ")); Serial.println(n_ovf_capture);
		ctrl_freq -= f_err;
		init_err = false;
		(void) setSiFreq (CLK1, ctrl_freq);
		(void) setUserFreq (user_freq);
	    } else {
		// find fraction of error to apply
		int32_t f_err_p = floor(ERR_PROP*f_err+0.5);

		// apply error to reference clock and save error after quantization
		ctrl_freq -= f_err_p;
		int32_t ctrl_q_err = setSiFreq (CLK1, ctrl_freq);

		// scale both ctrl errors to user freq
		int32_t scaled_ctrl_errs = (int32_t)((int64_t)(f_err_p+ctrl_q_err)*user_freq/REF_FREQ);

		// update user_freq using the corrected control freq and save resulting quantization error
		int32_t user_q_err = setUserFreq (user_freq);

		// sum all user errors
		int32_t tot_user_err = scaled_ctrl_errs + user_q_err;

		// inform user of total error with respect to their desired frequency
		dpy->drawCorrection(tot_user_err);

		// Serial.print(F("Errors,cHz:\tRaw ")); Serial.print(f_err);
		// Serial.print(F("\tCorr ")); Serial.print(f_err_p);
		// Serial.print(F("\tCtrlFreq ")); Serial.print(ctrl_freq);
		// Serial.print(F("\tCtrlQ ")); Serial.print(ctrl_q_err);
		// Serial.print(F("\tUserQ ")); Serial.print(user_q_err);
		// Serial.print(F("\tTotal ")); Serial.println(tot_user_err);
	    }
	}

	// start measuring CLK1 again
	startMeasurement(false);
    }
}

/* set user_freq to the given cHz, clamped to MIN_UFREQ .. MAX_UFREQ, then engage it on CLK0 scaled
 * by the current ctrl_freq/REF_FREQ correction.
 * the scaled value is saturated at the uint32_t limit so a request near MAX_UFREQ combined with a
 * correction above unity can not wrap around to a tiny frequency.
 * return DDS error due to register quantizing, cHz, as reported by setSiFreq().
 */
int32_t DDSControl::setUserFreq (uint32_t new_uf)
{
    // update with clamp
    if (new_uf > MAX_UFREQ)
        user_freq = MAX_UFREQ;
    else if (new_uf < MIN_UFREQ)
        user_freq = MIN_UFREQ;
    else
        user_freq = new_uf;

    // apply correction proportional to that needed by reference frequency; work in 64 bits and
    // saturate before narrowing because setSiFreq() takes only 32 bits
    uint64_t scaled = (uint64_t)user_freq * ctrl_freq / REF_FREQ;
    if (scaled > 0xFFFFFFFFULL)
        scaled = 0xFFFFFFFFULL;

    // engage, return err
    return (setSiFreq (CLK0, (uint32_t)scaled));
}

/* report the user frequency currently in effect, cHz.
 * this is the stored (clamped) request, not the corrected value sent to the chip.
 */
uint32_t DDSControl::getUserFreq()
{
    return user_freq;
}

/* set the output level from the given PowerLevel value.
 * only the two low bits of l are used so larger values wrap around into the allowed range.
 * the new level is stored in the low two bits of our register 16 shadow then sent to the chip.
 */
void DDSControl::setPower (uint8_t l)
{
    const uint8_t level_bits = l & 0x3;

    reg16 = (reg16 & ~0x3) | level_bits;
    writeSi (16, reg16);
}

/* turn the user clock output on or off.
 * bit 0x80 of register 16 is cleared to turn the output on and set to turn it off; the register 16
 * shadow is updated then sent to the chip.
 */
void DDSControl::setOutput (bool want_on)
{
    reg16 = want_on ? (reg16 & ~0x80) : (reg16 | 0x80);
    writeSi (16, reg16);
}


/* report the current output level as a PowerLevel value, ie, the low two bits of the register 16 shadow
 */
uint8_t DDSControl::getPower (void)
{
    const uint8_t level_bits = reg16 & 0x3;
    return level_bits;
}

/* set the given SI5351 clock to the given frequency in cHz.
 * use PLL A and Multisynth 0 for CLK0, PLL B and Multisynth 1 for CLK1.
 * search PLL frequencies from MAX_PLL down to MIN_PLL in 1 MHz steps for the multiplier and divider
 * that minimize quantization error, then load the best pair into the chip.
 * return the freq error in cHz, actual minus requested, given the realities of the register precision.
 * N.B. a request of 0 cHz can not be synthesized: the chip is left untouched and 0 is returned.
 */
int32_t DDSControl::setSiFreq (Clock clock, uint32_t freq_cHz)
{
    // always use the max precision fraction denominator
    #define  DENOM ((1UL<<20U) - 1U)     		// 2^20 - 1 = 1,048,575; doc calls this "c"

    // a zero request would divide by zero below; keep whatever is currently programmed
    if (freq_cHz == 0)
        return (0);

    int32_t b_q_err = 1000000000L;			// best quantization error so far
    uint32_t b_pll_div = 0UL, b_pll_num = 0UL;		// best multiplier coefficients so far
    uint32_t b_ms_div = 0UL, b_ms_num = 0UL;		// best divider coefficients so far

    // search down from MAX_PLL to MIN_PLL for least error
    for (uint64_t my_pll = MAX_PLL; my_pll >= MIN_PLL; my_pll -= 100000000UL) {

        // multiply up from XTAL to my_pll
        uint32_t pll_div = my_pll/XTAL;			// doc calls this "a"
        uint64_t pll_rem = my_pll - XTAL*pll_div;	// remainder
        uint32_t pll_num = pll_rem*DENOM/XTAL;		// remainder as fraction over "c", doc calls this "b"

        // compute the actual pll frequency given the register values
        uint64_t act_pll = XTAL*pll_div + XTAL*pll_num/DENOM;

        // Serial.print(F("CLK")); Serial.print(clock);
        // Serial.print(F("\tFreq: ")); Serial.print(freq_cHz);
        // Serial.print(F("\tmy_pll: ")); Serial.print((uint32_t)(my_pll/100UL)); Serial.print(F("00"));
        // Serial.print(F("\tact_pll: ")); Serial.print((uint32_t)(act_pll/100UL)); Serial.print(F("00"));
        // Serial.print(F("\tpll_div: ")); Serial.print(pll_div);
        // Serial.print(F("\tpll_num: ")); Serial.print(pll_num);

        // divide down from act_pll to freq_cHz
        uint32_t ms_div = act_pll/freq_cHz;
        uint64_t ms_rem = act_pll - (uint64_t)freq_cHz*ms_div;
        uint32_t ms_num = ms_rem*DENOM/freq_cHz;

        // Serial.print(F("\tms_div: ")); Serial.print(ms_div);
        // Serial.print(F("\tms_num: ")); Serial.print(ms_num);

        // compute the actual commanded freq given the register values.
        // form the divisor in 64 bits: ms_div*DENOM does not fit in 32 bits once ms_div exceeds 4095
        uint32_t act_freq = (act_pll*DENOM)/((uint64_t)ms_div*DENOM+ms_num);
        // Serial.print(F("\tAct: ")); Serial.print(act_freq);

        // compute quantization err
        int32_t q_err = (int32_t)((int64_t)act_freq - (int64_t)freq_cHz);
        // Serial.print(F("\tErr: ")); Serial.println(q_err);

        // save if less error, done if perfect
        #define	abs32(x) ((x) < 0 ? -(x) : (x))
        if (abs32(q_err) < abs32(b_q_err)) {
            // Serial.print(F("better: ")); Serial.println(b_q_err);
            b_q_err = q_err;
            b_pll_div = pll_div;
            b_pll_num = pll_num;
            b_ms_div = ms_div;
            b_ms_num = ms_num;
            if (q_err == 0)
                break;
        }
    }

    // load best pll frequency: encode a + b/c as the chip's P1, P2, P3
    uint32_t t  = 128U*b_pll_num/DENOM;
    uint32_t p1 = 128U*b_pll_div + t - 512U;
    uint32_t p2 = 128U*b_pll_num - DENOM*t;
    uint32_t p3 = DENOM;
    loadSiRegs (clock == CLK0 ? 26 : 34, p1, p2, p3);	// multiplier synth
    writeSi (177, clock == CLK0 ? 0x20 : 0x80);		// reset

    // load best multisynth divider, same encoding
    t  = 128U*b_ms_num/DENOM;
    p1 = 128U*b_ms_div + t - 512U;
    p2 = 128U*b_ms_num - DENOM*t;
    p3 = DENOM;
    loadSiRegs (clock == CLK0 ? 42 : 50, p1, p2, p3);	// divider synth

    // return best error
    // Serial.print(F("CLK")); Serial.print(clock);
    // Serial.print(F("\tFreq: ")); Serial.print(freq_cHz);
    // Serial.print(F("\tpll_div: ")); Serial.print(b_pll_div);
    // Serial.print(F("\tpll_num: ")); Serial.print(b_pll_num);
    // Serial.print(F("\tms_div: ")); Serial.print(b_ms_div);
    // Serial.print(F("\tms_num: ")); Serial.print(b_ms_num);
    // Serial.print(F("\tbest: ")); Serial.println(b_q_err);
    return (b_q_err);
}

/* load the given parameters in the standard Si5351 pattern of 8 consecutive registers starting at
 * the given address.
 */
void DDSControl::loadSiRegs (uint8_t addr, uint32_t p1, uint32_t p2, uint32_t p3)
{
    // register images in ascending address order; each is narrowed to its low 8 bits
    const uint8_t image[8] = {
        (uint8_t)(p3>>8),                               // P3 bits 15..8
        (uint8_t)p3,                                    // P3 bits 7..0
        (uint8_t)((p1>>16)&0x03),                       // P1 bits 17..16
        (uint8_t)(p1>>8),                               // P1 bits 15..8
        (uint8_t)p1,                                    // P1 bits 7..0
        (uint8_t)(((p3>>12)&0xF0) | (p2>>16)),          // P3 bits 19..16 above P2 bits 19..16
        (uint8_t)(p2>>8),                               // P2 bits 15..8
        (uint8_t)p2                                     // P2 bits 7..0
    };

    for (uint8_t i = 0; i < 8; i++)
        writeSi ((uint8_t)(addr + i), image[i]);
}

/* set a low-level si5351 register.
 * reg: register address within the chip
 * val: byte to store there
 * sends one I2C transaction to DDS_I2C_ADDR consisting of the register address followed by the value.
 * N.B. the result of endTransmission() is not checked so a bus failure goes unreported.
 */
void DDSControl::writeSi (uint8_t reg, uint8_t val)
{
    Wire.beginTransmission(DDS_I2C_ADDR);
    Wire.write(reg);			// register address first
    Wire.write(val);			// then its new contents
    Wire.endTransmission();
}

/* Timer1 overflow interrupt: runs once per TIM1_ROLL cycles of CLK1.
 * just counts the roll; the capture ISR resets this counter at the start of each measurement.
 */
ISR (TIMER1_OVF_vect)
{
    dds->n_overflows++;
}

/* interrupt handler called when Timer1 input capture fires, ie, each GPS 1 PPS.
 * the first PPS after start_freq is set begins a measurement; the PPS that arrives m_secs periods
 * later ends it and sets freq_ready.
 *
 * N.B. the capture vector has higher priority than the overflow vector. if Timer1 rolled over just
 * before this PPS while interrupts were held off, both are pending and we run first, so n_overflows
 * has not yet counted that roll. we detect this as the overflow flag still being set together with
 * a small captured count, ie, the roll came before the capture, and account for it here. this
 * assumes our latency is well under half a Timer1 period.
 */
ISR (TIMER1_CAPT_vect)
{
    uint16_t icr = ICR1;                                // count latched by hardware at the PPS edge
    bool roll_pending = (TIFR1 & (1 << TOV1)) && icr < 0x8000U;

    if (dds->start_freq) {
        // initialize first capture
        dds->icr_0 = icr;                       // capture initial counter value
        dds->n_overflows = 0;                   // init number of timer overflows
        if (roll_pending)
            TIFR1 = (1 << TOV1);                // that roll predates this measurement: discard it (write 1 clears)
        dds->n_pps = 0;                         // init number of captured PPS _periods_
        dds->start_freq = false;                // underway so reset start flag
    } else if (++dds->n_pps == dds->m_secs) {
        // finished
        dds->icr_1 = icr;                       // capture final counter value
        dds->n_ovf_capture = dds->n_overflows + (roll_pending ? 1 : 0); // include a roll not yet counted
        dds->freq_ready = true;                 // set ready flag
    }
}
