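//You may wonder why "&" is used instead of "%".
//For a power-of-two divisor and non-negative values, i % 0x400 and
//i & 0x3FF give the same result; this demo times both forms.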

//Iterations of each timed loop; also the divisor for the per-iteration average.
#define SAMPLE_SIZE         100000000

U0 TimeIns()
{//Compare "i % 0x400" with "i & 0x3FF": print average TSCGet delta per iteration for each.
    I64 t0, loop_cost, elapsed;
    I64 reg n, reg sink;

    //Runs before any timing (presumably prints the CPU summary seen
    //as the first line of the sample output).
    CPURep;

    //Baseline: an empty loop of the same length, so the cost of the
    //loop itself can be subtracted from each measurement below.
    t0 = TSCGet;
    for (n = 0; n < SAMPLE_SIZE; n++)
    {
    }
    loop_cost = TSCGet - t0;

    //Version #1: remainder taken with the modulo operator.
    t0 = TSCGet;
    for (n = 0; n < SAMPLE_SIZE; n++)
        sink = n % 0x400;
    elapsed = TSCGet - t0;
    "Remainder Version #1 Cycles\t: %10.5f\n", ToF64(elapsed - loop_cost) / SAMPLE_SIZE;

    //Version #2: same remainder taken with a bit mask.
    //The net figure can come out slightly negative when the masked
    //loop happens to run no slower than the baseline loop did.
    t0 = TSCGet;
    for (n = 0; n < SAMPLE_SIZE; n++)
        sink = n & 0x3FF;
    elapsed = TSCGet - t0;
    "Remainder Version #2 Cycles\t: %10.5f\n", ToF64(elapsed - loop_cost) / SAMPLE_SIZE;
}

//Top-level statement: invokes TimeIns when this file is executed.
TimeIns;

/*  Program Output
8 Cores 2.660GHz
Remainder Version #1 Cycles         :   26.85345
Remainder Version #2 Cycles         :   -0.00800
*/