#ifdef HAVE_XORG_CONFIG_H
#include <xorg-config.h>
#endif

#include "xaa.h"
#include "xaalocal.h"
#include "xaacexp.h"
#include "xf86.h"

/*
 * Per-scanline stipple expanders, defined at the end of this file.
 * Arguments: destination pointer, pointer to the stipple row, starting
 * bit offset within the row, stipple width in bits, and the number of
 * 32-bit words to emit.  Each returns a destination pointer.
 */
static CARD32 *StipplePowerOfTwo(CARD32 *dest, CARD32 *src,
                                 int shift, int width, int dwords);
static CARD32 *StipplePowerOfTwo_Inverted(CARD32 *dest, CARD32 *src,
                                          int shift, int width, int dwords);
static CARD32 *StippleUpTo32(CARD32 *base, CARD32 *src,
                             int shift, int width, int dwords);
static CARD32 *StippleUpTo32_Inverted(CARD32 *base, CARD32 *src,
                                      int shift, int width, int dwords);
static CARD32 *StippleOver32(CARD32 *base, CARD32 *src,
                             int offset, int width, int dwords);
static CARD32 *StippleOver32_Inverted(CARD32 *base, CARD32 *src,
                                      int offset, int width, int dwords);

/*
 * Exported names of the expander table and its accessor.  The TRIPLE_BITS
 * build of this file uses the "...3" names; both are wrapped in EXPNAME().
 */
#ifndef TRIPLE_BITS
#define stipple_scanline_func EXPNAME(XAAStippleScanlineFunc)
#define stipple_get_scanline_func EXPNAME(XAAGetStippleScanlineFunc)
#else
#define stipple_scanline_func EXPNAME(XAAStippleScanlineFunc3)
#define stipple_get_scanline_func EXPNAME(XAAGetStippleScanlineFunc3)
#endif

/*
 * Table of scanline expanders.  The fill routines in this file index it
 * with funcNo (0: width <= 32 and a power of two, 1: other widths <= 32,
 * 2: widths over 32) for the normal expander and funcNo + 3 for the
 * matching inverted expander.
 */
StippleScanlineProcPtr stipple_scanline_func[6] = {
    StipplePowerOfTwo,
    StippleUpTo32,
    StippleOver32,
    StipplePowerOfTwo_Inverted,
    StippleUpTo32_Inverted,
    StippleOver32_Inverted
};

/* Accessor: returns a pointer to the first entry of the expander table. */
StippleScanlineProcPtr *
stipple_get_scanline_func(void)
{
    StippleScanlineProcPtr *table = &stipple_scanline_func[0];

    return table;
}

/*
 * Destination helpers used by the StipplePowerOfTwo* routines; both expand
 * in terms of the local variable `dest`.
 *   - moving destination: DEST(i) is the i-th word after dest and
 *     RETURN(i) returns dest advanced by i words;
 *   - FIXEDBASE: every store goes to *dest and dest is returned as is.
 */
#ifndef FIXEDBASE
#define DEST(i)	dest[i]
#define RETURN(i)	return(dest + i)
#else
#define DEST(i)	*dest
#define RETURN(i)	return(dest)
#endif

/*
 * TRIPLE_BITS pattern expansion.
 *
 * EXPAND_PAT declares patA/patB/patC, three 32-bit words assembled from
 * the byte_expand3[] entries for the four bytes of the local `pat`.
 * WRITE_PAT1/2/3 store the first one, two or three of those words through
 * the local `dest`.
 *
 * In FIXEDBASE builds every store goes to *dest and dest is left alone,
 * mirroring DEST()/RETURN(); otherwise dest is advanced past the words
 * written.  The guard has to be FIXEDBASE, the symbol every other
 * conditional in this file tests: a differently spelled name is never
 * defined, which would silently select the advancing variants even for
 * fixed-base builds.
 */
#ifdef TRIPLE_BITS
#define EXPAND_PAT \
	CARD32 pat1 = byte_expand3[pat & 0xFF], \
	       pat2 = byte_expand3[(pat & 0xFF00) >> 8], \
	       pat3 = byte_expand3[(pat & 0xFF0000) >> 16], \
	       pat4 = byte_expand3[(pat & 0xFF000000) >> 24], \
	       patA = pat1 | (pat2 << 24), \
	       patB = (pat2 >> 8) | (pat3 << 16), \
	       patC = (pat3 >> 16) | (pat4 << 8)
#ifdef FIXEDBASE
#define WRITE_PAT1 { \
	*dest = patA; }
#define WRITE_PAT2 { \
	*dest = patA; \
	*dest = patB; }
#define WRITE_PAT3 { \
	*dest = patA; \
	*dest = patB; \
	*dest = patC; }
#else
#define WRITE_PAT1 { \
	*(dest++) = patA; }
#define WRITE_PAT2 { \
	*(dest) = patA; \
	*(dest + 1) = patB; \
	dest += 2; }
#define WRITE_PAT3 { \
	*(dest) = patA; \
	*(dest + 1) = patB; \
	*(dest + 2) = patC; \
	dest += 3; }
#endif
#endif

/*
 * Mask table indexed by a bit count 0..31: entry 0 is zero and entry n is
 * SHIFT_R(0xFFFFFFFF, 32 - n).  It has no entry for 32, so users must only
 * index it with values below 32.
 *
 * The definition is emitted only when none of FIXEDBASE, MSBFIRST and
 * TRIPLE_BITS is defined -- presumably so that the other build variants of
 * this file share this single copy (confirm against the build rules).
 */
#if !defined(FIXEDBASE) && !defined(MSBFIRST) && !defined(TRIPLE_BITS)

unsigned int XAAShiftMasks[32] = {
    /* gcc is rather pedantic about SHIFT_R(0xFFFFFFFF,32) */
    0x00000000, SHIFT_R(0xFFFFFFFF, 31),
    SHIFT_R(0xFFFFFFFF, 30), SHIFT_R(0xFFFFFFFF, 29),
    SHIFT_R(0xFFFFFFFF, 28), SHIFT_R(0xFFFFFFFF, 27),
    SHIFT_R(0xFFFFFFFF, 26), SHIFT_R(0xFFFFFFFF, 25),
    SHIFT_R(0xFFFFFFFF, 24), SHIFT_R(0xFFFFFFFF, 23),
    SHIFT_R(0xFFFFFFFF, 22), SHIFT_R(0xFFFFFFFF, 21),
    SHIFT_R(0xFFFFFFFF, 20), SHIFT_R(0xFFFFFFFF, 19),
    SHIFT_R(0xFFFFFFFF, 18), SHIFT_R(0xFFFFFFFF, 17),
    SHIFT_R(0xFFFFFFFF, 16), SHIFT_R(0xFFFFFFFF, 15),
    SHIFT_R(0xFFFFFFFF, 14), SHIFT_R(0xFFFFFFFF, 13),
    SHIFT_R(0xFFFFFFFF, 12), SHIFT_R(0xFFFFFFFF, 11),
    SHIFT_R(0xFFFFFFFF, 10), SHIFT_R(0xFFFFFFFF, 9),
    SHIFT_R(0xFFFFFFFF, 8), SHIFT_R(0xFFFFFFFF, 7),
    SHIFT_R(0xFFFFFFFF, 6), SHIFT_R(0xFFFFFFFF, 5),
    SHIFT_R(0xFFFFFFFF, 4), SHIFT_R(0xFFFFFFFF, 3),
    SHIFT_R(0xFFFFFFFF, 2), SHIFT_R(0xFFFFFFFF, 1)
};

#endif

/*
 * Stipple-fill a list of boxes through the CPU-to-screen colour expansion
 * hooks of the XAA info record.
 *
 *   pScrn           screen whose XAA info record supplies the hooks
 *   fg, bg          foreground / background colours; bg == -1 is handed
 *                   to the driver when no background is to be drawn
 *   rop, planemask  passed through to the driver hooks
 *   nBox, pBox      boxes to fill
 *   xorg, yorg      stipple origin; box corners are taken relative to it,
 *                   modulo the stipple size
 *   pPix            stipple pixmap (width, height, devKind stride and
 *                   devPrivate.ptr are read)
 *
 * If bg is not -1 and the driver flags say it cannot be expanded directly
 * (TRANSPARENCY_ONLY, or in the TRIPLE_BITS build RGB_EQUAL with a bg that
 * fails CHECK_RGB_EQUAL), the background is either pre-filled with
 * FillSolidRects (GXcopy only, when that hook exists) or every box is
 * expanded twice: first with the inverted stipple in bg, then with the
 * normal stipple in fg.
 */
void
#ifdef TRIPLE_BITS
 EXPNAME(XAAFillColorExpandRects3) (
#else
 EXPNAME(XAAFillColorExpandRects) (
#endif
                                      ScrnInfoPtr pScrn,
                                      int fg, int bg, int rop,
                                      unsigned int planemask,
                                      int nBox,
                                      BoxPtr pBox,
                                      int xorg, int yorg, PixmapPtr pPix) {
    XAAInfoRecPtr infoRec = GET_XAAINFORECPTR_FROM_SCRNINFOPTR(pScrn);
    unsigned char *stippleBits = (unsigned char *) pPix->devPrivate.ptr;
    int stride = pPix->devKind;
    int stipplewidth = pPix->drawable.width;
    int stippleheight = pPix->drawable.height;
    StippleScanlineProcPtr StippleFunc, bgFunc, fgFunc;
    Bool TwoPass = FALSE;
    Bool bgNeedsHelp, padQword, advance;
    unsigned char *row;
    CARD32 *base;
    int funcNo, numPasses, pass;
    int dwords, rows, srcx, srcy;

    /* Choose the expander family from the stipple width. */
    if (stipplewidth > 32)
        funcNo = 2;
    else if (stipplewidth & (stipplewidth - 1))
        funcNo = 1;
    else
        funcNo = 0;

    fgFunc = stipple_scanline_func[funcNo];
    bgFunc = stipple_scanline_func[funcNo + 3];
    StippleFunc = fgFunc;

    /* Can the driver expand fg and bg in a single pass? */
#ifdef TRIPLE_BITS
    bgNeedsHelp = (bg != -1) &&
        ((infoRec->CPUToScreenColorExpandFillFlags & TRANSPARENCY_ONLY) ||
         ((infoRec->CPUToScreenColorExpandFillFlags & RGB_EQUAL) &&
          !(CHECK_RGB_EQUAL(bg))));
#else
    bgNeedsHelp = (bg != -1) &&
        (infoRec->CPUToScreenColorExpandFillFlags & TRANSPARENCY_ONLY);
#endif

    if (bgNeedsHelp) {
        if ((rop == GXcopy) && infoRec->FillSolidRects) {
            /* still one pass: paint the background first */
            (*infoRec->FillSolidRects) (pScrn, bg, rop, planemask, nBox, pBox);
            bg = -1;
        }
        else {
            TwoPass = TRUE;
        }
    }

    numPasses = TwoPass ? 2 : 1;

    if (!TwoPass)
        (*infoRec->SetupForCPUToScreenColorExpandFill) (pScrn, fg, bg, rop,
                                                        planemask);

    for (; nBox--; pBox++) {
#ifdef TRIPLE_BITS
        dwords = (3 * (pBox->x2 - pBox->x1) + 31) >> 5;
#else
        dwords = (pBox->x2 - pBox->x1 + 31) >> 5;
#endif

        for (pass = 0; pass < numPasses; pass++) {
            if (TwoPass) {
                /* pass 0: inverted stipple in bg; pass 1: stipple in fg */
                (*infoRec->SetupForCPUToScreenColorExpandFill) (pScrn,
                                                                (pass == 0) ?
                                                                bg : fg, -1,
                                                                rop,
                                                                planemask);
                StippleFunc = (pass == 0) ? bgFunc : fgFunc;
            }

            rows = pBox->y2 - pBox->y1;
            /* an odd total word count needs one padding word afterwards */
            padQword = (infoRec->CPUToScreenColorExpandFillFlags
                        & CPU_TRANSFER_PAD_QWORD) && ((dwords * rows) & 0x01);

            (*infoRec->SubsequentCPUToScreenColorExpandFill) (pScrn,
                                                              pBox->x1,
                                                              pBox->y1,
                                                              pBox->x2 -
                                                              pBox->x1,
                                                              rows, 0);

            base = (CARD32 *) infoRec->ColorExpandBase;

            /* first stipple row / bit for this box, wrapped to >= 0 */
            srcy = (pBox->y1 - yorg) % stippleheight;
            if (srcy < 0)
                srcy += stippleheight;
            srcx = (pBox->x1 - xorg) % stipplewidth;
            if (srcx < 0)
                srcx += stipplewidth;

            row = stippleBits + (stride * srcy);

            /*
             * When the whole box fits in ColorExpandRange the destination
             * advances from row to row; otherwise (and always in FIXEDBASE
             * builds) every row is written starting at the base again.
             */
#ifndef FIXEDBASE
            advance = ((dwords * rows) <= infoRec->ColorExpandRange);
#else
            advance = FALSE;
#endif

            while (rows-- != 0) {
                CARD32 *next = (*StippleFunc) (base, (CARD32 *) row, srcx,
                                               stipplewidth, dwords);

                if (advance)
                    base = next;

                srcy++;
                row += stride;
                if (srcy >= stippleheight) {
                    srcy = 0;
                    row = stippleBits;
                }
            }

            if (padQword) {
                base = (CARD32 *) infoRec->ColorExpandBase;
                base[0] = 0x00000000;
            }
        }
    }

    if (infoRec->CPUToScreenColorExpandFillFlags & SYNC_AFTER_COLOR_EXPAND)
        (*infoRec->Sync) (pScrn);
    else
        SET_SYNC_FLAG(infoRec);
}

/*
 * Stipple-fill a list of spans through the CPU-to-screen colour expansion
 * hooks of the XAA info record.
 *
 *   pScrn           screen whose XAA info record supplies the hooks
 *   fg, bg          foreground / background colours; bg == -1 is handed
 *                   to the driver when no background is to be drawn
 *   rop, planemask  passed through to the driver hooks
 *   n, ppt, pwidth  number of spans, their start points and their widths
 *   fSorted         forwarded to FillSolidSpans only
 *   xorg, yorg      stipple origin
 *   pPix            stipple pixmap
 *
 * Background handling mirrors the rectangle variant: a single pass when
 * the driver can expand bg itself, a FillSolidSpans pre-fill for GXcopy
 * when that hook exists, otherwise two expansions per span (inverted
 * stipple in bg, then normal stipple in fg).
 */
void
#ifdef TRIPLE_BITS
 EXPNAME(XAAFillColorExpandSpans3) (
#else
 EXPNAME(XAAFillColorExpandSpans) (
#endif
                                      ScrnInfoPtr pScrn,
                                      int fg, int bg, int rop,
                                      unsigned int planemask,
                                      int n,
                                      DDXPointPtr ppt,
                                      int *pwidth,
                                      int fSorted,
                                      int xorg, int yorg, PixmapPtr pPix) {
    XAAInfoRecPtr infoRec = GET_XAAINFORECPTR_FROM_SCRNINFOPTR(pScrn);
    int stipplewidth = pPix->drawable.width;
    int stippleheight = pPix->drawable.height;
    StippleScanlineProcPtr StippleFunc, bgFunc, fgFunc;
    Bool TwoPass = FALSE;
    Bool bgNeedsHelp;
    unsigned char *row;
    CARD32 *base;
    int funcNo, numPasses, pass;
    int dwords, srcx, srcy;

    /* Choose the expander family from the stipple width. */
    if (stipplewidth > 32)
        funcNo = 2;
    else if (stipplewidth & (stipplewidth - 1))
        funcNo = 1;
    else
        funcNo = 0;

    fgFunc = stipple_scanline_func[funcNo];
    bgFunc = stipple_scanline_func[funcNo + 3];
    StippleFunc = fgFunc;

    /* Can the driver expand fg and bg in a single pass? */
#ifdef TRIPLE_BITS
    bgNeedsHelp = (bg != -1) &&
        ((infoRec->CPUToScreenColorExpandFillFlags & TRANSPARENCY_ONLY) ||
         ((infoRec->CPUToScreenColorExpandFillFlags & RGB_EQUAL) &&
          !(CHECK_RGB_EQUAL(bg))));
#else
    bgNeedsHelp = (bg != -1) &&
        (infoRec->CPUToScreenColorExpandFillFlags & TRANSPARENCY_ONLY);
#endif

    if (bgNeedsHelp) {
        if ((rop == GXcopy) && infoRec->FillSolidSpans) {
            /* still one pass: paint the background first */
            (*infoRec->FillSolidSpans) (pScrn, bg, rop, planemask, n, ppt,
                                        pwidth, fSorted);
            bg = -1;
        }
        else {
            TwoPass = TRUE;
        }
    }

    numPasses = TwoPass ? 2 : 1;

    if (!TwoPass)
        (*infoRec->SetupForCPUToScreenColorExpandFill) (pScrn, fg, bg, rop,
                                                        planemask);

    for (; n--; ppt++, pwidth++) {
#ifdef TRIPLE_BITS
        dwords = (3 * *pwidth + 31) >> 5;
#else
        dwords = (*pwidth + 31) >> 5;
#endif

        /* stipple row / bit for this span, wrapped to >= 0 */
        srcy = (ppt->y - yorg) % stippleheight;
        if (srcy < 0)
            srcy += stippleheight;
        srcx = (ppt->x - xorg) % stipplewidth;
        if (srcx < 0)
            srcx += stipplewidth;

        row = (unsigned char *) pPix->devPrivate.ptr + (pPix->devKind * srcy);

        for (pass = 0; pass < numPasses; pass++) {
            if (TwoPass) {
                /* pass 0: inverted stipple in bg; pass 1: stipple in fg */
                (*infoRec->SetupForCPUToScreenColorExpandFill) (pScrn,
                                                                (pass == 0) ?
                                                                bg : fg, -1,
                                                                rop,
                                                                planemask);
                StippleFunc = (pass == 0) ? bgFunc : fgFunc;
            }

            (*infoRec->SubsequentCPUToScreenColorExpandFill) (pScrn, ppt->x,
                                                              ppt->y, *pwidth,
                                                              1, 0);

            base = (CARD32 *) infoRec->ColorExpandBase;

            (*StippleFunc) (base, (CARD32 *) row, srcx, stipplewidth, dwords);

            /* an odd word count needs one padding word afterwards */
            if ((infoRec->CPUToScreenColorExpandFillFlags
                 & CPU_TRANSFER_PAD_QWORD) && (dwords & 0x01)) {
                base = (CARD32 *) infoRec->ColorExpandBase;
                base[0] = 0x00000000;
            }
        }
    }

    if (infoRec->CPUToScreenColorExpandFillFlags & SYNC_AFTER_COLOR_EXPAND)
        (*infoRec->Sync) (pScrn);
    else
        SET_SYNC_FLAG(infoRec);
}

#ifndef FIXEDBASE

/*
 * Stipple-fill a list of boxes using the scanline (indirect) colour
 * expansion hooks: each row is expanded into one of the
 * ScanlineColorExpandBuffers and then handed to the driver with
 * SubsequentColorExpandScanline.  Buffers are used round-robin.
 *
 *   pScrn           screen whose XAA info record supplies the hooks
 *   fg, bg          foreground / background colours; bg == -1 is handed
 *                   to the driver when no background is to be drawn
 *   rop, planemask  passed through to the driver hooks
 *   nBox, pBox      boxes to fill
 *   xorg, yorg      stipple origin
 *   pPix            stipple pixmap
 *
 * Background handling: single pass when the driver can expand bg itself,
 * FillSolidRects pre-fill for GXcopy when that hook exists, otherwise two
 * expansions per box (inverted stipple in bg, then normal stipple in fg).
 */
void
#ifdef TRIPLE_BITS
 EXPNAME(XAAFillScanlineColorExpandRects3) (
#else
 EXPNAME(XAAFillScanlineColorExpandRects) (
#endif
                                              ScrnInfoPtr pScrn,
                                              int fg, int bg, int rop,
                                              unsigned int planemask,
                                              int nBox,
                                              BoxPtr pBox,
                                              int xorg, int yorg,
                                              PixmapPtr pPix) {
    XAAInfoRecPtr infoRec = GET_XAAINFORECPTR_FROM_SCRNINFOPTR(pScrn);
    unsigned char *stippleBits = pPix->devPrivate.ptr;
    int stride = pPix->devKind;
    int stipplewidth = pPix->drawable.width;
    int stippleheight = pPix->drawable.height;
    StippleScanlineProcPtr StippleFunc, bgFunc, fgFunc;
    Bool TwoPass = FALSE;
    Bool bgNeedsHelp;
    unsigned char *row;
    CARD32 *base;
    int funcNo, numPasses, pass, bufferNo;
    int dwords, rows, srcx, srcy;

    /* Choose the expander family from the stipple width. */
    if (stipplewidth > 32)
        funcNo = 2;
    else if (stipplewidth & (stipplewidth - 1))
        funcNo = 1;
    else
        funcNo = 0;

    fgFunc = stipple_scanline_func[funcNo];
    bgFunc = stipple_scanline_func[funcNo + 3];
    StippleFunc = fgFunc;

    /* Can the driver expand fg and bg in a single pass? */
#ifdef TRIPLE_BITS
    bgNeedsHelp = (bg != -1) &&
        ((infoRec->ScanlineCPUToScreenColorExpandFillFlags & TRANSPARENCY_ONLY)
         || ((infoRec->ScanlineCPUToScreenColorExpandFillFlags & RGB_EQUAL) &&
             !(CHECK_RGB_EQUAL(bg))));
#else
    bgNeedsHelp = (bg != -1) &&
        (infoRec->ScanlineCPUToScreenColorExpandFillFlags & TRANSPARENCY_ONLY);
#endif

    if (bgNeedsHelp) {
        if ((rop == GXcopy) && infoRec->FillSolidRects) {
            /* still one pass: paint the background first */
            (*infoRec->FillSolidRects) (pScrn, bg, rop, planemask, nBox, pBox);
            bg = -1;
        }
        else {
            TwoPass = TRUE;
        }
    }

    numPasses = TwoPass ? 2 : 1;

    if (!TwoPass)
        (*infoRec->SetupForScanlineCPUToScreenColorExpandFill) (pScrn, fg, bg,
                                                                rop, planemask);

    for (; nBox--; pBox++) {
#ifdef TRIPLE_BITS
        dwords = (3 * (pBox->x2 - pBox->x1) + 31) >> 5;
#else
        dwords = (pBox->x2 - pBox->x1 + 31) >> 5;
#endif

        for (pass = 0; pass < numPasses; pass++) {
            if (TwoPass) {
                /* pass 0: inverted stipple in bg; pass 1: stipple in fg */
                (*infoRec->SetupForScanlineCPUToScreenColorExpandFill) (pScrn,
                                                                        (pass
                                                                         == 0)
                                                                        ? bg :
                                                                        fg,
                                                                        -1,
                                                                        rop,
                                                                        planemask);
                StippleFunc = (pass == 0) ? bgFunc : fgFunc;
            }

            rows = pBox->y2 - pBox->y1;

            (*infoRec->SubsequentScanlineCPUToScreenColorExpandFill) (pScrn,
                                                                      pBox->x1,
                                                                      pBox->y1,
                                                                      pBox->x2 -
                                                                      pBox->x1,
                                                                      rows, 0);

            bufferNo = 0;

            /* first stipple row / bit for this box, wrapped to >= 0 */
            srcy = (pBox->y1 - yorg) % stippleheight;
            if (srcy < 0)
                srcy += stippleheight;
            srcx = (pBox->x1 - xorg) % stipplewidth;
            if (srcx < 0)
                srcx += stipplewidth;

            row = stippleBits + (stride * srcy);

            while (rows-- != 0) {
                base = (CARD32 *) infoRec->ScanlineColorExpandBuffers[bufferNo];
                (*StippleFunc) (base, (CARD32 *) row, srcx, stipplewidth,
                                dwords);
                (*infoRec->SubsequentColorExpandScanline) (pScrn, bufferNo);

                /* cycle through the available scanline buffers */
                if (++bufferNo >= infoRec->NumScanlineColorExpandBuffers)
                    bufferNo = 0;

                srcy++;
                row += stride;
                if (srcy >= stippleheight) {
                    srcy = 0;
                    row = stippleBits;
                }
            }
        }
    }

    SET_SYNC_FLAG(infoRec);
}

/*
 * Stipple-fill a list of spans using the scanline (indirect) colour
 * expansion hooks.  Every span is expanded into scanline buffer 0 and
 * submitted with SubsequentColorExpandScanline.
 *
 *   pScrn           screen whose XAA info record supplies the hooks
 *   fg, bg          foreground / background colours; bg == -1 is handed
 *                   to the driver when no background is to be drawn
 *   rop, planemask  passed through to the driver hooks
 *   n, ppt, pwidth  number of spans, their start points and their widths
 *   fSorted         forwarded to FillSolidSpans only
 *   xorg, yorg      stipple origin
 *   pPix            stipple pixmap
 *
 * Background handling: single pass when the driver can expand bg itself,
 * FillSolidSpans pre-fill for GXcopy when that hook exists, otherwise two
 * expansions per span (inverted stipple in bg, then normal stipple in fg).
 */
void
#ifdef TRIPLE_BITS
 EXPNAME(XAAFillScanlineColorExpandSpans3) (
#else
 EXPNAME(XAAFillScanlineColorExpandSpans) (
#endif
                                              ScrnInfoPtr pScrn,
                                              int fg, int bg, int rop,
                                              unsigned int planemask,
                                              int n,
                                              DDXPointPtr ppt,
                                              int *pwidth,
                                              int fSorted,
                                              int xorg, int yorg,
                                              PixmapPtr pPix) {
    XAAInfoRecPtr infoRec = GET_XAAINFORECPTR_FROM_SCRNINFOPTR(pScrn);
    int stipplewidth = pPix->drawable.width;
    int stippleheight = pPix->drawable.height;
    StippleScanlineProcPtr StippleFunc, bgFunc, fgFunc;
    Bool TwoPass = FALSE;
    Bool bgNeedsHelp;
    unsigned char *row;
    CARD32 *base;
    int funcNo, numPasses, pass;
    int dwords, srcx, srcy;

    /* Choose the expander family from the stipple width. */
    if (stipplewidth > 32)
        funcNo = 2;
    else if (stipplewidth & (stipplewidth - 1))
        funcNo = 1;
    else
        funcNo = 0;

    fgFunc = stipple_scanline_func[funcNo];
    bgFunc = stipple_scanline_func[funcNo + 3];
    StippleFunc = fgFunc;

    /* Can the driver expand fg and bg in a single pass? */
#ifdef TRIPLE_BITS
    bgNeedsHelp = (bg != -1) &&
        ((infoRec->ScanlineCPUToScreenColorExpandFillFlags & TRANSPARENCY_ONLY)
         || ((infoRec->ScanlineCPUToScreenColorExpandFillFlags & RGB_EQUAL) &&
             !(CHECK_RGB_EQUAL(bg))));
#else
    bgNeedsHelp = (bg != -1) &&
        (infoRec->ScanlineCPUToScreenColorExpandFillFlags & TRANSPARENCY_ONLY);
#endif

    if (bgNeedsHelp) {
        if ((rop == GXcopy) && infoRec->FillSolidSpans) {
            /* still one pass: paint the background first */
            (*infoRec->FillSolidSpans) (pScrn, bg, rop, planemask, n, ppt,
                                        pwidth, fSorted);
            bg = -1;
        }
        else {
            TwoPass = TRUE;
        }
    }

    numPasses = TwoPass ? 2 : 1;

    if (!TwoPass)
        (*infoRec->SetupForScanlineCPUToScreenColorExpandFill) (pScrn, fg, bg,
                                                                rop, planemask);

    for (; n--; ppt++, pwidth++) {
#ifdef TRIPLE_BITS
        dwords = (3 * *pwidth + 31) >> 5;
#else
        dwords = (*pwidth + 31) >> 5;
#endif

        /* stipple row / bit for this span, wrapped to >= 0 */
        srcy = (ppt->y - yorg) % stippleheight;
        if (srcy < 0)
            srcy += stippleheight;
        srcx = (ppt->x - xorg) % stipplewidth;
        if (srcx < 0)
            srcx += stipplewidth;

        row = (unsigned char *) pPix->devPrivate.ptr + (pPix->devKind * srcy);

        for (pass = 0; pass < numPasses; pass++) {
            if (TwoPass) {
                /* pass 0: inverted stipple in bg; pass 1: stipple in fg */
                (*infoRec->SetupForScanlineCPUToScreenColorExpandFill) (pScrn,
                                                                        (pass
                                                                         == 0)
                                                                        ? bg :
                                                                        fg,
                                                                        -1,
                                                                        rop,
                                                                        planemask);
                StippleFunc = (pass == 0) ? bgFunc : fgFunc;
            }

            (*infoRec->SubsequentScanlineCPUToScreenColorExpandFill) (pScrn,
                                                                      ppt->x,
                                                                      ppt->y,
                                                                      *pwidth,
                                                                      1, 0);

            base = (CARD32 *) infoRec->ScanlineColorExpandBuffers[0];

            (*StippleFunc) (base, (CARD32 *) row, srcx, stipplewidth, dwords);
            (*infoRec->SubsequentColorExpandScanline) (pScrn, 0);
        }
    }

    SET_SYNC_FLAG(infoRec);
}

#endif

/*
 * Expand one scanline of a stipple whose width is a power of two and at
 * most 32.  The first source word is masked with XAAShiftMasks[width] and
 * replicated by repeated doubling until it spans 32 bits, rotated by
 * `shift`, bit-swapped within bytes for MSBFIRST builds, and then the same
 * word is stored `dwords` times (via the triple-expansion macros in
 * TRIPLE_BITS builds).
 *
 * Returns the destination pointer as left by the DEST/RETURN or WRITE_PAT
 * macros for this build.
 */
static CARD32 *
StipplePowerOfTwo(CARD32 *dest, CARD32 *src, int shift, int width, int dwords)
{
    CARD32 pat = *src;

    if (width < 32) {
        pat &= XAAShiftMasks[width];
        for (; width < 32; width <<= 1)
            pat |= SHIFT_L(pat, width);
    }

    /* rotate; skipped for 0 so that no shift by 32 is ever formed */
    if (shift != 0)
        pat = SHIFT_R(pat, shift) | SHIFT_L(pat, 32 - shift);

#ifdef MSBFIRST
    pat = SWAP_BITS_IN_BYTES(pat);
#endif

#ifdef TRIPLE_BITS
    {
        EXPAND_PAT;

        for (; dwords >= 3; dwords -= 3)
            WRITE_PAT3;

        switch (dwords) {
        case 2:
            WRITE_PAT2;
            break;
        case 1:
            WRITE_PAT1;
            break;
        default:
            break;
        }

        return dest;
    }
#else                           /* TRIPLE_BITS */
    /* bulk: four words per iteration */
    for (; dwords >= 4; dwords -= 4) {
        DEST(0) = pat;
        DEST(1) = pat;
        DEST(2) = pat;
        DEST(3) = pat;
#ifndef FIXEDBASE
        dest += 4;
#endif
    }

    /* tail: whatever is left */
    switch (dwords) {
    case 0:
        return dest;
    case 1:
        DEST(0) = pat;
        RETURN(1);
    case 2:
        DEST(0) = pat;
        DEST(1) = pat;
        RETURN(2);
    default:
        DEST(0) = pat;
        DEST(1) = pat;
        DEST(2) = pat;
        RETURN(3);
    }
#endif                          /* TRIPLE_BITS */
}

/*
 * Inverted counterpart of the power-of-two expander: builds the 32-bit
 * pattern word the same way (mask with XAAShiftMasks[width], replicate by
 * doubling, rotate by `shift`, byte-wise bit swap for MSBFIRST), then
 * complements it before storing it `dwords` times.
 *
 * Returns the destination pointer as left by the DEST/RETURN or WRITE_PAT
 * macros for this build.
 */
static CARD32 *
StipplePowerOfTwo_Inverted(CARD32 *dest, CARD32 *src,
                           int shift, int width, int dwords)
{
    CARD32 pat = *src;

    if (width < 32) {
        pat &= XAAShiftMasks[width];
        for (; width < 32; width <<= 1)
            pat |= SHIFT_L(pat, width);
    }

    /* rotate; skipped for 0 so that no shift by 32 is ever formed */
    if (shift != 0)
        pat = SHIFT_R(pat, shift) | SHIFT_L(pat, 32 - shift);

#ifdef MSBFIRST
    pat = SWAP_BITS_IN_BYTES(pat);
#endif

    /* the inversion is applied last, after rotation and bit swapping */
    pat = ~pat;

#ifdef TRIPLE_BITS
    {
        EXPAND_PAT;

        for (; dwords >= 3; dwords -= 3)
            WRITE_PAT3;

        switch (dwords) {
        case 2:
            WRITE_PAT2;
            break;
        case 1:
            WRITE_PAT1;
            break;
        default:
            break;
        }

        return dest;
    }
#else                           /* TRIPLE_BITS */
    /* bulk: four words per iteration */
    for (; dwords >= 4; dwords -= 4) {
        DEST(0) = pat;
        DEST(1) = pat;
        DEST(2) = pat;
        DEST(3) = pat;
#ifndef FIXEDBASE
        dest += 4;
#endif
    }

    /* tail: whatever is left */
    switch (dwords) {
    case 0:
        return dest;
    case 1:
        DEST(0) = pat;
        RETURN(1);
    case 2:
        DEST(0) = pat;
        DEST(1) = pat;
        RETURN(2);
    default:
        DEST(0) = pat;
        DEST(1) = pat;
        DEST(2) = pat;
        RETURN(3);
    }
#endif                          /* TRIPLE_BITS */
}

/*
 * Expand one scanline of a stipple narrower than 32 bits whose width need
 * not be a power of two.  The masked source word is replicated by doubling
 * until the replicated width is at least 16 and then once more; each
 * output word is assembled from two shifts of that pattern, and `shift` is
 * advanced by 32 modulo the (replicated) width for the next word.
 *
 * `width` indexes XAAShiftMasks[], so it must be below 32; the fill
 * routines in this file only select this expander for such widths.
 *
 * Output goes through the WRITE_BITS* macros, which operate on `base`;
 * the function returns `base` afterwards.
 */
static CARD32 *
StippleUpTo32(CARD32 *base, CARD32 *src, int shift, int width, int dwords)
{
    CARD32 pat = *src & XAAShiftMasks[width];

    for (; width < 16; width <<= 1)
        pat |= SHIFT_L(pat, width);
    pat |= SHIFT_L(pat, width);

    while (dwords-- != 0) {
        CARD32 bits = SHIFT_R(pat, shift) | SHIFT_L(pat, width - shift);

#ifndef TRIPLE_BITS
        WRITE_BITS(bits);
#else
        /* one source word yields up to three output words */
        if (dwords >= 2) {
            WRITE_BITS3(bits);
            dwords -= 2;
        }
        else if (dwords > 0) {
            WRITE_BITS2(bits);
            dwords--;
        }
        else {
            WRITE_BITS1(bits);
        }
#endif

        shift = (shift + 32) % width;
    }

    return base;
}

/*
 * Inverted counterpart of the "up to 32" expander: identical pattern
 * replication and per-word shifting, but every assembled word is
 * complemented before it is written.
 *
 * `width` indexes XAAShiftMasks[], so it must be below 32; the fill
 * routines in this file only select this expander for such widths.
 *
 * Output goes through the WRITE_BITS* macros, which operate on `base`;
 * the function returns `base` afterwards.
 */
static CARD32 *
StippleUpTo32_Inverted(CARD32 *base, CARD32 *src,
                       int shift, int width, int dwords)
{
    CARD32 pat = *src & XAAShiftMasks[width];

    for (; width < 16; width <<= 1)
        pat |= SHIFT_L(pat, width);
    pat |= SHIFT_L(pat, width);

    while (dwords-- != 0) {
        CARD32 bits = ~(SHIFT_R(pat, shift) | SHIFT_L(pat, width - shift));

#ifndef TRIPLE_BITS
        WRITE_BITS(bits);
#else
        /* one source word yields up to three output words */
        if (dwords >= 2) {
            WRITE_BITS3(bits);
            dwords -= 2;
        }
        else if (dwords > 0) {
            WRITE_BITS2(bits);
            dwords--;
        }
        else {
            WRITE_BITS1(bits);
        }
#endif

        shift = (shift + 32) % width;
    }

    return base;
}

/*
 * Expand one scanline of a stipple wider than 32 bits.  For each output
 * word, 32 bits are gathered starting at bit `offset` of the stipple row;
 * when fewer than 32 bits remain before the end of the row, the remainder
 * is taken from the first word of the row (wrap-around).  `offset` then
 * advances by 32 modulo `width`.
 *
 * Output goes through the WRITE_BITS* macros, which operate on `base`;
 * the function returns `base` afterwards.
 */
static CARD32 *
StippleOver32(CARD32 *base, CARD32 *src, int offset, int width, int dwords)
{
    while (dwords-- != 0) {
        int bitsleft = width - offset;      /* bits before the row ends */
        CARD32 *srcp = src + (offset >> 5); /* word holding bit `offset` */
        int shift = offset & 31;            /* bit position inside *srcp */
        int usable = 32 - shift;            /* bits *srcp can contribute */
        CARD32 bits;

        if (bitsleft >= 32) {
            /* no wrap: one aligned word, or two words stitched together */
            if (shift)
                bits = SHIFT_R(*srcp, shift) | SHIFT_L(srcp[1], usable);
            else
                bits = *srcp;
        }
        else {
            /* wrap: the start of the row fills in above the tail bits */
            bits = SHIFT_L(*src, bitsleft);
            if (bitsleft <= usable) {
                bits |= SHIFT_R(*srcp, shift) & XAAShiftMasks[bitsleft];
            }
            else {
                bits |= (SHIFT_L(srcp[1], usable) & XAAShiftMasks[bitsleft]) |
                    (SHIFT_R(*srcp, shift) & XAAShiftMasks[usable]);
            }
        }

#ifndef TRIPLE_BITS
        WRITE_BITS(bits);
#else
        /* one source word yields up to three output words */
        if (dwords >= 2) {
            WRITE_BITS3(bits);
            dwords -= 2;
        }
        else if (dwords > 0) {
            WRITE_BITS2(bits);
            dwords--;
        }
        else {
            WRITE_BITS1(bits);
        }
#endif

        offset = (offset + 32) % width;
    }

    return base;
}

/*
 * Inverted counterpart of the "over 32" expander: gathers 32 bits per
 * output word from bit `offset` of the stipple row (wrapping to the first
 * word of the row near its end), complements them, writes them, and
 * advances `offset` by 32 modulo `width`.
 *
 * Output goes through the WRITE_BITS* macros, which operate on `base`;
 * the function returns `base` afterwards.
 */
static CARD32 *
StippleOver32_Inverted(CARD32 *base, CARD32 *src,
                       int offset, int width, int dwords)
{
    while (dwords-- != 0) {
        int bitsleft = width - offset;      /* bits before the row ends */
        CARD32 *srcp = src + (offset >> 5); /* word holding bit `offset` */
        int shift = offset & 31;            /* bit position inside *srcp */
        int usable = 32 - shift;            /* bits *srcp can contribute */
        CARD32 bits;

        if (bitsleft >= 32) {
            /* no wrap: one aligned word, or two words stitched together */
            if (shift)
                bits = SHIFT_R(*srcp, shift) | SHIFT_L(srcp[1], usable);
            else
                bits = *srcp;
        }
        else {
            /* wrap: the start of the row fills in above the tail bits */
            bits = SHIFT_L(*src, bitsleft);
            if (bitsleft <= usable) {
                bits |= SHIFT_R(*srcp, shift) & XAAShiftMasks[bitsleft];
            }
            else {
                bits |= (SHIFT_L(srcp[1], usable) & XAAShiftMasks[bitsleft]) |
                    (SHIFT_R(*srcp, shift) & XAAShiftMasks[usable]);
            }
        }

        bits = ~bits;

#ifndef TRIPLE_BITS
        WRITE_BITS(bits);
#else
        /* one source word yields up to three output words */
        if (dwords >= 2) {
            WRITE_BITS3(bits);
            dwords -= 2;
        }
        else if (dwords > 0) {
            WRITE_BITS2(bits);
            dwords--;
        }
        else {
            WRITE_BITS1(bits);
        }
#endif

        offset = (offset + 32) % width;
    }

    return base;
}