Viewing file: transpose.c (7.35 KB) -rw-r--r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
/* * Copyright (c) 2008-2020 Stefan Krah. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */
#include "mpdecimal.h"
#include <assert.h> #include <limits.h> #include <stdio.h> #include <stdlib.h> #include <string.h>
#include "bits.h" #include "constants.h" #include "transpose.h" #include "typearith.h"
#define BUFSIZE 4096 #define SIDE 128
/* Bignum: The transpose functions are used for very large transforms in sixstep.c and fourstep.c. */
/* Definition of the matrix transpose */ void std_trans(mpd_uint_t dest[], mpd_uint_t src[], mpd_size_t rows, mpd_size_t cols) { mpd_size_t idest, isrc; mpd_size_t r, c;
for (r = 0; r < rows; r++) { isrc = r * cols; idest = r; for (c = 0; c < cols; c++) { dest[idest] = src[isrc]; isrc += 1; idest += rows; } } }
/* * Swap half-rows of 2^n * (2*2^n) matrix. * FORWARD_CYCLE: even/odd permutation of the halfrows. * BACKWARD_CYCLE: reverse the even/odd permutation. */ static int swap_halfrows_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols, int dir) { mpd_uint_t buf1[BUFSIZE]; mpd_uint_t buf2[BUFSIZE]; mpd_uint_t *readbuf, *writebuf, *hp; mpd_size_t *done, dbits; mpd_size_t b = BUFSIZE, stride; mpd_size_t hn, hmax; /* halfrow number */ mpd_size_t m, r=0; mpd_size_t offset; mpd_size_t next;
assert(cols == mul_size_t(2, rows));
if (dir == FORWARD_CYCLE) { r = rows; } else if (dir == BACKWARD_CYCLE) { r = 2; } else { abort(); /* GCOV_NOT_REACHED */ }
m = cols - 1; hmax = rows; /* cycles start at odd halfrows */ dbits = 8 * sizeof *done; if ((done = mpd_calloc(hmax/(sizeof *done) + 1, sizeof *done)) == NULL) { return 0; }
for (hn = 1; hn <= hmax; hn += 2) {
if (done[hn/dbits] & mpd_bits[hn%dbits]) { continue; }
readbuf = buf1; writebuf = buf2;
for (offset = 0; offset < cols/2; offset += b) {
stride = (offset + b < cols/2) ? b : cols/2-offset;
hp = matrix + hn*cols/2; memcpy(readbuf, hp+offset, stride*(sizeof *readbuf)); pointerswap(&readbuf, &writebuf);
next = mulmod_size_t(hn, r, m); hp = matrix + next*cols/2;
while (next != hn) {
memcpy(readbuf, hp+offset, stride*(sizeof *readbuf)); memcpy(hp+offset, writebuf, stride*(sizeof *writebuf)); pointerswap(&readbuf, &writebuf);
done[next/dbits] |= mpd_bits[next%dbits];
next = mulmod_size_t(next, r, m); hp = matrix + next*cols/2;
}
memcpy(hp+offset, writebuf, stride*(sizeof *writebuf));
done[hn/dbits] |= mpd_bits[hn%dbits]; } }
mpd_free(done); return 1; }
/* In-place transpose of a square matrix */ static inline void squaretrans(mpd_uint_t *buf, mpd_size_t cols) { mpd_uint_t tmp; mpd_size_t idest, isrc; mpd_size_t r, c;
for (r = 0; r < cols; r++) { c = r+1; isrc = r*cols + c; idest = c*cols + r; for (c = r+1; c < cols; c++) { tmp = buf[isrc]; buf[isrc] = buf[idest]; buf[idest] = tmp; isrc += 1; idest += cols; } } }
/* * Transpose 2^n * 2^n matrix. For cache efficiency, the matrix is split into * square blocks with side length 'SIDE'. First, the blocks are transposed, * then a square transposition is done on each individual block. */ static void squaretrans_pow2(mpd_uint_t *matrix, mpd_size_t size) { mpd_uint_t buf1[SIDE*SIDE]; mpd_uint_t buf2[SIDE*SIDE]; mpd_uint_t *to, *from; mpd_size_t b = size; mpd_size_t r, c; mpd_size_t i;
while (b > SIDE) b >>= 1;
for (r = 0; r < size; r += b) {
for (c = r; c < size; c += b) {
from = matrix + r*size + c; to = buf1; for (i = 0; i < b; i++) { memcpy(to, from, b*(sizeof *to)); from += size; to += b; } squaretrans(buf1, b);
if (r == c) { to = matrix + r*size + c; from = buf1; for (i = 0; i < b; i++) { memcpy(to, from, b*(sizeof *to)); from += b; to += size; } continue; } else { from = matrix + c*size + r; to = buf2; for (i = 0; i < b; i++) { memcpy(to, from, b*(sizeof *to)); from += size; to += b; } squaretrans(buf2, b);
to = matrix + c*size + r; from = buf1; for (i = 0; i < b; i++) { memcpy(to, from, b*(sizeof *to)); from += b; to += size; }
to = matrix + r*size + c; from = buf2; for (i = 0; i < b; i++) { memcpy(to, from, b*(sizeof *to)); from += b; to += size; } } } }
}
/* * In-place transposition of a 2^n x 2^n or a 2^n x (2*2^n) * or a (2*2^n) x 2^n matrix. */ int transpose_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols) { mpd_size_t size = mul_size_t(rows, cols);
assert(ispower2(rows)); assert(ispower2(cols));
if (cols == rows) { squaretrans_pow2(matrix, rows); } else if (cols == mul_size_t(2, rows)) { if (!swap_halfrows_pow2(matrix, rows, cols, FORWARD_CYCLE)) { return 0; } squaretrans_pow2(matrix, rows); squaretrans_pow2(matrix+(size/2), rows); } else if (rows == mul_size_t(2, cols)) { squaretrans_pow2(matrix, cols); squaretrans_pow2(matrix+(size/2), cols); if (!swap_halfrows_pow2(matrix, cols, rows, BACKWARD_CYCLE)) { return 0; } } else { abort(); /* GCOV_NOT_REACHED */ }
return 1; }
|