Apply compile time optimisations

This commit is contained in:
Hugo Mårdbrink 2024-04-07 13:06:49 +02:00
parent 29b5365918
commit 33c18dbefa
4 changed files with 284 additions and 13 deletions

View file

@ -0,0 +1,100 @@
#include <stdint.h>
#include <stdlib.h>
// Side length of one square DCT block.
#define DCT_SIZE 8
// Number of mock blocks that main() generates and transforms.
#define TOTAL_DCT_BLOCKS 100
// Sample type stored in the input and output matrices.
#define element_t int16_t
// Floating-point type used for intermediate DCT arithmetic.
#define real_t double
// Number of table entries: one full cosine period in steps of
// PI / (2 * DCT_SIZE), i.e. 4 * DCT_SIZE.
#define DCT_COS_TABLE_SIZE 32
// DCT_COS_TABLE[i] = cos(i * PI / (2 * DCT_SIZE)), written out to full
// double precision so the lookup agrees with calling cos() directly.
// Defined as one static const array (not a compound literal expanded at each
// use) so the data exists exactly once with static storage duration.
static const double DCT_COS_TABLE[DCT_COS_TABLE_SIZE] = {
     1.0,
     0.98078528040323044913,
     0.92387953251128675613,
     0.83146961230254523708,
     0.70710678118654752440,
     0.55557023301960222474,
     0.38268343236508977173,
     0.19509032201612826785,
     0.0,
    -0.19509032201612826785,
    -0.38268343236508977173,
    -0.55557023301960222474,
    -0.70710678118654752440,
    -0.83146961230254523708,
    -0.92387953251128675613,
    -0.98078528040323044913,
    -1.0,
    -0.98078528040323044913,
    -0.92387953251128675613,
    -0.83146961230254523708,
    -0.70710678118654752440,
    -0.55557023301960222474,
    -0.38268343236508977173,
    -0.19509032201612826785,
     0.0,
     0.19509032201612826785,
     0.38268343236508977173,
     0.55557023301960222474,
     0.70710678118654752440,
     0.83146961230254523708,
     0.92387953251128675613,
     0.98078528040323044913
};
// 1 / sqrt(DCT_SIZE); parenthesised so the cast cannot bind to neighbouring tokens.
#define INV_SQRTDCT_SIZE ((real_t) 0.35355339059327376220)
// sqrt(2) / sqrt(DCT_SIZE)
#define SQRT2_INV_SQRTDCT ((real_t) 0.5)
// Computes the 2-D DCT of one DCT_SIZE x DCT_SIZE block.
//
// matrix_in:  DCT_SIZE row pointers, each to DCT_SIZE input samples (read only).
// matrix_out: DCT_SIZE row pointers, each to DCT_SIZE output slots; every slot
//             is overwritten with the scaled coefficient converted to element_t.
//
// Cosine factors are read from DCT_COS_TABLE; the index is reduced modulo
// DCT_COS_TABLE_SIZE before each lookup.
void dct_2d(element_t** matrix_in, element_t** matrix_out) {
    for (int row_freq = 0; row_freq < DCT_SIZE; ++row_freq) {
        // The zero-frequency term uses a different normalisation factor.
        real_t row_scale;
        if (row_freq == 0) {
            row_scale = INV_SQRTDCT_SIZE;
        } else {
            row_scale = SQRT2_INV_SQRTDCT;
        }

        for (int col_freq = 0; col_freq < DCT_SIZE; ++col_freq) {
            real_t col_scale;
            if (col_freq == 0) {
                col_scale = INV_SQRTDCT_SIZE;
            } else {
                col_scale = SQRT2_INV_SQRTDCT;
            }

            real_t acc = 0;
            for (int y = 0; y < DCT_SIZE; ++y) {
                const int row_idx = ((2 * y + 1) * row_freq) % DCT_COS_TABLE_SIZE;
                const real_t row_basis = DCT_COS_TABLE[row_idx];

                for (int x = 0; x < DCT_SIZE; ++x) {
                    const int col_idx = ((2 * x + 1) * col_freq) % DCT_COS_TABLE_SIZE;
                    const real_t col_basis = DCT_COS_TABLE[col_idx];
                    acc += matrix_in[y][x] * row_basis * col_basis;
                }
            }

            matrix_out[row_freq][col_freq] = row_scale * col_scale * acc;
        }
    }
}
// Fills every one of the TOTAL_DCT_BLOCKS blocks with the same test pattern:
// the element at (row, column) is set to row + column.
//
// mock_matrices: TOTAL_DCT_BLOCKS blocks, each DCT_SIZE row pointers to
//                DCT_SIZE writable elements.
void populate_mock_matrices(element_t*** mock_matrices) {
    long block = 0;
    while (block < TOTAL_DCT_BLOCKS) {
        for (int row = 0; row < DCT_SIZE; ++row) {
            int col = 0;
            while (col < DCT_SIZE) {
                mock_matrices[block][row][col] = row + col;
                ++col;
            }
        }
        ++block;
    }
}
// Allocates TOTAL_DCT_BLOCKS blocks of DCT_SIZE x DCT_SIZE elements and fills
// them via populate_mock_matrices().
//
// Returns the top-level array of blocks, or NULL if any allocation fails; on
// failure everything allocated so far is released before returning.
// The caller owns a non-NULL result and releases it with free_mock_matrices().
element_t ***generate_mock_matrices(void) {
    element_t ***mock_matrices = malloc(TOTAL_DCT_BLOCKS * sizeof *mock_matrices);
    if (mock_matrices == NULL) {
        return NULL;
    }

    // Counts fully built blocks so the failure path knows how much to unwind.
    int blocks_done = 0;
    for (; blocks_done < TOTAL_DCT_BLOCKS; blocks_done++) {
        element_t **block = malloc(DCT_SIZE * sizeof *block);
        if (block == NULL) {
            goto fail;
        }

        int rows_done = 0;
        for (; rows_done < DCT_SIZE; rows_done++) {
            block[rows_done] = malloc(DCT_SIZE * sizeof *block[rows_done]);
            if (block[rows_done] == NULL) {
                break;
            }
        }
        if (rows_done < DCT_SIZE) {
            // Release the partially built block; it was never stored in the array.
            while (rows_done > 0) {
                free(block[--rows_done]);
            }
            free(block);
            goto fail;
        }

        mock_matrices[blocks_done] = block;
    }

    populate_mock_matrices(mock_matrices);
    return mock_matrices;

fail:
    while (blocks_done > 0) {
        --blocks_done;
        for (int j = 0; j < DCT_SIZE; j++) {
            free(mock_matrices[blocks_done][j]);
        }
        free(mock_matrices[blocks_done]);
    }
    free(mock_matrices);
    return NULL;
}
// Releases every row, every block and the top-level array of a set of
// TOTAL_DCT_BLOCKS mock matrices.
//
// mock_matrices: array to release; NULL is accepted and ignored, as are NULL
//                block entries.
// matrix_out:    not used by this function; it is neither read nor freed here.
void free_mock_matrices(element_t*** mock_matrices, element_t** matrix_out) {
    (void) matrix_out;  // kept for interface compatibility only

    if (mock_matrices == NULL) {
        return;
    }
    for (int i = 0; i < TOTAL_DCT_BLOCKS; i++) {
        if (mock_matrices[i] == NULL) {
            continue;
        }
        for (int j = 0; j < DCT_SIZE; j++) {
            free(mock_matrices[i][j]);
        }
        free(mock_matrices[i]);
    }
    free(mock_matrices);
}
// Benchmark driver: builds the mock input blocks, runs dct_2d() on each of
// them into one shared output matrix, then releases all memory.
//
// Returns EXIT_SUCCESS when every block was transformed, EXIT_FAILURE if any
// allocation failed.
int main(void) {
    element_t ***mock_matrices = generate_mock_matrices();
    if (mock_matrices == NULL) {
        // Defensive: nothing to transform if no input was produced.
        return EXIT_FAILURE;
    }

    int status = EXIT_FAILURE;
    int rows_ready = 0;  // number of output rows successfully allocated
    element_t **matrix_out = malloc(DCT_SIZE * sizeof *matrix_out);
    if (matrix_out == NULL) {
        goto cleanup;
    }
    for (; rows_ready < DCT_SIZE; rows_ready++) {
        matrix_out[rows_ready] = malloc(DCT_SIZE * sizeof *matrix_out[rows_ready]);
        if (matrix_out[rows_ready] == NULL) {
            goto cleanup;
        }
    }

    // The output matrix is reused: each call overwrites the previous result.
    for (int i = 0; i < TOTAL_DCT_BLOCKS; i++) {
        dct_2d(mock_matrices[i], matrix_out);
    }
    status = EXIT_SUCCESS;

cleanup:
    free_mock_matrices(mock_matrices, matrix_out);
    // rows_ready is 0 whenever matrix_out is NULL, so this never dereferences NULL.
    while (rows_ready > 0) {
        free(matrix_out[--rows_ready]);
    }
    free(matrix_out);
    return status;
}

90
versions/naive.c Normal file
View file

@ -0,0 +1,90 @@
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
// Side length of one square DCT block.
#define DCT_SIZE 8
// Number of mock blocks that main() generates and transforms.
#define TOTAL_DCT_BLOCKS 100
// Pi, used to form the cosine arguments in dct_2d().
#define PI 3.14159265358979323846
// Sample type stored in the input and output matrices.
#define element_t int16_t
// Floating-point type used for intermediate DCT arithmetic.
#define real_t double
// Reference (naive) 2-D DCT of one DCT_SIZE x DCT_SIZE block.
//
// matrix_in:  DCT_SIZE row pointers, each to DCT_SIZE input samples (read only).
// matrix_out: DCT_SIZE row pointers, each to DCT_SIZE output slots; every slot
//             is overwritten with the scaled coefficient converted to element_t.
//
// Every scale factor and cosine is recomputed with sqrt()/cos() at the point
// of use; nothing is cached or hoisted.
void dct_2d(element_t** matrix_in, element_t** matrix_out) {
    for (int row_freq = 0; row_freq < DCT_SIZE; ++row_freq) {
        for (int col_freq = 0; col_freq < DCT_SIZE; ++col_freq) {
            // The zero-frequency term uses a different normalisation factor.
            real_t row_scale;
            if (row_freq == 0) {
                row_scale = 1 / sqrt(DCT_SIZE);
            } else {
                row_scale = sqrt(2) / sqrt(DCT_SIZE);
            }

            real_t col_scale;
            if (col_freq == 0) {
                col_scale = 1 / sqrt(DCT_SIZE);
            } else {
                col_scale = sqrt(2) / sqrt(DCT_SIZE);
            }

            real_t acc = 0;
            for (int y = 0; y < DCT_SIZE; ++y) {
                for (int x = 0; x < DCT_SIZE; ++x) {
                    const real_t row_angle = (2 * y + 1) * row_freq * PI / (2 * DCT_SIZE);
                    const real_t col_angle = (2 * x + 1) * col_freq * PI / (2 * DCT_SIZE);
                    acc += matrix_in[y][x] * cos(row_angle) * cos(col_angle);
                }
            }

            matrix_out[row_freq][col_freq] = row_scale * col_scale * acc;
        }
    }
}
// Writes the test pattern into all TOTAL_DCT_BLOCKS blocks: each element
// receives the sum of its row index and its column index.
//
// mock_matrices: TOTAL_DCT_BLOCKS blocks, each DCT_SIZE row pointers to
//                DCT_SIZE writable elements.
void populate_mock_matrices(element_t*** mock_matrices) {
    long block = 0;
    while (block < TOTAL_DCT_BLOCKS) {
        for (int row = 0; row < DCT_SIZE; ++row) {
            int col = 0;
            while (col < DCT_SIZE) {
                mock_matrices[block][row][col] = row + col;
                ++col;
            }
        }
        ++block;
    }
}
// Allocates TOTAL_DCT_BLOCKS blocks of DCT_SIZE x DCT_SIZE elements and fills
// them via populate_mock_matrices().
//
// Returns the top-level array of blocks, or NULL if any allocation fails; on
// failure everything allocated so far is released before returning.
// The caller owns a non-NULL result and releases it with free_mock_matrices().
element_t ***generate_mock_matrices(void) {
    element_t ***mock_matrices = malloc(TOTAL_DCT_BLOCKS * sizeof *mock_matrices);
    if (mock_matrices == NULL) {
        return NULL;
    }

    // Counts fully built blocks so the failure path knows how much to unwind.
    int blocks_done = 0;
    for (; blocks_done < TOTAL_DCT_BLOCKS; blocks_done++) {
        element_t **block = malloc(DCT_SIZE * sizeof *block);
        if (block == NULL) {
            goto fail;
        }

        int rows_done = 0;
        for (; rows_done < DCT_SIZE; rows_done++) {
            block[rows_done] = malloc(DCT_SIZE * sizeof *block[rows_done]);
            if (block[rows_done] == NULL) {
                break;
            }
        }
        if (rows_done < DCT_SIZE) {
            // Release the partially built block; it was never stored in the array.
            while (rows_done > 0) {
                free(block[--rows_done]);
            }
            free(block);
            goto fail;
        }

        mock_matrices[blocks_done] = block;
    }

    populate_mock_matrices(mock_matrices);
    return mock_matrices;

fail:
    while (blocks_done > 0) {
        --blocks_done;
        for (int j = 0; j < DCT_SIZE; j++) {
            free(mock_matrices[blocks_done][j]);
        }
        free(mock_matrices[blocks_done]);
    }
    free(mock_matrices);
    return NULL;
}
// Releases every row, every block and the top-level array of a set of
// TOTAL_DCT_BLOCKS mock matrices.
//
// mock_matrices: array to release; NULL is accepted and ignored, as are NULL
//                block entries.
// matrix_out:    not used by this function; it is neither read nor freed here.
void free_mock_matrices(element_t*** mock_matrices, element_t** matrix_out) {
    (void) matrix_out;  // kept for interface compatibility only

    if (mock_matrices == NULL) {
        return;
    }
    for (int i = 0; i < TOTAL_DCT_BLOCKS; i++) {
        if (mock_matrices[i] == NULL) {
            continue;
        }
        for (int j = 0; j < DCT_SIZE; j++) {
            free(mock_matrices[i][j]);
        }
        free(mock_matrices[i]);
    }
    free(mock_matrices);
}
// Benchmark driver: builds the mock input blocks, runs dct_2d() on each of
// them into one shared output matrix, then releases all memory.
//
// Returns EXIT_SUCCESS when every block was transformed, EXIT_FAILURE if any
// allocation failed.
int main(void) {
    element_t ***mock_matrices = generate_mock_matrices();
    if (mock_matrices == NULL) {
        // Defensive: nothing to transform if no input was produced.
        return EXIT_FAILURE;
    }

    int status = EXIT_FAILURE;
    int rows_ready = 0;  // number of output rows successfully allocated
    element_t **matrix_out = malloc(DCT_SIZE * sizeof *matrix_out);
    if (matrix_out == NULL) {
        goto cleanup;
    }
    for (; rows_ready < DCT_SIZE; rows_ready++) {
        matrix_out[rows_ready] = malloc(DCT_SIZE * sizeof *matrix_out[rows_ready]);
        if (matrix_out[rows_ready] == NULL) {
            goto cleanup;
        }
    }

    // The output matrix is reused: each call overwrites the previous result.
    for (long i = 0; i < TOTAL_DCT_BLOCKS; i++) {
        dct_2d(mock_matrices[i], matrix_out);
    }
    status = EXIT_SUCCESS;

cleanup:
    free_mock_matrices(mock_matrices, matrix_out);
    // rows_ready is 0 whenever matrix_out is NULL, so this never dereferences NULL.
    while (rows_ready > 0) {
        free(matrix_out[--rows_ready]);
    }
    free(matrix_out);
    return status;
}