Edit: My bench marking code was wrong (SIZE / BUFSIZ in fwrite() should only be BUFSIZ), so everything is cleared up: in my case buffered fwrite() is giving better performance.
I try to speed optimize my C program that writes variable length codes to a file. Right now I have a buffer of size BUFSIZ which I fill with my bits until he is filled and then I write the buffer to my file using fwrite(). The resulting file can vary in size (from bytes to GiB, so from very few write calls to many calls) as I implemented the huffman algorithm for file compression.
I bench marked writing content to a file using fputc() and fwrite() (in BUFSIZ sized chunks). Surprisingly, fputc() outperforms fwrite() for 1 GiB, but the opposite is the case for 8 MiB. So why is fputc() sometimes faster than fwrite() and sometimes not, and secondly is it better to buffer or not to buffer when wanting best performance for IO operations?
The code I used for bench marking (as I am lazy this code is from chatGPT):
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define SIZE 8388608
// #define SIZE 1073741824
int main()
{
FILE* file;
char* buffer = malloc(SIZE);
// Fill buffer with some data
for (int i = 0; i < SIZE; i++) {
buffer[i] = 'A';
}
// Benchmark fwrite
file = fopen("fwrite_test", "wb");
clock_t start = clock();
for (int i = 0; i < SIZE / BUFSIZ; i++) {
fwrite(buffer + i * BUFSIZ, sizeof(char), SIZE / BUFSIZ, file);
}
clock_t end = clock();
fclose(file);
printf("fwrite: %f seconds\n", (double)(end - start) / CLOCKS_PER_SEC);
// Benchmark fputc
file = fopen("fputc_test", "wb");
start = clock();
for (int i = 0; i < SIZE; i++) {
fputc(buffer[i], file);
}
end = clock();
fclose(file);
printf("fputc : %f seconds\n", (double)(end - start) / CLOCKS_PER_SEC);
return 0;
}
For 8 MiB I get this speed:
fwrite: 0.001915 seconds
fputc : 0.092756 seconds
-> fwrite is 48 times faster
And for 1 GiB:
fwrite: 14.579728 seconds
fputc : 3.512994 seconds
-> fputc is 4 times faster
And this is the code I actually want to optimize (I know that writeBits() and writeByte() shouldn't use writeBit() for good performance, I haven't implemented proper logic for them yet):
#include "bitHandler.h"
#include <stdint.h>
#include <stdio.h>
// Struct with all needed Infos to write buffered bits to a file
typedef struct bitBuffer {
unsigned char* buffer;
unsigned int bufferPosition;
int bitPosition;
FILE* output;
} bitBuffer_t;
// UINT_FAST64_WIDTH is defined in c23, assuming 64 is reaasonable if compiler doesn't support c23
#ifndef UINT_FAST64_WIDTH
#define UINT_FAST64_WIDTH 64
#endif
// Write a single bit to buffer and then to file
extern void writeBit(int bit, bitBuffer_t* bitBuffer)
{
if (bit) {
bitBuffer->buffer[bitBuffer->bufferPosition] |= 1u << (7 - bitBuffer->bitPosition);
} else {
bitBuffer->buffer[bitBuffer->bufferPosition] &= ~(1u << (7 - bitBuffer->bitPosition));
}
// Go to next byte if current one is full, flush buffer if it is full
if (bitBuffer->bitPosition == 7) {
if (bitBuffer->bufferPosition == BUFSIZ - 1) {
flush(bitBuffer);
} else {
++bitBuffer->bufferPosition;
}
bitBuffer->bitPosition = 0;
} else {
++bitBuffer->bitPosition;
}
}
// write multiple bits
extern void writeBits(uint_fast64_t bits, int nbits, bitBuffer_t* bitBuffer)
{
int i;
int bit;
for (i = 0; i < nbits; i++) {
bit = (bits >> (UINT_FAST64_WIDTH - 1 - i)) & 1u;
writeBit(bit, bitBuffer);
}
}
// write byte
extern void writeByte(unsigned char byte, bitBuffer_t* bitBuffer)
{
int i;
for (i = 0; i < 8; i++) {
writeBit((byte >> (7 - i)) & 1u, bitBuffer);
}
}
// flush the remaining bits in buffer to file
extern void flush(bitBuffer_t* bitBuffer)
{
fwrite(bitBuffer->buffer, sizeof(char),
(bitBuffer->bitPosition == 0) ? bitBuffer->bufferPosition : bitBuffer->bufferPosition + 1,
bitBuffer->output);
bitBuffer->bufferPosition = 0;
bitBuffer->bitPosition = 0;
}
fwrite(buffer + i * BUFSIZ, sizeof(char), SIZE / BUFSIZ, file);looks wrong to me.