1

Edit: My bench marking code was wrong (SIZE / BUFSIZ in fwrite() should only be BUFSIZ), so everything is cleared up: in my case buffered fwrite() is giving better performance.

I try to speed optimize my C program that writes variable length codes to a file. Right now I have a buffer of size BUFSIZ which I fill with my bits until he is filled and then I write the buffer to my file using fwrite(). The resulting file can vary in size (from bytes to GiB, so from very few write calls to many calls) as I implemented the huffman algorithm for file compression. I bench marked writing content to a file using fputc() and fwrite() (in BUFSIZ sized chunks). Surprisingly, fputc() outperforms fwrite() for 1 GiB, but the opposite is the case for 8 MiB. So why is fputc() sometimes faster than fwrite() and sometimes not, and secondly is it better to buffer or not to buffer when wanting best performance for IO operations? The code I used for bench marking (as I am lazy this code is from chatGPT):

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define SIZE 8388608
// #define SIZE 1073741824

int main()
{
    FILE* file;
    char* buffer = malloc(SIZE);

    // Fill buffer with some data
    for (int i = 0; i < SIZE; i++) {
        buffer[i] = 'A';
    }

    // Benchmark fwrite
    file = fopen("fwrite_test", "wb");
    clock_t start = clock();
    for (int i = 0; i < SIZE / BUFSIZ; i++) {
        fwrite(buffer + i * BUFSIZ, sizeof(char), SIZE / BUFSIZ, file);
    }
    clock_t end = clock();
    fclose(file);
    printf("fwrite: %f seconds\n", (double)(end - start) / CLOCKS_PER_SEC);

    // Benchmark fputc
    file = fopen("fputc_test", "wb");
    start = clock();
    for (int i = 0; i < SIZE; i++) {
        fputc(buffer[i], file);
    }
    end = clock();
    fclose(file);
    printf("fputc : %f seconds\n", (double)(end - start) / CLOCKS_PER_SEC);

    return 0;
}

For 8 MiB I get this speed:

fwrite: 0.001915 seconds
fputc : 0.092756 seconds

-> fwrite is 48 times faster

And for 1 GiB:

fwrite: 14.579728 seconds
fputc : 3.512994 seconds

-> fputc is 4 times faster

And this is the code I actually want to optimize (I know that writeBits() and writeByte() shouldn't use writeBit() for good performance, I haven't implemented proper logic for them yet):

#include "bitHandler.h"
#include <stdint.h>
#include <stdio.h>

// Struct with all needed Infos to write buffered bits to a file
typedef struct bitBuffer {
    unsigned char* buffer;
    unsigned int bufferPosition;
    int bitPosition;
    FILE* output;
} bitBuffer_t;

// UINT_FAST64_WIDTH is defined in c23, assuming 64 is reaasonable if compiler doesn't support c23
#ifndef UINT_FAST64_WIDTH
#define UINT_FAST64_WIDTH 64
#endif

// Write a single bit to buffer and then to file
extern void writeBit(int bit, bitBuffer_t* bitBuffer)
{
    if (bit) {
        bitBuffer->buffer[bitBuffer->bufferPosition] |= 1u << (7 - bitBuffer->bitPosition);
    } else {
        bitBuffer->buffer[bitBuffer->bufferPosition] &= ~(1u << (7 - bitBuffer->bitPosition));
    }
    // Go to next byte if current one is full, flush buffer if it is full
    if (bitBuffer->bitPosition == 7) {
        if (bitBuffer->bufferPosition == BUFSIZ - 1) {
            flush(bitBuffer);
        } else {
            ++bitBuffer->bufferPosition;
        }
        bitBuffer->bitPosition = 0;
    } else {
        ++bitBuffer->bitPosition;
    }
}

// write multiple bits
extern void writeBits(uint_fast64_t bits, int nbits, bitBuffer_t* bitBuffer)
{
    int i;
    int bit;
    for (i = 0; i < nbits; i++) {
        bit = (bits >> (UINT_FAST64_WIDTH - 1 - i)) & 1u;
        writeBit(bit, bitBuffer);
    }
}

// write byte
extern void writeByte(unsigned char byte, bitBuffer_t* bitBuffer)
{
    int i;
    for (i = 0; i < 8; i++) {
        writeBit((byte >> (7 - i)) & 1u, bitBuffer);
    }
}

// flush the remaining bits in buffer to file
extern void flush(bitBuffer_t* bitBuffer)
{
    fwrite(bitBuffer->buffer, sizeof(char),
        (bitBuffer->bitPosition == 0) ? bitBuffer->bufferPosition : bitBuffer->bufferPosition + 1,
        bitBuffer->output);
    bitBuffer->bufferPosition = 0;
    bitBuffer->bitPosition = 0;
}

11
  • 2
    C I/O buffers for you already, so you are buffering buffered data. Commented Jun 7, 2024 at 15:12
  • 1
    The third parameter of fwrite(buffer + i * BUFSIZ, sizeof(char), SIZE / BUFSIZ, file); looks wrong to me. Commented Jun 7, 2024 at 15:16
  • @Dúthomhas I didn't know that. Seems like it is worth buffering read operations but not write operations. Do you have an idea why fwrite is so much faster for small amount of data? Commented Jun 7, 2024 at 15:17
  • @pmacfarlane Yes you're right, it should be BUFSIZ, and then fwrite outperforms fputc constantly. So the bench mark code was wrong adn thus all my questions redundant Commented Jun 7, 2024 at 15:21
  • @Dúthomhas Buffering buffered data gives me better performance. As pmacfarlane pointed out my code was wrong and it is worth it to use fwrite(). I don't know why as I'd expect C intern buffering to be sufficient Commented Jun 7, 2024 at 15:23

0

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.