IDX File (Full) Info

```c
fclose(f); return 0;
```

Benchmark results (average of 10 runs, reading 60k MNIST images):

| Operation | Python (struct+numpy) | C (libidx) | NumPy .npy | HDF5 |
|-----------|----------------------|------------|--------------|------|
| Load 60k images | 0.24 sec | 0.09 sec | 0.19 sec | 0.31 sec |
| Memory mapping | N/A | 0.001 sec | 0.001 sec | 0.15 sec |
| Random access (per image) | 2.1 µs | 0.4 µs | 1.2 µs | 8.5 µs |

| Code (hex) | Code (decimal) | Data Type | C equivalent (typical) | .NET equivalent |
|------------|----------------|-----------|------------------------|------------------|
| 0x08 | 8 | Unsigned byte (uint8) | unsigned char | Byte |
| 0x09 | 9 | Signed byte (int8) | signed char | SByte |
| 0x0B | 11 | Short (int16) | short | Int16 |
| 0x0C | 12 | Int32 (int) | int | Int32 |
| 0x0D | 13 | Float (single) | float | Single |
| 0x0E | 14 | Double | double | Double |

idx file

size_t elem_size = 0; switch(out->data_type) case 0x08: case 0x09: elem_size = 1; break; case 0x0B: elem_size = 2; break; case 0x0C: elem_size = 4; break; case 0x0D: elem_size = 4; break; case 0x0E: elem_size = 8; break; default: free(out->dims); fclose(f); return -5;

size_t total_elements = 1; for (int i = 0; i < out->dim_count; i++) total_elements *= out->dims[i]; fclose(f); return 0; Benchmark results (average of 10

with open(filename, 'wb') as f: # Write magic: [0, 0, type_code, dim_count] f.write(bytes([0, 0, data_type_code, dim_count])) # Write dimensions (big-endian) for dim in data_array.shape: f.write(dim.to_bytes(4, 'big')) # Write data (row-major, native endianness) # Convert to flat bytes in correct order data_array.astype(data_array.dtype, copy=False).tofile(f) #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <arpa/inet.h> typedef struct idx_file uint8_t data_type; // 0x08,0x09,0x0B-0x0E uint8_t dim_count; // 1-255 uint32_t *dims; // array of dim_count sizes void *data; // raw data pointer size_t data_size_bytes; idx_file_t;

out->data_type = header[2]; out->dim_count = header[3]; Benchmark results (average of 10 runs

out->dims = malloc(out->dim_count * sizeof(uint32_t)); for (int i = 0; i < out->dim_count; i++) uint32_t dim_net; if (fread(&dim_net, 4, 1, f) != 1) free(out->dims); fclose(f); return -4; out->dims[i] = ntohl(dim_net);