Compare commits

..

3 commits

Author SHA1 Message Date
Stephan I. Böttcher
6ce3cc04db hist.c: swap axis in matrix output
minor and major axis `aa`, `am` encapsulate the axcis order.
2023-10-31 19:47:47 +01:00
Stephan I. Böttcher
422289f6b2 hist.c implement hist in C
TODO: swap x and y, since gnuplot binary docu seems to be wrong :-)

> The `binary matrix` format contains a two dimensional array of 32 bit IEEE
> float values plus an additional column and row of coordinate values.  In the
> `using` specifier of a plot command, column 1 refers to the matrix row
> coordinate, column 2 refers to the matrix column coordinate, and column 3
> refers to the value stored in the array at those coordinates.

Does the row move fast or slow?
2023-10-31 15:57:27 +01:00
Stephan I. Böttcher
b8b8bbee8a hist.py: enable option -M 2023-10-31 15:56:02 +01:00
4 changed files with 735 additions and 1 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
*~
hist

6
Makefile Normal file
View file

@ -0,0 +1,6 @@
#! /usr/bin/make -f
CFLAGS = -g
LOADLIBES = -lm
hist: hist.c

726
hist.c Normal file
View file

@ -0,0 +1,726 @@
// hist.c
#include <getopt.h>
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <stdint.h>
#include <assert.h>
static const char optstring[] = "hv::q::iIm::M::2s:S:c:C:pk::l::L::Gw:B3t:n:N:";
static const struct option longopts[] =
{
{.name = "help", .has_arg=no_argument, .val='h'},
{.name = "verbose", .has_arg=optional_argument, .val='v'},
{.name = "quiet", .has_arg=optional_argument, .val='q'},
{.name = "integral", .has_arg=no_argument, .val='i'},
{.name = "Integral", .has_arg=no_argument, .val='I'},
{.name = "moments", .has_arg=optional_argument, .val='m'},
{.name = "Moments", .has_arg=optional_argument, .val='M'},
{.name = "twodiminensional", .has_arg=no_argument, .val='2'},
{.name = "logscale", .has_arg=optional_argument, .val='l'},
{.name = "Logscale", .has_arg=optional_argument, .val='L'},
{.name = "log-out", .has_arg=no_argument, .val='G'},
{.name = "scale", .has_arg=required_argument, .val='s'},
{.name = "Scale", .has_arg=required_argument, .val='S'},
{.name = "python-index", .has_arg=no_argument, .val='p'},
{.name = "index-offset", .has_arg=optional_argument, .val='k'},
{.name = "column", .has_arg=required_argument, .val='c'},
{.name = "Column", .has_arg=required_argument, .val='C'},
{.name = "weight-column", .has_arg=required_argument, .val='w'},
{.name = "binary", .has_arg=no_argument, .val='B'},
{.name = "xyz", .has_arg=no_argument, .val='3'},
{.name = "field-separator", .has_arg=required_argument, .val='t'},
{.name = "size", .has_arg=required_argument, .val='n'},
{.name = "Size", .has_arg=required_argument, .val='N'},
{}
};
typedef int64_t v_index_t;
struct axis_para {
const char *name; // name of this axis
double scale; // bin width
double min_val; // smallest .val
double max_val; // largest .val
double logbase; // base of the logarithm
double val; // linear val
double mval; // moments val
v_index_t ix; // bin index
v_index_t min; // smallest .ix
v_index_t max; // largest .ix
v_index_t origin; // first index of the allocation
int column; // column number 0…
int moments; // multiply the weight by this power of .val
v_index_t size; // allocated size
v_index_t fsize; // fixed size
v_index_t usize; // unused size
v_index_t nsize; // unused size
v_index_t delta; // origin shift
v_index_t stride; // matrix stride
bool integral; // do integral histogram
bool logscale; // take the log of .val
bool need_shift; // need to move .origin
};
static struct axis_para ax = {
.name = "X",
.scale = 1.0,
.logbase = M_LN10,
.mval = 1.0,
.stride = 1,
};
static struct axis_para ay = {
.name = "Y",
.column = 1,
.size = 1,
.nsize = 1,
.scale = 1.0,
.logbase = M_LN10,
.mval = 1.0,
.stride = 1,
};
static struct axis_para * const aa = &ax; // aa->stride = 1
static struct axis_para * const am = &ay; // am->stride = aa->size
static inline v_index_t mmidx(struct axis_para *aa1, v_index_t ii1,
struct axis_para *aa2, v_index_t ii2)
{
return ii1 * aa1->stride + ii2 * aa2->stride;
}
static inline v_index_t midx(v_index_t iix, v_index_t iiy)
{
return mmidx(&ax, iix, &ay, iiy);
}
static bool twodim = false;
static bool log_out = false;
static int weights = -1;
static int column0 = 1;
static int verbosity = 1;
static char separator = '\0';
static v_index_t max_size = 0x1000000;
static bool binary_out = false;
static bool xyz_out = false;
static const char **files = NULL;
static void parse_size(const char *arg, struct axis_para *a)
{
char *tail;
a->min_val = strtod(arg, &tail);
if (!tail || *tail != ',') {
tail = NULL;
}
else {
a->max_val = strtod(tail+1, &tail);
}
if (!tail || *tail) {
fprintf(stderr, "invalid size argument «min», «max»: %s\n", arg);
exit(1);
}
}
static double parse_scale(const char *arg)
{
// allow quotient as --scale arguments
// like 1/20 or even /20
const char *oarg = arg;
char *tail;
double d = strtod(arg, &tail);
double n = 1.0;
if (*tail=='/') {
if (tail==arg) d = 1.0;
arg = tail + 1;
n = strtod(arg, &tail);
}
if (*tail || tail==arg) {
fprintf(stderr, "Error: floating point number or quotient expected: %s\n", oarg);
exit(1);
}
return d/n;
}
static int parse_column(const char *arg)
{
char *tail;
int c = strtol(arg, &tail, 0);
if (*tail || tail==arg) {
fprintf(stderr, "Error: column number expected: %s\n", arg);
exit(1);
}
c -= column0;
if (c<0) {
fprintf(stderr, "Error: invalid column number: %d\n", c);
exit(1);
}
return c;
}
static double parse_logbase(const char *arg)
{
if (!arg) return M_LN10;
if (*arg == 'e') return 1.0;
char *tail;
double b = strtod(arg, &tail);
if (tail && *tail) {
fprintf(stderr, "invalid log base: %s\n", arg);
exit(1);
}
return log(b);
}
static char **parse_args(int argc, char **argv)
{
int opt;
while ((opt=getopt_long(argc, argv, optstring, longopts, NULL)) != -1) {
char *tail = "";
struct axis_para *a = isupper(opt) ? &ay : &ax;
switch (opt) {
case 'h':
default:
fprintf(stderr, "usage: %s «options» «files»\noptions:\n", argv[0]);
fprintf(stderr,
" -h --help print this help\n"
" -v --verbose[=«v»] increase or set verbosity\n"
" -q --quiet[=«v»] set verbosity to zero\n"
" -2 --twodimensional create a 2D histogram\n"
" -i --integral create an integral histogram (1st dimension)\n"
" -I --Integral create an integral histogram 2nd dimension\n"
" -m --moments[=«m»] create a moments histogram (1st dim)\n"
" -M --Moments[=«m»] moments histogram for 2nd dimension\n"
" -l --logscale use logarithmic bins (1st dimension)\n"
" -L --Logscale 2D histogram with log bins in 2nd dim\n"
" -G --log-out output log axis values\n"
" -s --scale=«w» bin widths (1st dimension)\n"
" -S --Scale=«w» 2D histogram, bin widths 2nd dimension\n"
" -n --size=«min»,«max» limits of the input range (1st dimension)\n"
" -N --Size=«min»,«max» 2D histogram, limit … 2nd dimension\n"
" -c --column=«col» column number (1st dimension)\n"
" -C --Column=«col» 2D histogram, column number 2nd dimension\n"
" -p --python-index count columns from 0\n"
" -k --index-offset[=«o»] column number from 1 (default) or «o»\n"
" -w --weight-column=«co» column number for weights\n"
" -B --binary 2D output in gnuplot binary matrix format\n"
" -3 --xyz 2D output in text x y z format\n"
" -t --field-separator=«c» field separator\n"
);
exit(opt != 'h');
case 'q': verbosity = -1; // fall through //
case 'v': verbosity ++;
if (optarg)
verbosity = strtol(optarg, &tail, 10);
break;
case '2': twodim = true; break;
case 'I': twodim = true;
case 'i': a->integral = true; break;
case 'M': twodim = true;
case 'm': a->moments = 1;
if (optarg) a->moments = strtol(optarg, &tail, 10);
break;
case 'L': twodim = true;
case 'l': a->logscale = true;
a->logbase = parse_logbase(optarg);
break;
case 'G': log_out = true; break;
case 'S': twodim = true;
case 's': a->scale=parse_scale(optarg); break;
case 'N': twodim = true;
case 'n': parse_size(optarg, a); break;
case 'C': twodim=true;
case 'c': a-> column = parse_column(optarg); break;
case 'P': column0 = 0; break;
case 'K': column0 = 0;
if (optarg)
column0 = parse_column(optarg);
else
column0 = 1;
break;
case 'w': weights = parse_column(optarg); break;
case 'B': binary_out = true; break;
case '3': xyz_out = true; break;
case 't': separator = optarg ? *optarg : '\0'; break;
}
if (tail && *tail) {
fprintf(stderr, "invalid option value -%c%s\n", opt, optarg);
exit(1);
}
}
return argv + optind;
}
static const char *current_file;
static void process_line(char *line);
static void emit_results();
static void initialize();
int main(int argc, char **argv)
{
char **files = parse_args(argc, argv);
if (verbosity > 2) {
// TODO: fix
fprintf(stderr, "Options given:\n");
fprintf(stderr, " --verbosity=%d\n", verbosity);
if (ax.moments)
fprintf(stderr, " --moments\n");
else if (ax.integral)
fprintf(stderr, " --integral\n");
if (twodim)
fprintf(stderr, " --twodim\n");
fprintf(stderr, " --scale=%g\n", ax.scale);
if (twodim)
fprintf(stderr, " --Scale=%g\n", ay.scale);
if (ax.logscale)
fprintf(stderr, " --logscale=%.2g\n", exp(ax.logbase));
if (ay.logscale)
fprintf(stderr, " --Logscale=%.2g\n", exp(ay.logbase));
fprintf(stderr, " --column=%d\n", ax.column+column0);
if (twodim)
fprintf(stderr, " --Column=%d\n", ay.column+column0);
if (weights >= 0)
fprintf(stderr, " --weights=%d\n", weights+1);
if (*files) {
fprintf(stderr, "Input files given:");
for (char **f=files; *f; f++)
fprintf(stderr, " %s", *f);
fprintf(stderr, "\n");
}
}
initialize();
FILE *inp = NULL;
size_t line_size = 256;
char *line = malloc(line_size);
if (!line) {
perror("line[256]");
exit(3);
}
if (!*files) inp = stdin;
bool inp_opened = false;
while (inp || *files) {
if (!inp) {
current_file = *files++;
inp = fopen(current_file, "r");
if (!inp) {
perror(current_file);
exit(2);
}
inp_opened = true;
if (verbosity > 1)
fprintf(stderr, "opened file %s\n", current_file);
}
if (getline(&line, &line_size, inp) <= 0) {
if (inp_opened) {
if (verbosity > 2 && current_file)
fprintf(stderr, "closed file %s\n", current_file);
fclose(inp);
}
inp = NULL;
continue;
}
process_line(line);
}
emit_results();
}
static int split(char *line, char sep, const char **fields, int n)
{
int i=0;
char *p = line;
while (i<n && *p) {
while (*p && isspace(*p)) p++;
fields[i++] = p;
if (sep)
while (*p && *p!=sep) p++;
else
while (*p && !isspace(*p)) p++;
if (*p)
*p++ = '\0';
}
return i;
}
static const char **fields;
static int n_fields;
static uint64_t *umatrix;
static double *wmatrix;
static void *matrix;
static bool wmatrix_p;
static const size_t element_size =
sizeof(double) > sizeof(uint64_t)
? sizeof(double)
: sizeof(uint64_t)
;
static inline void init_axis(struct axis_para *a)
{
if (a->min_val >= a->max_val) return;
// add one extra row/column for checkout
a->fsize = (int)floor((a->max_val - a->min_val)/a->scale + 2);
a->min = (long)(floor(a->min_val/a->scale)+0.5);
a->max = (long)(floor(a->max_val/a->scale)+0.5);
a->origin = a->min-1;
a->size = a->fsize;
}
static void initialize()
{
int n = ax.column;
if (twodim && ay.column > n) n = ay.column;
if (weights > n) n = weights;
n_fields = n+1;
fields = malloc(n_fields * sizeof(*fields));
if (!fields) {
perror("fields");
exit(3);
}
ax.size = 4096;
if (!twodim)
ay.size = 1;
else
ax.size = ay.size = 1024;
init_axis(&ax);
init_axis(&ay);
am->stride = aa->size;
wmatrix_p = weights >= 0 || ax.moments || ay.moments;
matrix = malloc(ax.size * ay.size * element_size);
if (!matrix) {
perror("matrix");
exit(3);
}
bzero(matrix, ax.size * ay.size * element_size);
if (wmatrix_p)
wmatrix = matrix;
else
umatrix = matrix;
}
static inline bool parse_number(const char *s, double *d)
{
char *tail;
*d = strtod(s, &tail);
bool r = tail==s || *tail && !isspace(*tail);
if (r) {
if (verbosity >= 3)
fprintf(stderr, "Cannot parse column ”%s“\n", s);
return false;
}
return true;
}
static inline bool parse_value(const char **s, struct axis_para *a, int n)
{
if (!parse_number(s[a->column], &a->val))
return false;
if (a->fsize && (a->val < a->min_val || a->val > a->max_val))
return false;
if (a->moments)
a->mval = pow(a->val, a->moments);
if (a->logscale) {
if (a->val <= 0)
return false;
a->val = log(a->val) / a->logbase;
}
a->ix = (v_index_t)(floor(a->val / a->scale) + 0.5);
if (!a->fsize)
if (!n) {
a->min = a->max = a->ix;
a->origin = a->ix - a->size/2;
if (verbosity >= 2)
fprintf(stderr,
"center first point in %s, origin/min/max/size %lld %lld %lld %d\n",
a->name, a->origin, a->min, a->max, a->size);
}
else {
if (a->ix > a->max) a->max = a->ix;
if (a->ix < a->min) a->min = a->ix;
}
v_index_t ix = a->ix - a->origin;
v_index_t msize = a->max - a->min + 1;
a->usize = a->size - msize;
if (a->fsize && (a->usize < 0 || ix < 0 || ix >= a->size)) {
if (verbosity >= 1)
fprintf(stderr,
"THIS CANNOT HAPPEN: drop value for axis %s: ix=%lld size=%d\n",
a->name, ix, a->size);
return false;
}
a->nsize = a->size;
while (a->usize < 0) {
a->nsize *= 2;
a->usize = a->nsize - msize;
}
if (a->nsize > a->size && verbosity >= 2) {
fprintf(stderr,
"resize axix %s, origin/min/max %lld %lld %lld size %d → %d\n",
a->name, a->origin, a->min, a->max, a->size, a->nsize);
}
a->need_shift = ix < 0 || ix >= a->nsize;
a->delta = a->usize/2 - a->min + a->origin;
if (a->need_shift && verbosity >= 2)
fprintf(stderr,
"center data in %s, origin/min/max/size %lld %lld %lld %d\n",
a->name, a->origin, a->min, a->max, a->nsize);
if (verbosity >= 6) {
fprintf(stderr,
"point %s %s"
" origin/min/ix/max %lld %lld %lld %lld"
" delta/size/nsize/usize %s%lld %lld %lld %lld\n",
a->name, s[a->column],
a->origin, a->min, ix, a->max,
a->need_shift ? "!" : "", a->delta, a->size, a->nsize, a->usize);
}
return true;
}
static void reallocate_matrix()
{
v_index_t nsize = ax.nsize * ay.nsize;
v_index_t size = ax.size * ay.size;
assert(nsize > size);
if (nsize > max_size) {
fprintf(stderr, "matrix too large %d > %d", nsize, max_size);
exit(3);
}
matrix = realloc(matrix, nsize*element_size);
if (!matrix) {
perror("realloc matrix");
exit(3);
}
if (wmatrix_p)
wmatrix = matrix;
else
umatrix = matrix;
bzero(matrix + size * element_size, (nsize - size)*element_size);
if (aa->nsize > aa->size) {
v_index_t asize = aa->size * element_size;
v_index_t nasize = aa->nsize * element_size;
for (v_index_t i = am->size-1; i>0; i--) {
void *dst = matrix + i * nasize;
void *src = matrix + i * asize;
memmove(dst, src, asize);
if (dst-src > asize)
bzero(src, asize);
else
bzero(src, dst-src);
}
}
ax.size = ax.nsize;
ay.size = ay.nsize;
am->stride = aa->nsize;
if (verbosity >= 2)
fprintf(stderr, "reallocated matrix %lld×%lld\n", ax.nsize, ay.nsize);
}
static void move_matrix()
{
v_index_t delta = midx(ax.delta, ay.delta);
ax.origin -= ax.delta;
ay.origin -= ay.delta;
assert(ax.origin <= ax.min);
assert(ay.origin <= ay.min);
assert(ax.max < ax.origin + ax.size);
assert(ay.max < ay.origin + ay.size);
v_index_t size = ax.size*ay.size*element_size;
v_index_t dsize = delta*element_size;
if (delta > 0) {
memmove(matrix + dsize, matrix, size - dsize);
bzero(matrix, dsize);
}
if (delta < 0) {
dsize = -dsize;
memmove(matrix, matrix + dsize, size - dsize);
bzero(matrix + size - dsize, dsize);
}
if (verbosity >= 2)
fprintf(stderr,
"moved matrix size %lld×%lld by %lld×%lld → %lld\n",
ax.size, ay.size, ax.delta, ay.delta, delta);
}
static void process_line(char *line)
{
static long n;
int i = split(line, separator, fields, n_fields);
if (i < n_fields) return;
if (*fields[0]=='#') return;
if (!parse_value(fields, &ax, n)) return;
if (twodim && !parse_value(fields, &ay, n)) return;
double w=1.0;
if (weights >= 0 && !parse_number(fields[weights], &w)) return;
if (ax.moments) w *= ax.mval;
if (ay.moments) w *= ay.mval;
if (ax.nsize > ax.size || ay.nsize > ay.size)
reallocate_matrix();
if (ax.need_shift || ay.need_shift)
move_matrix();
v_index_t ii = midx(ax.ix - ax.origin, ay.ix - ay.origin);
if (wmatrix_p)
wmatrix[ii] += w;
else
umatrix[ii] ++;
n++;
}
static inline double value(struct axis_para *a, v_index_t ix)
{
double x = ix*a->scale;
if (a->logscale && !log_out)
x = exp(x*a->logbase);
return x;
}
#define for_axis_ios(_a, _i, _o, _s) for (v_index_t _i = (_a).min + _o + _s; _i <= (_a).max + _o; _i++)
#define for_axis_i(_a, _i) for_axis_ios(_a, _i, 0, 0)
#define for_axis_ii(_a, _i) for_axis_ios(_a, _i, (_a).origin, 0)
#define for_axis_iis(_a, _i, _s) for_axis_ios(_a, _i, (_a).origin, _s)
#define for_axis_iii(_a, _i, _ii) for (v_index_t _i = (_a).min, _ii = _i - (_a).origin; _i <= (_a).max; _i++, _ii++)
static void emit_text()
{
printf("%lld ", aa->max - aa->min + 1);
for_axis_i(*aa, ia)
printf(" %g", value(aa, ia));
printf("\n");
for_axis_iii(*am, im, iim) {
printf("%g ", value(am, im));
for_axis_iii(*aa, ia, iia) {
v_index_t ii = mmidx(aa, iia, am, iim);
if (wmatrix_p)
printf(" %g", wmatrix[ii]);
else
printf(" %llu", umatrix[ii]);
}
printf("\n");
}
}
static void emit_binary()
{
if (ay.size == ay.max - ay.min + 1)
ay.nsize = ay.size+1;
if (ax.size == ax.max - ax.min + 1)
ax.nsize = ax.size+1;
if (ax.nsize > ax.size || ay.nsize > ay.size)
reallocate_matrix();
if (ax.min == ax.origin) {
ax.delta = 1;
ax.need_shift = true;
}
if (ay.min == ay.origin) {
ay.delta = 1;
ay.need_shift = true;
}
if (ax.need_shift || ay.need_shift)
move_matrix();
float *fmatrix = matrix;
assert(aa->stride == 1);
assert(am->stride == aa->size);
v_index_t na = aa->max - aa->min + 1;
v_index_t nm = am->max - am->min + 1;
fmatrix[0] = na;
for_axis_i(*aa, ia) {
fmatrix[ia - aa->min + 1] = value(aa, ia);
}
for_axis_iii(*am, im, iim) {
v_index_t i = (im - am->min + 1)*(na + 1);
fmatrix[i] = value(am, im);
for_axis_iii(*aa, ia, iia) {
float v;
v_index_t ii = mmidx(aa, iia, am, iim);
if (wmatrix_p)
v = wmatrix[ii];
else
v = umatrix[ii];
fmatrix[i + ia - aa->min + 1] = v;
}
}
fwrite(matrix, sizeof(float), (na+1)*(nm+1), stdout);
}
static void emit_xyz()
{
for_axis_iii(ay, iy, iiy) {
for_axis_iii(ax, ix, iix) {
v_index_t ii = midx(iix, iiy);
double x = value(&ax, ix);
double y = value(&ay, iy);
if (wmatrix_p)
printf("%g %g %g\n", x, y, wmatrix[ii]);
else
printf("%g %g %llu\n", x, y, umatrix[ii]);
}
printf("\n");
}
}
static void emit_1d()
{
for_axis_iii(ax, ix, iix) {
double x = value(&ax, ix);
v_index_t ii = midx(iix, 0);
if (wmatrix_p)
printf("%g %g\n", x, wmatrix[ii]);
else
printf("%g %llu\n", x, umatrix[ii]);
}
}
static void integrate(struct axis_para *a, struct axis_para *o)
{
if (!a->integral)
return;
for_axis_ii((*a), iia) {
for_axis_iis((*o), iio, 1) {
v_index_t i = mmidx(a, iia, o, iio);
if (wmatrix_p)
wmatrix[i] += wmatrix[i - a->stride];
else
umatrix[i] += wmatrix[i - a->stride];
}
}
}
static void emit_results()
{
if (verbosity >= 2)
fprintf(stderr, "final matrix size %lld×%lld"
" %lld:%lld…%lld"
" × %lld:%lld…%lld\n",
ax.size, ay.size,
ax.origin, ax.min, ax.max,
ay.origin, ay.min, ay.max );
integrate(&ax, &ay);
integrate(&ay, &ax);
if (!twodim) emit_1d();
else if (binary_out) emit_binary();
else if (xyz_out) emit_xyz();
else emit_text();
}

View file

@ -3,7 +3,7 @@
import sys import sys
import math import math
from getopt import getopt from getopt import getopt
opt,files = getopt(sys.argv[1:], "I2Qs:S:c:C:k:K:lLw:X") opt,files = getopt(sys.argv[1:], "IM2Qs:S:c:C:k:K:lLw:X")
scale=1.0 scale=1.0
Scale=1.0 Scale=1.0
col=0 col=0