/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 8 -*- */

/*-
 * Copyright (c) 2015, Howard Hughes Medical Institute
 *
 * Permission to use, copy, modify, and/or distribute this software
 * for any purpose with or without fee is hereby granted, provided
 * that the above copyright notice and this permission notice appear
 * in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL THE
 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
 * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
 * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * http://rsb.info.nih.gov/ij/plugins/DM3Format.gj.html
 *
 * https://github.com/openmicroscopy/bioformats/blob/v5.0.2/components/formats-gpl/src/loci/formats/in/GatanReader.java
 */

#ifdef HAVE_CONFIG_H
#    include "config.h"
#endif

#include <sys/stat.h>

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <err.h>
#include <errno.h>
#ifdef HAVE_GETOPT_H
#    include <getopt.h>
#endif
#include <libgen.h>
#include <limits.h>
#include <math.h>
#include <string.h>
#include <unistd.h>

#include "frame.h"
#include "util.h"

/* XXX Should probably use an AutoConf macro.
 *
 * XXX This is duplication w.r.t. tvips2smv.c.
 */
#if defined(__linux__)
#    include <byteswap.h>
#elif defined(__APPLE__)
#    include <libkern/OSByteOrder.h>
#    define bswap_16(x) OSSwapInt16(x)
#    define bswap_32(x) OSSwapInt32(x)
#elif defined(__GNUC__)
#    define bswap_16(x) __builtin_bswap16(x)
#    define bswap_32(x) __builtin_bswap32(x)
#endif


/* Numeric type codes that identify how a tag's value is encoded.
 * read_tag_type() reads these codes from the stream and int2enum()
 * validates them; len_of_type() gives the storage size of the simple
 * types.
 */
enum tag_type
{
    TYPE_SHORT   = 2,   /* int16_t */
    TYPE_LONG    = 3,   /* int32_t */
    TYPE_USHORT  = 4,   /* uint16_t */
    TYPE_ULONG   = 5,   /* uint32_t */
    TYPE_FLOAT   = 6,   /* float */
    TYPE_DOUBLE  = 7,   /* double */
    TYPE_BOOLEAN = 8,   /* int8_t */
    TYPE_CHAR    = 9,   /* char */
    TYPE_OCTET   = 10,  /* uint8_t */
    TYPE_UNK1    = 11,  /* Unknown meaning; stored in 4 octets */
    TYPE_UNK2    = 12,  /* Unknown meaning; stored in 8 octets */
    TYPE_STRUCT  = 15,  /* Composite: structure of simple types */
    TYPE_STRING  = 18,  /* Composite: string */
    TYPE_ARRAY   = 20   /* Composite: array */
};


/**
 * Signature of the user-supplied function that receives each decoded
 * image.  The callback function must return 0 on success.  The frame
 * structure is freed using frame_free() when the callback returns.
 *
 * XXX This is duplication w.r.t. tvipsread.c.
 *
 * @param frame   Frame structure
 * @param counter Image counter, a one-based index that continues
 *                through all the image set files XXX Should have been
 *                zero-based
 * @param data    User data
 * @return        0 if successful, non-zero otherwise
 */
typedef int (*dm_frame_callback)(
    struct frame *frame, size_t counter, void *data);


/* Parser state shared between the tag readers and the start, data
 * and end handlers.
 *
 * XXX Maybe more aptly named context, or perhaps handle?  Could
 * and/or should stuff the file handle here as well.
 */
struct state
{
    /* Byte-order flag from the file header; read_dm() accepts only
     * the values 0 and 1.
     */
    uint32_t endianness;

    /* Format version from the file header; read_dm() accepts only
     * version 4.
     */
    uint32_t version;

    /* Current nesting depth.  read_tag_group() raises it by one while
     * it reads each of the group's entries.
     */
    size_t level;

    /* Number of times _start_group_handler() has entered an
     * "ImageData" entry at level 3 directly from context 1.
     */
    int image_data_count;

    /* Number of image dimensions collected so far by _data_handler(),
     * and the dimensions themselves.
     */
    size_t ndim;
    size_t dim[3];

    /* Number of pixels in the data raster.
     */
    size_t nmemb;

    /* Pixel data converted to unsigned 16-bit by _data_handler();
     * released by _end_group_handler() once the frames are delivered.
     */
    uint16_t *raster;

    /* Position in the tag hierarchy as tracked by
     * _start_group_handler(): 1 inside "ImageList", 2 inside
     * "ImageData", 3 inside "Data", 4 inside "Dimensions".
     */
    int context; // XXX Should probably have been an enum, and should
                 // probably have been called just "state"

    /* Pointer to user-supplied callback.
     */
    dm_frame_callback callback;

    /* Pointer to user-supplied extra data passed to the user-supplied
     * callback.
     */
    void *userData;
};


/* A single labelled entry in the tag hierarchy: either a nested group
 * or a data value.  Built by read_tag_entry().
 *
 * XXX Need a function to free this structure.
 */
struct dm_entry
{
    /* Null-terminated label of the entry, allocated by
     * read_tag_entry().
     */
    char *label;

    /* Zero if the entry holds a group, non-zero if it holds data.
     *
     * Should perhaps have been the type (as an int): 20 for group, 21
     * for data.
     */
    int is_data;

    /* The entry's payload; is_data selects the valid member.
     */
    union
    {
        struct dm_group *group;
        struct tag_aos *data;
    } content;
};


/* An ordered collection of entries.  Built by read_tag_group().
 *
 * XXX Need a function to free this structure.
 */
struct dm_group
{
    /* The number of entries in this group.
     */
    size_t nmemb;

    /* The number of entries the group can hold without reallocating
     * the entries array.
     */
    size_t capacity;

    /* Array of capacity pointers, of which the first nmemb refer to
     * entries that have been read.
     */
    struct dm_entry **entries;
};


/* The tag_aos structure records an array of structures--the most
 * general data representation in the Digital Micrograph format.
 * Structures along the rows.  Arranged by type to avoid alignment
 * issues: each field (column) is stored in its own contiguous buffer.
 */
struct tag_aos
{
    /* Type of each field; size elements.
     */
    enum tag_type *types;

    /* One buffer per field; values[j] holds nmemb elements of type
     * types[j].
     *
     * Alternatively, and for better typing, could have a pointer to a
     * list of unions of typed pointers.
     */
    void **values;

    /* Number of types in the structure, or size (length) of the
     * structure.  This can be thought of as the number of columns.
     */
    size_t size;

    /* Number of structures in the array.  This can be thought of as
     * the number of rows.
     */
    size_t nmemb;
};


/***************
 * PRINT STUFF *
 ***************/


/* Forward declaration: print_entry() and print_group() call each
 * other to walk nested groups.
 */
static void
print_group(const struct dm_group *group, int indentation);


/* Prints the dimensions of the AOS as "<nmemb>x<size> ", followed by
 * its contents in row-major order with one bracketed row per
 * structure.  At most the first ten elements are printed; when more
 * elements remain the output is cut short with "...".  Does not print
 * a trailing newline.
 *
 * @param data AOS to print; types and values must hold data->size
 *             entries
 */
static void
print_data(const struct tag_aos *data)
{
    /* Number of elements after which the output is truncated.
     */
    const size_t max_printed = 10;
    const size_t total = data->nmemb * data->size;
    size_t i, j, printed;


    printf("%zux%zu ", data->nmemb, data->size);
    for (i = 0; i < data->nmemb; i++) {
        printf("[");
        for (j = 0; j < data->size; j++) {
            switch (data->types[j]) {
            case TYPE_SHORT:
                printf("%d", ((int16_t *)data->values[j])[i]);
                break;

            case TYPE_LONG:
                printf("%ld", (long int)((int32_t *)data->values[j])[i]);
                break;

            case TYPE_USHORT:
                printf("%d", ((uint16_t *)data->values[j])[i]);
                break;

            case TYPE_ULONG:
                /* Unsigned conversion: values above INT32_MAX must
                 * not be printed as negative numbers.
                 */
                printf("%lu",
                       (unsigned long int)((uint32_t *)data->values[j])[i]);
                break;

            case TYPE_FLOAT:
                printf("%f", ((float *)data->values[j])[i]);
                break;

            case TYPE_DOUBLE:
                printf("%f", ((double *)data->values[j])[i]);
                break;

            case TYPE_BOOLEAN:
                printf("%d", ((int8_t *)data->values[j])[i]);
                break;

            case TYPE_CHAR:
                printf("%c", ((char *)data->values[j])[i]);
                break;

            case TYPE_OCTET:
                printf("%d", ((uint8_t *)data->values[j])[i]);
                break;

            case TYPE_UNK1:
                printf("%ld", (long int)((int32_t *)data->values[j])[i]);
                break;

            case TYPE_UNK2:
                printf("%lld", (long long int)((int64_t *)data->values[j])[i]);
                break;

            case TYPE_STRUCT:
                printf("Big trouble in little China #1\n");
                break;

            case TYPE_STRING:
                printf("Big trouble in little China #2\n");
                break;

            case TYPE_ARRAY:
                printf("Big trouble in little China #3\n");
                break;

            default:
                break;
            }

            if (j + 1 < data->size)
                printf(", ");

            /* Early exit once enough elements have been printed,
             * but only if there really are more elements to come, so
             * that a complete listing is never marked as truncated.
             * Close the row if appropriate.
             */
            printed = data->size * i + j + 1;
            if (printed >= max_printed && total > printed) {
                if (j + 1 < data->size)
                    printf("...");
                else
                    printf("], ...");
                return;
            }
        }

        if (i + 1 < data->nmemb)
            printf("], ");
        else
            printf("]");
    }
}


/* Prints one entry: its quoted label, indented by the given number of
 * spaces, followed either by the entry's data on the same line or,
 * for a group, by the group's entries on the following lines.
 *
 * @param entry       Entry to print
 * @param indentation Number of leading spaces; values below one
 *                    produce no indentation
 */
static void
print_entry(const struct dm_entry *entry, int indentation)
{
    int column;

    /* A signed counter: comparing an unsigned counter against a
     * negative indentation would loop (almost) forever.
     */
    for (column = 0; column < indentation; column++)
        printf(" ");

    printf("'%s' ", entry->label);
    if (entry->is_data == 0) {
        printf("\n");
        print_group(entry->content.group, indentation + 2);
    } else {
        print_data(entry->content.data);
        printf("\n");
    }
}


/* Prints every entry of the group, in order, at the given
 * indentation.
 *
 * @param group       Group whose entries are printed
 * @param indentation Number of leading spaces for each entry
 */
static void
print_group(const struct dm_group *group, int indentation)
{
    size_t index = 0;

    while (index < group->nmemb) {
        print_entry(group->entries[index], indentation);
        index += 1;
    }
}


/* Releases an AOS together with its type array and all of its column
 * buffers.  Only the first aos->size column buffers are released.
 *
 * @param aos AOS to release; may be NULL, in which case nothing is
 *            done
 */
static void
free_aos(struct tag_aos *aos)
{
    size_t column;

    if (aos == NULL)
        return;

    /* free(3) accepts NULL, so unset members need no special care.
     */
    free(aos->types);

    if (aos->values != NULL) {
        for (column = 0; column < aos->size; column++)
            free(aos->values[column]);
        free(aos->values);
    }

    free(aos);
}


/* Returns the number of octets occupied by one value of a simple
 * type.  For the composite types (structure, string, array) and for
 * unknown type codes a diagnostic is printed on standard output and
 * (size_t)-1 is returned.
 *
 * @param type Type code
 * @return     Size of the type in octets, or (size_t)-1 if the type
 *             has no fixed size
 */
static size_t
len_of_type(enum tag_type type)
{
    const char *complaint = "Big trouble in little China #4\n";

    switch (type) {
    case TYPE_SHORT:
    case TYPE_USHORT:
        return (sizeof(uint16_t));

    case TYPE_LONG:
    case TYPE_ULONG:
    case TYPE_UNK1:
        return (sizeof(uint32_t));

    case TYPE_FLOAT:
        return (sizeof(float));

    case TYPE_DOUBLE:
        return (sizeof(double));

    case TYPE_BOOLEAN:
    case TYPE_OCTET:
        return (sizeof(uint8_t));

    case TYPE_CHAR:
        return (sizeof(char));

    case TYPE_UNK2:
        return (sizeof(uint64_t));

    case TYPE_STRUCT:
        complaint = "Big trouble in little China #1\n";
        break;

    case TYPE_STRING:
        complaint = "Big trouble in little China #2\n";
        break;

    case TYPE_ARRAY:
        complaint = "Big trouble in little China #3\n";
        break;

    default:
        break;
    }

    fputs(complaint, stdout);
    return ((size_t)-1);
}


/* Reads an array of nmemb structures, each made up of size fields of
 * the given simple types, from the stream.  The fields of one
 * structure are stored consecutively in the stream; they are
 * distributed over one buffer per field in the returned AOS.
 *
 * This should be the only way to create a values structure!  XXX
 * Should handle errno better!  size == 0 and/or nmemb == 0 is valid
 * and yields an empty AOS without touching the stream.
 *
 * XXX The values are stored exactly as read; endianness is not dealt
 * with here.
 *
 * @param types  Types of the fields, size elements; copied
 * @param size   Number of fields in each structure
 * @param nmemb  Number of structures
 * @param stream Stream to read from
 * @return       Newly allocated AOS to be released with free_aos(),
 *               or NULL on allocation or read failure
 */
static struct tag_aos*
read_value(const enum tag_type *types, size_t size, size_t nmemb, FILE *stream)
{
    struct tag_aos *aos;
    size_t i, j, width;

    aos = calloc(1, sizeof(*aos));
    if (aos == NULL)
        return (NULL);

    aos->size = 0;
    aos->nmemb = 0;
    aos->types = NULL;
    aos->values = NULL;
    if (size == 0 || nmemb == 0)
        return (aos);

    aos->types = calloc(size, sizeof(*aos->types));
    if (aos->types == NULL) {
        free_aos(aos);
        return (NULL);
    }
    memcpy(aos->types, types, size * sizeof(*aos->types));

    aos->values = calloc(size, sizeof(*aos->values));
    if (aos->values == NULL) {
        free_aos(aos);
        return (NULL);
    }


    /* Initialize all value pointers to NULL and record the
     * dimensions right away: free_aos() only releases the first
     * aos->size buffers, so the size must be in place before any
     * buffer is allocated or a failure below would leak them.
     */
    for (j = 0; j < size; j++)
        aos->values[j] = NULL;
    aos->size = size;
    aos->nmemb = nmemb;

    for (j = 0; j < size; j++) {
        aos->values[j] = calloc(nmemb, len_of_type(types[j]));
        if (aos->values[j] == NULL) {
            free_aos(aos);
            return (NULL);
        }
    }

    for (i = 0; i < nmemb; i++) {
        for (j = 0; j < size; j++) {
            /* Address the element through a char pointer; arithmetic
             * on void pointers is not standard C.
             */
            width = len_of_type(types[j]);
            if (fread((char *)aos->values[j] + width * i,
                      width, 1, stream) != 1) {
                free_aos(aos);
                return (NULL);
            }
            /* XXX Probably need to deal with endianness here!
             */
        }
    }

    return (aos);
}


/* XXX Should include "type" in its name, methinks.
 */
static int
int2enum(uint32_t type_int, enum tag_type *type_enum)
{
    switch (type_int) {
    case TYPE_SHORT:
        *type_enum = TYPE_SHORT;
        return (0);

    case TYPE_LONG:
        *type_enum = TYPE_LONG;
        return (0);

    case TYPE_USHORT:
        *type_enum = TYPE_USHORT;
        return (0);

    case TYPE_ULONG:
        *type_enum = TYPE_ULONG;
        return (0);

    case TYPE_FLOAT:
        *type_enum = TYPE_FLOAT;
        return (0);

    case TYPE_DOUBLE:
        *type_enum = TYPE_DOUBLE;
        return (0);

    case TYPE_BOOLEAN:
        *type_enum = TYPE_BOOLEAN;
        return (0);

    case TYPE_CHAR:
        *type_enum = TYPE_CHAR;
        return (0);

    case TYPE_OCTET:
        *type_enum = TYPE_OCTET;
        return (0);

    case TYPE_UNK1:
        *type_enum = TYPE_UNK1;
        return (0);

    case TYPE_UNK2:
        *type_enum = TYPE_UNK2;
        return (0);

    case TYPE_STRUCT:
        *type_enum = TYPE_STRUCT;
        return (0);

    case TYPE_STRING:
        *type_enum = TYPE_STRING;
        return (0);

    case TYPE_ARRAY:
        *type_enum = TYPE_ARRAY;
        return (0);
    }

    return (-1);
}


/* Handlers invoked when an entry starts and ends.  The design borrows
 * from Expat, whose XML_StartElementHandler takes another "const
 * XML_Char **atts" parameter, mixed with ideas from neon.
 *
 * read_tag_entry() calls the start handler for every entry, group or
 * data, once the entry's label has been read, and stores the returned
 * value as the new context; a negative value makes it abandon the
 * entry.  It calls the end handler after the entry's contents have
 * been read and ignores the returned value.
 *
 * XXX Should have been StartEntryHandler
 */
typedef int dm_StartGroupHandler(struct state *userData, int parent, const char *name);
typedef int dm_EndGroupHandler(struct state *userData, int state, const char *name);

/* Handler invoked by read_tag_type() with each decoded data value;
 * the returned value is ignored.  Expat instead takes arguments
 * "const XML_Char *s, int len".
 */
//typedef void dm_DataHandler(struct state *state, char *data, size_t len);
typedef int dm_DataHandler(struct state *userData, int state, struct tag_aos *data);


struct tag_aos *
read_tag_type(FILE *stream,
              struct state *state,
              void *userData,
              dm_StartGroupHandler start,
              dm_DataHandler data,
              dm_EndGroupHandler end)
{
    char percent[4];
    struct tag_aos *aos;
    enum tag_type *tag_types;
    size_t j;
    enum tag_type tag_type;
    uint32_t len, type, numfields;


    /* Assert that the four percent signs are present.
     */
    if (fread(percent, sizeof(percent), 1, stream) != 1)
        return (NULL);
    if (strncmp(percent, "%%%%", 4) != 0)
        return (NULL);


    /* For version 4, skip an additional 4 octets.
     */
    if (fseek(stream, 0 + 4, SEEK_CUR) != 0)
        return (NULL);


    /* Determine length of definition of encoded type.
     */
    if (fread(&len, sizeof(len), 1, stream) != 1)
        return (NULL);
    len = bswap_32(len);


    /* For version 4, skip an additional 4 octets.
     */
    if (fseek(stream, 0 + 4, SEEK_CUR) != 0)
        return (NULL);


    /* XXX According to the NIH documentation, this block may be
     * better placed inside the switch statement below.
     */
    if (fread(&type, sizeof(type), 1, stream) != 1)
        return (NULL);
    if (int2enum(bswap_32(type), &tag_type) != 0)
        return (NULL);

    switch (len) {
    case 1:
        /* Simple type.
         */
        aos = read_value(&tag_type, 1, 1, stream);
        if (aos == NULL)
            return (NULL);
        data(state, state->context, aos);
        return (aos);

    case 2:
        /* String.  XXX This code path is not checked!
         */
        return (NULL);

    case 3:
        /* Array with elements of a single, simple type.
         */
        if (tag_type != TYPE_ARRAY)
            return (NULL);

        /* Read the type of the elements in the array as well as its
         * length.  For version 4, skip an additional 4 octets before
         * and after the type.
         */
        if (fseek(stream, 0 + 4, SEEK_CUR) != 0)
            return (NULL);
        if (fread(&type, sizeof(type), 1, stream) != 1)
            return (NULL);
        if (int2enum(bswap_32(type), &tag_type) != 0)
            return (NULL);
        if (fseek(stream, 0 + 4, SEEK_CUR) != 0)
            return (NULL);
        if (fread(&len, sizeof(len), 1, stream) != 1)
            return (NULL);

        aos = read_value(&tag_type, 1, bswap_32(len), stream);
        if (aos == NULL)
            return (NULL);
        data(state, state->context, aos);
        return (aos);

    default:
        switch (tag_type) {
        case TYPE_STRUCT:
            /* Structure of simple types.  This code allocates and
             * builds a tag_aos structure.  XXX And it will leak
             * memory in case of error, too!  It should perhaps be
             * rolled into its own function?
             */
            if (fseek(stream, 4 + 8, SEEK_CUR) != 0)
                return (NULL);

            if (fread(&numfields, sizeof(numfields), 1, stream) != 1)
                return (NULL);
            numfields = bswap_32(numfields);

            if (3 + 2 * numfields != len)
                return (NULL);

            if (fseek(stream, 4 + 4, SEEK_CUR) != 0)
                return (NULL);

            aos = malloc(sizeof(struct tag_aos));
            if (aos == NULL)
                return (NULL);
            aos->size = numfields;
            aos->nmemb = 1;

            aos->types = calloc(numfields, sizeof(enum tag_type));
            if (aos->types == NULL)
                return (NULL);

            aos->values = calloc(numfields, sizeof(void *));
            if (aos->values == NULL)
                return (NULL);

            if (fseek(stream, 4, SEEK_CUR) != 0)
                return (NULL);

            for (j = 0; j < numfields; j++) {
                if (fread(&type, sizeof(type), 1, stream) != 1)
                    return (NULL);
                if (int2enum(bswap_32(type), &aos->types[j]) != 0)
                    return (NULL);

                aos->values[j] = malloc(len_of_type(aos->types[j]));
                if (aos->values[j] == NULL)
                    return (NULL);

                if (fread(aos->values[j], len_of_type(aos->types[j]), 1, stream) != 1)
                    return (NULL);

                /* In version 4, entries are aligned on 16-octet
                 * boundaries.
                 */
                if (j + 1 < numfields) {
                    if (fseek(stream, 16 - sizeof(type) - len_of_type(aos->types[j]), SEEK_CUR) != 0)
                        return (NULL);
                }
            }

            /* Skip ahead until the next tag is found.  The Java
             * implementation empirically found skips of 4, 8, 12, 18,
             * 24, or 28 octets to work.  The last skip, 36, is to
             * reach EOF.
             */
            long int offsets[] = {0, 4, 8, 12, 18, 24, 28, 36};
            for (j = 0; j < sizeof(offsets) / sizeof(offsets[0]); j++) {
                int c = fgetc(stream);
                if (c == 20 || c == 21 || c == EOF) {
                    ungetc(c, stream);
                    data(state, state->context, aos);
                    return (aos);
                }

                if (j + 1 < sizeof(offsets) / sizeof(offsets[0]))
                    fseek(stream, offsets[j + 1] - offsets[j] - 1, SEEK_CUR);
            }
            return (NULL);

        case TYPE_ARRAY:
            /* Array of structures.
             */
            if (fseek(stream, 0 + 4, SEEK_CUR) != 0)
                return (NULL);
            if (fread(&type, sizeof(type), 1, stream) != 1)
                return (NULL);
            if (int2enum(bswap_32(type), &tag_type) != 0)
                return (NULL);
            if (tag_type != TYPE_STRUCT)
                return (NULL);

            if (fseek(stream, 4 + 8, SEEK_CUR) != 0)
                return (NULL);
            if (fread(&numfields, sizeof(numfields), 1, stream) != 1)
                return (NULL);
            numfields = bswap_32(numfields);

            tag_types = calloc(numfields, sizeof(enum tag_type));
            if (tag_types == NULL)
                return (NULL);

            if (fseek(stream, 12, SEEK_CUR) != 0)
                return (NULL);

            for (j = 0; j < numfields; j++) {
                if (fread(&type, sizeof(type), 1, stream) != 1)
                    return (NULL);
                if (int2enum(bswap_32(type), &tag_types[j]) != 0)
                    return (NULL);

                /* In version 4, entries are aligned on 16-octet
                 * boundaries.
                 */
                if (j + 1 < numfields) {
                    if (fseek(stream, 16 - sizeof(type), SEEK_CUR) != 0)
                        return (NULL);
                }
            }

            if (fseek(stream, 0 + 4, SEEK_CUR) != 0)
                return (NULL);
            if (fread(&len, sizeof(len), 1, stream) != 1)
                return (NULL);

            aos = read_value(tag_types, numfields, bswap_32(len), stream);
            if (aos == NULL)
                return (NULL);
            data(state, state->context, aos);
            return (aos);

        default:
            return (NULL);
        }
    }

    return (NULL);
}


/* Forward declaration due to hierarchy: read_tag_entry() and
 * read_tag_group() are mutually recursive, since a group holds
 * entries and an entry may itself hold a group.
 */
static struct dm_group *
read_tag_group(FILE *stream, struct state *state, void *userData, dm_StartGroupHandler start, dm_DataHandler data, dm_EndGroupHandler end);


/* Reads one entry -- its type, its label, and then either the nested
 * group or the data value it holds.  The start handler is called
 * once the label is known and its return value becomes the context
 * while the entry's contents are read; the end handler is called
 * after the contents have been read, and the previous context is
 * then restored.
 *
 * @param stream   Stream positioned at the start of the entry
 * @param state    Parser state
 * @param userData Passed through to the nested readers
 * @param start    Handler called when the entry starts
 * @param data     Handler called with decoded data values
 * @param end      Handler called when the entry ends
 * @return         Newly allocated entry, or NULL on failure or if the
 *                 start handler returned a negative value; the
 *                 entry and its label are released before NULL is
 *                 returned
 */
static struct dm_entry *
read_tag_entry(FILE *stream,
               struct state *state,
               void *userData,
               dm_StartGroupHandler start,
               dm_DataHandler data,
               dm_EndGroupHandler end)
{
    uint8_t type_length[1 + 2];
    uint16_t label_length;
    struct dm_entry *entry;
    int context_old;


    /* Zero-initialized so that the label can be released
     * unconditionally on failure.
     */
    entry = calloc(1, sizeof(*entry));
    if (entry == NULL)
        return (NULL);


    /* Determine the type of the TagEntry instance, either data or
     * group, as well as the length of the entry's label.  The length
     * sits at an odd offset, so it is copied out rather than read
     * through a (misaligned) uint16_t pointer.
     */
    if (fread(type_length, sizeof(type_length), 1, stream) != 1)
        goto fail;
    memcpy(&label_length, type_length + 1, sizeof(label_length));
    label_length = bswap_16(label_length);


    /* Allocate space for the null-terminated label string, fread(3)
     * it, and null-terminate it.
     */
    entry->label = calloc((size_t)label_length + 1, sizeof(char));
    if (entry->label == NULL)
        goto fail;
    if (fread(entry->label, sizeof(char), label_length, stream) != label_length)
        goto fail;
    entry->label[label_length] = '\0';


    /* For version 4, skip an additional 8 octets.
     */
    if (fseek(stream, 0 + 8, SEEK_CUR) != 0)
        goto fail;

    /* Let the start handler pick the context for the contents of
     * this entry; remember the current one so it can be restored.
     */
    context_old = state->context;
    state->context = start(state, state->context, entry->label);
    if (state->context < 0) {
        state->context = context_old;
        goto fail; // XXX But this is not an error!
    }

    switch (type_length[0]) {
    case 20:
        /* Tag group
         */
#ifdef DEBUG
        printf("\n");
#endif

        entry->is_data = 0;
        entry->content.group = read_tag_group(
            stream, state, userData, start, data, end);
        if (entry->content.group == NULL)
            goto fail;
        break;

    case 21:
        /* Data
         */
        entry->is_data = 1;
        entry->content.data = read_tag_type(
            stream, state, userData, start, data, end);
        if (entry->content.data == NULL)
            goto fail;
        break;

    default:
        /* Neither group nor data.
         */
        goto fail;
    }

    end(state, state->context, entry->label);
    state->context = context_old; // XXX Pop the state, correct?
    return (entry);

fail:
    free(entry->label);
    free(entry);
    return (NULL);
}


/* Ordering like in neon, except userdata comes first
 */
static struct dm_group *
read_tag_group(FILE *stream,
               struct state *state,
               void *userData,
               dm_StartGroupHandler start,
               dm_DataHandler data,
               dm_EndGroupHandler end)
{
    struct dm_group *group;
    size_t level;
    uint32_t numentries;
    uint8_t sorted_open[2];


    /* XXX This memory is leaked on failure!
     */
    group = malloc(sizeof(struct dm_group));
    if (group == NULL)
        return (NULL);


    /* Determine whether the group is sorted and/or open, only to
     * subsequently ignore it.  For version 4, skip an additional 4
     * octets.
     */
    if (fread(sorted_open, sizeof(sorted_open), 1, stream) != 1)
        return (NULL);
    if (fseek(stream, 0 + 4, SEEK_CUR) != 0)
        return (NULL);


    /* Determine the number of tags in the group, then read all the
     * TagEntry instances in turn.
     */
    if (fread(&numentries, sizeof(numentries), 1, stream) != 1)
        return (NULL);
    group->capacity = bswap_32(numentries);

    group->entries = calloc(group->capacity, sizeof(struct dm_entry *));
    if (group->entries == NULL)
        return (NULL);

    level = state->level;
    for (group->nmemb = 0; group->nmemb < group->capacity; group->nmemb++) {
        state->level = level + 1;
        group->entries[group->nmemb] = read_tag_entry(
            stream, state, userData, start, data, end);
        state->level = level;
        if (group->entries[group->nmemb] == NULL)
            return (NULL);
    }

    return (group);
}


/* Start handler: tracks where in the tag hierarchy the parser is, so
 * that the pixel data and the dimensions of the image of interest can
 * be picked out by _data_handler().  The contexts are
 *
 *   1  inside a level-1 "ImageList"
 *   2  inside a level-3 "ImageData" below that
 *   3  inside the level-4 "Data" of the second "ImageData"
 *   4  inside the level-4 "Dimensions" of the second "ImageData"
 *
 * Progress is reported on standard error.
 *
 * XXX It appears there is actually no need to pass around pointers to
 * these callbacks, as they are fixed.
 *
 * @param userData Parser state; context and image_data_count are
 *                 updated
 * @param parent   Context of the enclosing entry
 * @param name     Label of the entry being started
 * @return         Context for the contents of the entry; equal to
 *                 parent if the entry is of no particular interest
 */
static int
_start_group_handler(struct state *userData, int parent, const char *name)
{
    /* XXX Should really use parent instead of userData->level and
     * userData->context.
     */
    int ret;

    ret = parent;
    if (userData->level == 1 &&
        strcmp(name, "ImageList") == 0) {
        fprintf(stderr, "SEE THE IMAGELIST at %zu [%d]\n",
                userData->level, parent);
        userData->context = 1;
        ret = 1;

    } else if (userData->level == 3 &&
               userData->context == 1 &&
               strcmp(name, "ImageData") == 0) {
        /* Only an "ImageData" entered directly from the image list
         * counts as a new image.
         */
        fprintf(stderr, "SEE THE IMAGEDATA 1 at %zu ctxt %d [%d]\n",
                userData->level, userData->context, parent);
        userData->image_data_count += 1;
        userData->context = 2;
        ret = 2;

    } else if (userData->level == 3 &&
               userData->context == 2 &&
               strcmp(name, "ImageData") == 0) {
        fprintf(stderr, "SEE THE IMAGEDATA 2 at %zu ctxt %d [%d]\n",
                userData->level, userData->context, parent);
        userData->context = 2;
        ret = 2;

    } else if (userData->level == 4 &&
               userData->context == 2 &&
               userData->image_data_count == 2 &&
               strcmp(name, "Data") == 0) {
        fprintf(stderr, "SEE THE DATA at %zu ctxt %d [%d]\n",
                userData->level, userData->context, parent);
        userData->context = 3;
        ret = 3;

    } else if (userData->level == 4 &&
               userData->context == 2 &&
               userData->image_data_count == 2 &&
               strcmp(name, "Dimensions") == 0) {
        fprintf(stderr, "SEE THE DIMENSIONS at %zu ctxt %d [%d]\n",
                userData->level, userData->context, parent);
        userData->context = 4;
        ret = 4;

    } else if (strcmp(name, "ImageList") == 0) {
        printf("   *** see other imagelist ***\n");
    }

#ifdef DEBUG
    size_t i;
    for (i = 1; i < userData->level; i++)
        printf("  ");
    printf("'%s'", name);
#endif

    return (ret);
}


/* End handler: when an "ImageList" ends and both the dimensions and
 * the pixel data have been collected, cuts the raster into frames and
 * hands each one to the user-supplied callback.  A two-dimensional
 * image is treated as a stack of one.  The collected data is released
 * afterwards, whether or not the frames could be delivered.
 *
 * XXX Might just as well defer this until the entire file has been
 * read.
 *
 * @param userData Parser state holding the raster, its dimensions,
 *                 and the callback
 * @param state    Unused
 * @param name     Label of the entry being ended
 * @return         0 if successful or if there was nothing to do, -1
 *                 if the dimensions do not fit the raster, memory
 *                 could not be allocated, or the callback failed for
 *                 at least one frame
 */
static int
_end_group_handler(struct state *userData, int state, const char *name)
{
    struct frame frame;
    size_t k, npixels;
    int ret = 0;

    (void)state;

    if (strcmp(name, "ImageList") == 0 &&
        userData->ndim >= 2 &&
        userData->nmemb > 0 &&
        userData->raster != NULL) {

        /* A single 2D image is a stack of one.
         */
        if (userData->ndim == 2) {
            userData->dim[2] = 1;
            userData->ndim = 3;
        }

        /* Do not hand indeterminate members to the callback.
         */
        memset(&frame, 0, sizeof(frame));

        /* The frames are copied out of the raster below, so all of
         * them must fit inside it.  The divisions keep the test free
         * of overflow: the product is only formed once it is known
         * not to exceed nmemb.
         */
        npixels = 0;
        if (userData->dim[0] == 0 ||
            userData->dim[1] == 0 ||
            userData->dim[0] > userData->nmemb / userData->dim[1] ||
            userData->dim[2] > userData->nmemb /
            (userData->dim[0] * userData->dim[1])) {
            warnx("Dimensions %zux%zux%zu do not fit raster of %zu pixels",
                  userData->dim[0], userData->dim[1], userData->dim[2],
                  userData->nmemb);
            ret = -1;
        } else {
            npixels = userData->dim[0] * userData->dim[1];

            /* XXX This is either stupid or dangerous.  Allocating
             * space for the raster outside the loop assumes the
             * raster will not be resized by the callback.  It's
             * safer, but perhaps a little bit slower, to allocate and
             * release the entire contents of the frame structure
             * inside the loop.
             *
             * This applies to the callbacks in tvips2smv as well.
             */
            frame.raster = calloc(npixels, sizeof(uint16_t));
            if (frame.raster == NULL) {
                warn("Failed to allocate frame of %zu pixels", npixels);
                ret = -1;
            }
        }

        if (ret == 0) {
            for (k = 0; k < userData->dim[2]; k++) {
                fprintf(stderr, "Writing image %zu of %zu\n",
                        k + 1, userData->dim[2]);

                memcpy(frame.raster,
                       userData->raster + k * npixels,
                       npixels * sizeof(uint16_t));
                frame.height = userData->dim[1];
                frame.width = userData->dim[0];

                /* XXX Unhandled stuff.
                 */
                frame.tv.tv_sec = 0;
                frame.tv.tv_nsec = 0;

                frame.binning[0] = 1;
                frame.binning[1] = 1;

                frame.pixel_size[0] = 0.005; // XXX From spec, may be bogus
                frame.pixel_size[1] = 0.005;

                /* Keep going with the remaining frames, but do not
                 * let the failure pass unnoticed.
                 */
                if (userData->callback(&frame, k + 1, userData->userData) != 0) {
                    warnx("Callback failed for image %zu of %zu",
                          k + 1, userData->dim[2]);
                    ret = -1;
                }
            }

            free(frame.raster);
        }


        /* Mark the data as handled.
         */
        free(userData->raster);
        userData->ndim = 0;
        userData->nmemb = 0;
        userData->raster = NULL;
    }


#ifdef DEBUG
    printf("\n");
#endif

    return (ret);
}


/* Data handler: collects the pieces needed to build the frames.  In
 * context 3 (the image's "Data") a single-field array is converted to
 * an unsigned 16-bit raster; only floating-point data is converted so
 * far, all other types leave the raster zeroed.  In context 4 (the
 * image's "Dimensions") each single unsigned long value is appended
 * to the list of dimensions.
 *
 * XXX This is a bit clumsy--converting twice, and so on.  Given that
 * there is now knowledge of what to expect, how about parsing on
 * demand (and byte-skipping otherwise to keep the file pointer
 * intact)?
 *
 * @param userData Parser state; raster, nmemb, dim, and ndim are
 *                 updated
 * @param state    Context as seen by the caller; only reported
 * @param aos      Decoded data value
 * @return         0 if successful, -1 if the raster could not be
 *                 allocated
 */
static int
_data_handler(struct state *userData, int state, struct tag_aos *aos)
{
    size_t i;
    int ret = 0;

    switch (userData->context) {
    case 3:
        fprintf(stderr, "Will extract the data at %zu ctxt %d [%d] no %d\n",
                userData->level, userData->context, state, userData->image_data_count);
        if (aos->size == 1) {
            fprintf(stderr, "  have %zu values type %d\n",
                    aos->nmemb, (int)aos->types[0]);

            userData->raster = calloc(aos->nmemb, sizeof(uint16_t));
            if (userData->raster == NULL) {
                /* Leave the state without a raster, so that the end
                 * handler will not attempt to use it.
                 */
                warn("Failed to allocate raster of %zu pixels", aos->nmemb);
                userData->nmemb = 0;
                ret = -1;
                break;
            }
            userData->nmemb = aos->nmemb;

            switch (aos->types[0]) {

            case TYPE_SHORT:
            case TYPE_LONG:
            case TYPE_USHORT:
            case TYPE_ULONG:
                /* XXX Implement these */
                break;

            case TYPE_FLOAT:
                /* Extract data, convert to unsigned 16-bit.  XXX The
                 * data (probably) isn't always in floats, and simple
                 * rounding may not be the way!  XXX May want to
                 * return with errno set to ERANGE (see
                 * e.g. strtod(3), or EOVERFLOW fread(3)).
                 *
                 * Out-of-range values are reported with warnx(3)
                 * rather than warn(3): errno carries no information
                 * about them.  XXX They are still converted as is.
                 */
                for (i = 0; i < userData->nmemb; i++) {
                    float t = 100.0 * ((float *)aos->values[0])[i]; // XXX ARBITRARY MULTIPLIER!
                    if (t > UINT16_MAX)
                        warnx("Overflow %f > %d", t, UINT16_MAX);
                    else if (t < 0)
                        warnx("Underflow %f < 0", t);
                    userData->raster[i] = (uint16_t)lrintf(t);
                }
                break;

            case TYPE_DOUBLE:
            case TYPE_BOOLEAN:
            case TYPE_CHAR:
            case TYPE_OCTET:
            case TYPE_UNK1:
            case TYPE_UNK2:
            case TYPE_STRUCT:
            case TYPE_STRING:
            case TYPE_ARRAY:
                /* XXX Implement these */
                break;

            default:
                break;
            }
        }

        break;

    case 4:
        if (aos->size == 1 && aos->nmemb == 1 && aos->types[0] == TYPE_ULONG) {
            const uint32_t extent = ((uint32_t *)aos->values[0])[0];

            /* Never write past the end of the dimension array, no
             * matter how many dimensions the file claims to have.
             */
            if (userData->ndim <
                sizeof(userData->dim) / sizeof(userData->dim[0])) {
                userData->dim[userData->ndim++] = extent;
                fprintf(stderr, "  set dim to %lu\n",
                        (unsigned long int)extent);
            } else {
                warnx("Ignoring excess dimension %lu",
                      (unsigned long int)extent);
            }
        }
        break;

    default:
        break;
    }




#ifdef DEBUG
    printf(" ");
    print_data(aos);
#endif

    return (ret);
}


/* Parse a DM file from stream: check the fixed-size header, then read
 * the root tag group.  callback and userData are stored in the parser
 * state that is handed to read_tag_group().
 *
 * The header is consumed in file order: a 32-bit version (only 4 is
 * accepted), four octets that are skipped, a 32-bit size, and a
 * 32-bit endianness flag (only 0 and 1 are accepted).  Each value is
 * passed through bswap_32() right after it has been read.
 *
 * Returns 0 on success and -1 on failure: a short read, a failed
 * fseek(3) or fstat(2), an unsupported version or endianness, a NULL
 * return from read_tag_group(), or a stream that does not flag
 * end-of-file once the root group has been read.  Diagnostics go to
 * standard output.
 *
 * XXX This should probably return some kind of "root group" that
 * could encapsulate stuff such as version and endianness.  Then
 * REDESIGN: pass the structures as arguments to allow for progressive
 * parsing later on.
 */
int
read_dm(FILE *stream, dm_frame_callback callback, void *userData)
{
    struct stat sb;
    struct state st;
    uint32_t declared_size;
    size_t axis;


    /* The version number comes first.  Only version 4 is handled;
     * version 3 could probably be dealt with, given an example file.
     */
    if (fread(&st.version, sizeof(st.version), 1, stream) != 1)
        return -1;
    st.version = bswap_32(st.version);
    if (st.version != 4) {
        printf("Unsupported version %d\n", st.version);
        return -1;
    }


    /* Version 4 has four additional octets at this point; step over
     * them.
     */
    if (fseek(stream, 4L, SEEK_CUR) != 0)
        return -1;


    /* According to the NIH page, the next field is the "number of
     * bytes in the file".  The value is read to advance the stream,
     * but the comparison against the actual file size is disabled.
     *
     * XXX Does not work for
     * /groups/gonen/gonenlab/EMdata/MicroED/lysozyme/Lead/K2camera/061814/movie04t30d4s.dm4:
     * "Empirically mismatching file sizes 1761849110 vs 6056816430".
     */
    if (fread(&declared_size, sizeof(declared_size), 1, stream) != 1)
        return -1;
    declared_size = bswap_32(declared_size);
    if (fstat(fileno(stream), &sb) != 0)
        return -1;
#if 0
    if (sb.st_size != declared_size + 24) {
        printf("Empirically mismatching file sizes %d vs %ld\n",
               declared_size, sb.st_size);
        return -1;
    }
#endif


    /* Byte-ordering flag.  NIH: 0 <=> big endian, 1 <=> little
     * endian.  Except, the Java implementation says endianness == 1
     * <=> big endian.
     */
    if (fread(&st.endianness, sizeof(st.endianness), 1, stream) != 1)
        return -1;
    st.endianness = bswap_32(st.endianness);
    if (!(st.endianness == 0 || st.endianness == 1)) {
        printf("Unsupported endianness %d\n", st.endianness);
        return -1;
    }
    printf("Extracted endianness %d\n", st.endianness);


    /* Reset the remaining parser state and attach the caller's
     * callback before descending into the root tag group.
     */
    st.level = 0;
    st.context = 0; // STATE_ROOT or some such define?
    st.image_data_count = 0;
    st.ndim = 0;
    for (axis = 0; axis < 3; axis++)
        st.dim[axis] = 0;

    st.callback = callback;
    st.userData = userData;

    if (read_tag_group(stream,
                       &st,
                       NULL,
                       _start_group_handler,
                       _data_handler,
                       _end_group_handler) == NULL) {
        return -1;
    }


    /* The root group is expected to account for the rest of the
     * stream.
     */
    if (feof(stream))
        return 0;

    printf("DIDN'T REACH EOF\n");
    return -1;
}


/* XXX Both the structure and the _frame_write() function below are
 * copied from tvipsread.c
 *
 * Output settings that main() fills in from defaults and command line
 * options, and that _frame_write() receives through its untyped data
 * pointer.  Units follow the comment on the defaults in main().
 */
struct output
{
    /* Template for the output paths, expanded with the frame counter
     * by template2path().  _frame_write() writes nothing if NULL.
     */
    const char *output_template;

    /* Beam center (-x and -y), in mm; copied to the frame. */
    float beam_center[2];

    /* Sample--detector distance (-d), in mm; copied to the frame. */
    float distance;

    /* Exposure per frame; multiplied by tilt_rate to obtain the
     * oscillation range.  Presumably in seconds, given that tilt_rate
     * is in degrees per second -- TODO confirm.
     */
    double exposure;

    /* Tilt rate (-r), in degrees per second. */
    float tilt_rate;

    /* Wavelength (-w), in Angstrom; copied to the frame. */
    float wavelength;

    /* Second argument to frame_rot90() (-k, --rot90).  Presumably the
     * number of 90-degree rotations -- verify against frame_rot90().
     */
    int k;
};


/* Frame callback: complete frame with the settings in data (a struct
 * output), rotate it, and write it to the path obtained from the
 * output template and counter.
 *
 * frame is modified in place.  counter is the one-based sequence
 * number of the frame.  data must point to a struct output.
 *
 * Returns 0 if the frame was written, or if no output template is set
 * (in which case nothing is done).  Returns -1 if counter is zero or
 * if rotating the frame, allocating memory, expanding the template,
 * creating the directories, or opening, writing or closing the output
 * file fails.
 */
static int
_frame_write(struct frame *frame, size_t counter, void *data)
{
    struct output *output = (struct output *)data;
    FILE *stream;
    char *dir, *output_dir, *output_path;
    mode_t mode, mode_old;
    size_t len;
    int ret;


    /* The counter is one-based, but the arithmetic below is
     * zero-based.
     */
    if (output->output_template == NULL)
        return (0);
    if (counter < 1)
        return (-1);


    /* Complete the frame structure with items that are (currently)
     * not extracted from the file headers but supplied on the command
     * line.  Apply the frame rotation.
     */
    frame->beam_center[0] = output->beam_center[0];
    frame->beam_center[1] = output->beam_center[1];
    frame->distance = output->distance;
    frame->exposure = output->exposure;
    frame->osc_range = output->tilt_rate * output->exposure;
    frame->osc_start = (counter - 1) * frame->osc_range;
    frame->wavelength = output->wavelength;

    if (frame_rot90(frame, output->k) != 0)
        return (-1);


    /* Write the file to the path specified by the output template,
     * after applying "base one" sequence number substitutions and
     * creating any intermediate directories.  mkpath() is always
     * called, because the template may affect the directories as well
     * as terminal files.
     *
     * A single allocation holds two buffers: the directory in the
     * first half and the full path in the second half.
     *
     * XXX This is duplication w.r.t. tiff2smv.c.
     */
    len = strlen(output->output_template) + 1;
    output_dir = calloc(2 * len, sizeof(char));
    if (output_dir == NULL)
        return (-1);
    output_path = output_dir + len;

    /* umask(2) can only be read by setting it; restore it at once.
     */
    mode_old = umask(0);
    mode = 0777 & ~mode_old;
    umask(mode_old);

    ret = -1;
    if (template2path(output_path, output->output_template, counter) != 0)
        goto out;

    /* dirname(3) may modify its argument and may return a pointer
     * into it, or a pointer to static storage.  It is therefore run
     * on a copy of the path, and its result is moved to the start of
     * the buffer with memmove(3), which tolerates the overlap.
     */
    dir = dirname(strcpy(output_dir, output_path));
    memmove(output_dir, dir, strlen(dir) + 1);
    if (mkpath(output_dir, mode) != 0)
        goto out;

    stream = fopen(output_path, "w");
    if (stream == NULL)
        goto out;

    /* The stream is closed exactly once, whether or not the write
     * succeeded.  A failing fclose(3) means buffered data may not
     * have reached the file and is reported as failure as well.
     */
    if (frame_write(frame, stream) == 0)
        ret = 0;
    if (fclose(stream) != 0)
        ret = -1;

out:
    free(output_dir);
    return (ret);
}


/* Print a synopsis of the command line to standard error and exit
 * with EXIT_FAILURE; this function does not return.
 *
 * Note that use of __progname is not portable.
 *
 * XXX This is duplication w.r.t. tiff2smv.c.
 */
static void
usage(void)
{
    extern char *__progname;

    /* Every option accepted by the optstring in main() is listed
     * here, in the same order as the short options sort.
     */
    fprintf(stderr,
            "usage: %s "
            "[-d distance] "
            "[-k rotation] "
            "[-o output_template] "
            "[-r oscillation_speed] "
            "[-v] "
            "[-w wavelength] "
            "[-x beam_center_x] "
            "[-y beam_center_y] "
            "[-z timezone] "
            "file ...\n", __progname);
    fprintf(stderr,
            "       %s -V\n", __progname);
    fprintf(stderr,
            "       %s -h\n", __progname);
    exit(EXIT_FAILURE);
}


/* Print the program name followed by the stringified GIT_BRANCH and
 * GIT_COMMIT macros on a single line of standard output.
 *
 * The two-level xstr()/str() pair makes the preprocessor expand its
 * argument before turning it into a string literal.
 *
 * See the GNU coding standards for more information on this (and
 * opinions on how the --help option should have been implemented).
 *
 * XXX This is duplication w.r.t. tiff2smv.c.
 */
#define xstr(token) str(token)
#define str(token) #token
static void
version()
{
    extern char *__progname;

    printf("%s (TVIPS tools) "
           xstr(GIT_BRANCH)
           "."
           xstr(GIT_COMMIT)
           "\n",
           __progname);
}


/* Command line driver: parse the options into a struct output, then
 * run read_dm() on every file named on the command line with
 * _frame_write() as the frame callback.
 *
 * Exits with EXIT_FAILURE on an illegal command line, if no input file
 * is given, or if an input file cannot be opened.  A file that cannot
 * be read is reported on standard error but does not change the exit
 * status.
 */
int
main(int argc, char *argv[])
{
    struct output output;

    FILE *stream;
    char *ep;
    size_t i, j;
    long rot;
    int verbose;

    /* Default values for command line options.  Beam center and
     * sample--detector distance in mm, tilt_rate in degrees per
     * second, and wavelength in Ångström.
     *
     * XXX This is duplication w.r.t. tiff2smv.c and tvipsread.c.
     */
    output.beam_center[0] = 10.8;
    output.beam_center[1] = 8.2;
    output.distance = 540; // 700.0;
    output.exposure = 4.0;
    output.k = 3;
    output.tilt_rate = 0.09;
    output.output_template = "dm/test_###.img";
    output.wavelength = 0.0191;

    verbose = 1;


    /* Use getopt_long(3) if available; the POSIX.2 fall-back
     * getopt(3) is assumed to always be available.  optstring begins
     * with a colon in order to enable tracking of missing option
     * arguments.
     *
     * XXX Duplication w.r.t. tiff2smv.c!
     */
    int ch;
    const char *optstring = ":Vd:hk:o:r:x:y:vw:z:";

#ifdef HAVE_GETOPT_LONG
    static struct option options[] = {
        { "version",         no_argument,       NULL, 'V' },
        { "distance",        required_argument, NULL, 'd' },
        { "help",            no_argument,       NULL, 'h' },
        { "rot90",           required_argument, NULL, 'k' },
        { "output-template", required_argument, NULL, 'o' },
        { "rotation-speed",  required_argument, NULL, 'r' },
        { "verbose",         no_argument,       NULL, 'v' },
        { "wavelength",      required_argument, NULL, 'w' },
        { "beam-center-x",   required_argument, NULL, 'x' },
        { "beam-center-y",   required_argument, NULL, 'y' },
        { "timezone",        required_argument, NULL, 'z' },
        { NULL,              0,                 NULL, 0   }
    };
#endif


    /* Loop through all the arguments in argv using either getopt(3)
     * function.  This code does its own error reporting, hence opterr
     * is set to zero to disable getopt(3) error messages.  Because
     * optreset is an extension to the POSIX.2 specification, it is
     * not used here.
     *
     * usage() does not return, so the cases that end in a call to it
     * need no break.
     *
     * XXX Duplication w.r.t. tiff2smv.c!
     */
    opterr = 0;
#ifdef HAVE_GETOPT_LONG
    while ((ch = getopt_long(argc, argv, optstring, options, NULL)) != -1) {
#else
    while ((ch = getopt(argc, argv, optstring)) != -1) {
#endif
        switch (ch) {
        case 'V':
            version();
            exit(EXIT_SUCCESS);

        case 'd':
            errno = 0;
            output.distance = strtof(optarg, &ep);
            if (optarg[0] == '\0' || *ep != '\0' || errno != 0) {
                warnx("Illegal -d argument %s", optarg);
                usage();
            }
            break;

        case 'h':
            usage();
            /* NOTREACHED */

        case 'k':
            /* Validate the conversion before the value is reduced
             * modulo four and stored.
             */
            errno = 0;
            rot = strtol(optarg, &ep, 10);
            if (optarg[0] == '\0' || *ep != '\0' || errno != 0) {
                warnx("Illegal -k argument %s", optarg);
                usage();
            }
            output.k = (int)(rot % 4);
            break;

        case 'o':
            output.output_template = optarg;
            break;

        case 'r':
            errno = 0;
            output.tilt_rate = strtof(optarg, &ep);
            if (optarg[0] == '\0' || *ep != '\0' || errno != 0) {
                warnx("Illegal -r argument %s", optarg);
                usage();
            }
            break;

        case 'v':
            verbose++;
            break;

        case 'w':
            errno = 0;
            output.wavelength = strtof(optarg, &ep);
            if (optarg[0] == '\0' || *ep != '\0' || errno != 0) {
                warnx("Illegal -w argument %s", optarg);
                usage();
            }
            break;

        case 'x':
            errno = 0;
            output.beam_center[0] = strtof(optarg, &ep);
            if (optarg[0] == '\0' || *ep != '\0' || errno != 0) {
                warnx("Illegal -x argument %s", optarg);
                usage();
            }
            break;

        case 'y':
            errno = 0;
            output.beam_center[1] = strtof(optarg, &ep);
            if (optarg[0] == '\0' || *ep != '\0' || errno != 0) {
                warnx("Illegal -y argument %s", optarg);
                usage();
            }
            break;

        case 'z':
            /* XXX The timezone is accepted but currently ignored.
             */
            break;

        case ':':
            /* Missing the required argument of an option.  Use the
             * last known option character (optopt) for error
             * reporting.
             */
#ifdef HAVE_GETOPT_LONG
            for (i = 0; options[i].name != NULL; i++) {
                if (options[i].val == optopt) {
                    warnx("Option -%c (--%s) requires an argument",
                          optopt, options[i].name);
                    usage();
                }
            }
#endif
            warnx("Option -%c requires an argument", optopt);
            usage();
            /* NOTREACHED */

        case '?':
            warnx("Unrecognized option '%s'", argv[optind - 1]);
            usage();
            /* NOTREACHED */

        default:
            usage();
        }
    }


    /* If requested, repeat command line options verbatim on standard
     * output.
     *
     * XXX This is duplication w.r.t. tiff2smv.c.
     */
    if (verbose > 1) {
        version();
        printf("\n");
        for (i = 0; i < (size_t)optind; i++) {
            printf("%s%s", argv[i], i + 1 < (size_t)optind ? " " : "\n");
        }
        printf("\n");
    }


    /* Since the documentation states that at least one image must be
     * provided, print the synopsis and exit with failure if there are
     * none.
     *
     * XXX Duplication w.r.t. tiff2smv.c!
     */
    if (argc <= optind)
        usage();
    argc -= optind;
    argv += optind;


    /* XXX It really makes no sense to accept more than one input file
     * on the command line, because the frames will just overwrite
     * each other!
     *
     * If requested, output the canonicalized absolute pathname of the
     * files just read.  This abuses ep.
     *
     * XXX This is duplication w.r.t. tiff2smv.c.
     */
    for (i = 0; i < (size_t)argc; i++) {
        stream = fopen(argv[i], "rb");
        if (stream == NULL)
            err(EXIT_FAILURE, "Failed to open %s", argv[i]);

        if (read_dm(stream, _frame_write, (void *)&output) != 0) {
            /* XXX Should have been err(EXIT_FAILURE, ...).
             */
            warn("Failed to read %s", argv[i]);

            /* Dump the next 80 octets of the stream as an aid to
             * debugging.  The dump has its own index, so the position
             * in argv is left alone.
             */
            for (j = 0; j < 80; j++)
                printf("%3d ", fgetc(stream));
            printf("\n");
        }

        /* The stream is closed exactly once per file, whether or not
         * it could be read.
         */
        fclose(stream);

        if (verbose > 2) {
            ep = realpath(argv[i], NULL);
            if (ep != NULL) {
                printf("%s\n", ep);
                free(ep);
            }
        }
    }

    return (EXIT_SUCCESS);
}
