1    | /***************************************
2    |   $Header$
3    | 
4    |   This file contains the functions
5    |   +latex+{\tt IlluMultiSave()}, {\tt IlluMultiLoad()} and {\tt IlluMultiRead()}
6    |   +html+ <tt>IlluMultiSave()</tt>, <tt>IlluMultiLoad()</tt> and
7    |   +html+ <tt>IlluMultiRead()</tt>
8    |   designed to handle distributed storage and retrieval of data on local drives
9    |   of machines in a Beowulf cluster.  This should allow rapid loading of
10   |   timestep data for "playback" from what is essentially a giant RAID-0 array of
11   |   distributed disks, at enormously higher speeds than via NFS from a hard drive
12   |   or RAID array on the head node.  The danger of course is that if one node's
13   |   disk goes down, you don't have a valid data set any more, but that's the
14   |   nature of RAID-0, right?
15   |   +latex+\par
16   |   +html+ <p>
17   |   The filenames saved are:
18   |   +latex+\begin{itemize} \item {\tt $<$basename$>$.cpu\#\#\#\#.meta}, where
19   |   +latex+{\tt \#\#\#\#}
20   |   +html+ <ul><li><tt>&lt;basename&gt.cpu####.meta</tt>, where <tt>####</tt>
21   |   is replaced by the CPU number (more than four digits if there are more than
22   |   9999 CPUs :-), which has the metadata for the whole thing in XML format
23   |   (written by
24   |   +latex+{\tt GNOME libxml}),
25   |   +html+ <tt>GNOME libxml</tt>),
26   |   as described in the notes on the
27   |   +latex+{\tt IlluMultiStoreXML()} function.
28   |   +html+ <tt>IlluMultiStoreXML()</tt> function.
29   |   +latex+\item {\tt $<$basename$>$.cpu\#\#\#\#.data}
30   |   +html+ </li><li><tt>&lt;basename&gt.cpu####.data</tt>
31   |   which is simply a stream of the raw data, optionally compressed by
32   |   +latex+{\tt gzip}. \end{itemize}
33   |   +html+ <tt>gzip</tt>.</li></ul>
34   |   If one were saving timesteps, one might include a timestep number in the
35   |   basename, and also timestep and simulation time in the metadata.  The
36   |   metadata can also hold simulation parameters, etc.
37   |   +latex+\par
38   |   +html+ <p>
39   |   This supports 1-D, 2-D and 3-D distributed arrays.  As an extra feature, you
40   |   can load a multi-CPU distributed array scattered over lots of files into a
41   |   single CPU, to facilitate certain modes of data visualization.
42   | ***************************************/
43   | 
44   | 
45   | #include "illuminator.h"     /* Also includes petscda.h */
46   | #include <glib.h>            /* for guint*, GUINT*_SWAP_LE_BE */
47   | #include <petscblaslapack.h> /* for BLAScopy_() */
48   | #include <libxml/tree.h>     /* To build the XML tree to store */
49   | #include <libxml/parser.h>   /* XML parsing */
50   | #include <stdio.h>           /* FILE, etc. */
51   | #include <errno.h>           /* errno */
52   | #include <string.h>          /* strncpy(), etc. */
53   | #include <stdlib.h>          /* malloc() and free() */
54   | 
55   | /* Build with -DDEBUG for debugging output */
56   | #undef DPRINTF
57   | #ifdef DEBUG
58   | #define DPRINTF(fmt, args...) ierr = PetscPrintf (comm, "%s: " fmt, __FUNCT__, args); CHKERRQ (ierr)
59   | #else
60   | #define DPRINTF(fmt, args...)
61   | #endif
62   | 
63   | 
64   | #undef __FUNCT__
65   | #define __FUNCT__ "IlluMultiParseXML"
66   | 
67   | /*++++++++++++++++++++++++++++++++++++++
68   |   This function reads in the XML metadata document and returns the various
69   |   parameter values in the addresses pointed to by the arguments.  It is called
70   |   by IlluMultiLoad() and IlluMultiRead().
71   | 
72   |   int IlluMultiParseXML It returns zero or an error code.
73   | 
74   |   MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD.
75   | 
76   |   char *basename Base file name.
77   | 
78   |   int rank CPU number to read data for.
79   | 
80   |   int *compressed Data compression: if zero then no compression (fastest), 1-9
81   |   then gzip compression level, 10-15 then gzip --best.  If 16-31 then save
82   |   guint32s representing relative values between min and max for each field,
83   |   compressed according to this value minus 16.  Likewise for 32-47 and
84   |   guint16s, and 48-63 for guint8s.  Yes, these alternative formats lose
85   |   information and can't be used for accurate checkpointing, but they should
86   |   retain enough data for visualization (except perhaps for the guint8s, which
87   |   are possibly acceptable for vectors but likely not contours).
88   | 
89   |   int *wrongendian Tells whether the data are stored in the opposite endian
90   |   format from this platform, and thus must be switched when the data are
91   |   streamed in.
92   | 
93   |   int *dim Dimensionality of the space.
94   | 
95   |   int *px Number of processors in the
96   |   +latex+$x$-direction.
97   |   +html+ <i>x</i>-direction.
98   | 
99   |   int *py Number of processors in the
100  |   +latex+$y$-direction.
101  |   +html+ <i>y</i>-direction.
102  | 
103  |   int *pz Number of processors in the
104  |   +latex+$z$-direction.
105  |   +html+ <i>z</i>-direction.
106  | 
107  |   int *nx Number of grid points over the entire array in the
108  |   +latex+$x$-direction.
109  |   +html+ <i>x</i>-direction.
110  | 
111  |   int *ny Number of grid points over the entire array in the
112  |   +latex+$y$-direction.
113  |   +html+ <i>y</i>-direction.
114  | 
115  |   int *nz Number of grid points over the entire array in the
116  |   +latex+$z$-direction.
117  |   +html+ <i>z</i>-direction.
118  | 
119  |   PetscScalar *wx Physical overall width in the
120  |   +latex+$x$-direction, {\tt PETSC\_NULL} if not needed.
121  |   +html+ <i>x</i>-direction, <tt>PETSC_NULL</tt> if not needed.
122  | 
123  |   PetscScalar *wy Physical overall width in the
124  |   +latex+$y$-direction, {\tt PETSC\_NULL} if not needed.
125  |   +html+ <i>y</i>-direction, <tt>PETSC_NULL</tt> if not needed.
126  | 
127  |   PetscScalar *wz Physical overall width in the
128  |   +latex+$z$-direction, {\tt PETSC\_NULL} if not needed.
129  |   +html+ <i>z</i>-direction, <tt>PETSC_NULL</tt> if not needed.
130  | 
131  |   int *xm Number of grid points over the local part of the array in the
132  |   +latex+$x$-direction.
133  |   +html+ <i>x</i>-direction.
134  | 
135  |   int *ym Number of grid points over the local part of the array in the
136  |   +latex+$y$-direction.
137  |   +html+ <i>y</i>-direction.
138  | 
139  |   int *zm Number of grid points over the local part of the array in the
140  |   +latex+$z$-direction.
141  |   +html+ <i>z</i>-direction.
142  | 
143  |   int *dof Degrees of freedom at each node, a.k.a. number of field variables.
144  | 
145  |   int *sw Stencil width.
146  | 
147  |   DAStencilType *st Stencil type, given by the
148  |   +latex+{\tt PETSc enum} values.
149  |   +html+ <tt>PETSc enum</tt> values.
150  | 
151  |   DAPeriodicType *wrap Periodic type, given by the
152  |   +latex+{\tt PETSc enum} values.
153  |   +html+ <tt>PETSc enum</tt> values.
154  | 
155  |   char ***fieldnames Names of the field variables.
156  | 
157  |   field_plot_type **fieldtypes Data (plot) types for field variables,
158  |   +latex+{\tt PETSC\_NULL} if not needed.
159  |   +html+ <tt>PETSC_NULL</tt> if not needed.
160  | 
161  |   PetscScalar **fieldmin Minimum value of each field variable.
162  | 
163  |   PetscScalar **fieldmax Maximum value of each field variable.
164  | 
165  |   int *usermetacount Number of user metadata parameters.
166  | 
167  |   char ***usermetanames User metadata parameter names.
168  | 
169  |   char ***usermetadata User metadata parameter strings.
170  |   ++++++++++++++++++++++++++++++++++++++*/
171  | 
172  | static int IlluMultiParseXML
173  | (MPI_Comm comm, char *basename, int rank, int *compressed, int *wrongendian,
174  |  int *dim, int *px,int *py,int *pz, int *nx,int *ny,int *nz,
175  |  PetscScalar *wx,PetscScalar *wy,PetscScalar *wz, int *xm,int *ym,int *zm,
176  |  int *dof, int *sw, DAStencilType *st, DAPeriodicType *wrap,
177  |  char ***fieldnames, field_plot_type **fieldtypes, PetscScalar **fieldmin,
178  |  PetscScalar **fieldmax,
179  |  int *usermetacount, char ***usermetanames, char ***usermetadata)
180  | {
181  |   xmlDocPtr thedoc;
182  |   xmlNodePtr thenode;
183  |   char filename [1000];
184  |   xmlChar *buffer, *version;
185  |   int field=0, ierr;
186  | 
187  |   if (snprintf (filename, 999, "%s.cpu%.4d.meta", basename, rank) > 999)
188  |     SETERRQ1 (PETSC_ERR_ARG_SIZ, "Base file name too long (%d chars, 960 max)",
189  | 	      strlen (basename));
190  | 
191  |   DPRINTF ("Parsing XML file %s\n", filename);
192  |   thedoc = xmlParseFile (filename);
193  |   if (thedoc == NULL)
194  |     SETERRQ (PETSC_ERR_FILE_OPEN, "xmlParseFile returned NULL\n");
195  | 
196  |   version = xmlGetProp (thedoc -> children, BAD_CAST "version");
197  |   if (strncmp ((char *) version, "0.1", 3) &&
198  |       strncmp ((char *) version, "0.2", 3))
199  |     {
200  |       free (version);
201  |       SETERRQ (PETSC_ERR_FILE_UNEXPECTED,
202  | 	       "Version is neither 0.1 nor 0.2, can't parse\n");
203  |     }
204  | 
205  |   *wrongendian = 0;
206  |   buffer = xmlGetProp (thedoc -> children, BAD_CAST "endian");
207  | #ifdef WORDS_BIGENDIAN
208  |   if (strncasecmp ((char *) buffer, "big", 3))
209  |     *wrongendian = 1;
210  | #else
211  |   if (!strncasecmp ((char *) buffer, "big", 3))
212  |     *wrongendian = 1;
213  | #endif
214  |   free (buffer);
215  | 
216  |   buffer = xmlGetProp (thedoc -> children, BAD_CAST "compression");
217  |   /* Have to handle all of the various compressed string values */
218  |   /* TODO: make this deal with "float" and "complex" */
219  |   if (buffer[0] == 'l' || buffer[0] == 'L')
220  |     {
221  |       if (buffer[4] == 'n' || buffer[4] == 'N')
222  | 	*compressed = COMPRESS_INT_LONG | COMPRESS_GZIP_NONE;
223  |       else if (buffer[4] >= '1' && buffer[4] <= '9')
224  | 	{
225  | 	  sscanf ((char *) buffer+4, "%d", compressed);
226  | 	  *compressed |= COMPRESS_INT_LONG;
227  | 	}
228  |       else if (buffer[4] == 'b' || buffer[4] == 'B')
229  | 	*compressed = COMPRESS_INT_LONG | COMPRESS_GZIP_BEST;
230  |       else
231  | 	SETERRQ1 (PETSC_ERR_ARG_OUTOFRANGE,
232  | 		 "Unrecognized compression type %s in .meta file", buffer);
233  |     }
234  |   else if (buffer[0] == 's' || buffer[0] == 'S')
235  |     {
236  |       if (buffer[5] == 'n' || buffer[5] == 'N')
237  | 	*compressed = COMPRESS_INT_SHORT | COMPRESS_GZIP_NONE;
238  |       else if (buffer[5] >= '1' && buffer[5] <= '9')
239  | 	{
240  | 	  sscanf ((char *) buffer+5, "%d", compressed);
241  | 	  *compressed |= COMPRESS_INT_SHORT;
242  | 	}
243  |       else if (buffer[5] == 'b' || buffer[5] == 'B')
244  | 	*compressed = COMPRESS_INT_SHORT | COMPRESS_GZIP_BEST;
245  |       else
246  | 	SETERRQ1 (PETSC_ERR_ARG_OUTOFRANGE,
247  | 		 "Unrecognized compression type %s in .meta file", buffer);
248  |     }
249  |   else if (buffer[0] == 'c' || buffer[0] == 'C')
250  |     {
251  |       if (buffer[4] == 'n' || buffer[4] == 'N')
252  | 	*compressed = COMPRESS_INT_CHAR | COMPRESS_GZIP_NONE;
253  |       else if (buffer[4] >= '1' && buffer[4] <= '9')
254  | 	{
255  | 	  sscanf ((char *) buffer+4, "%d", compressed);
256  | 	  *compressed |= COMPRESS_INT_CHAR;
257  | 	}
258  |       else if (buffer[4] == 'b' || buffer[4] == 'B')
259  | 	*compressed = COMPRESS_INT_CHAR | COMPRESS_GZIP_BEST;
260  |       else
261  | 	SETERRQ1 (PETSC_ERR_ARG_OUTOFRANGE,
262  | 		 "Unrecognized compression type %s in .meta file", buffer);
263  |     }
264  |   else
265  |     {
266  |       if (buffer[0] == 'n' || buffer[0] == 'N')
267  | 	*compressed = COMPRESS_INT_NONE | COMPRESS_GZIP_NONE;
268  |       else if (buffer[0] >= '1' && buffer[0] <= '9')
269  | 	{
270  | 	  sscanf ((char *) buffer, "%d", compressed);
271  | 	  *compressed |= COMPRESS_INT_NONE;
272  | 	}
273  |       else if (buffer[0] == 'b' || buffer[0] == 'B')
274  | 	*compressed = COMPRESS_INT_NONE | COMPRESS_GZIP_BEST;
275  |       else
276  | 	SETERRQ1 (PETSC_ERR_ARG_OUTOFRANGE,
277  | 		 "Unrecognized compression type %s in .meta file", buffer);
278  |     }
279  |   free (buffer);
280  |   DPRINTF ("Root node: version %s, %s endian, compression %d|%d\n",
281  | 	   version, *wrongendian ? "switched" : "native",
282  | 	   *compressed & COMPRESS_INT_MASK, *compressed & COMPRESS_GZIP_MASK);
283  | 
284  |   *fieldnames = PETSC_NULL;
285  |   if (fieldtypes != PETSC_NULL)
286  |     *fieldtypes = PETSC_NULL;
287  |   *fieldmin = *fieldmax = PETSC_NULL;
288  |   *usermetacount = 0;
289  |   *usermetanames = *usermetadata = PETSC_NULL;
290  | 
291  |   /* The main parsing loop; because xmlStringGetNodeList() doesn't work. */
292  |   for (thenode = thedoc->children->xmlChildrenNode;
293  |        thenode;
294  |        thenode = thenode->next)
295  |     {
296  |       DPRINTF ("Looping through nodes, this is %s, fieldnames 0x%lx\n",
297  | 	       thenode->name, *fieldnames);
298  |       if (!strncasecmp ((char *) thenode->name, "GlobalCPUs", 10))
299  | 	{
300  | 	  buffer = xmlGetProp (thenode, BAD_CAST "dimensions");
301  | 	  sscanf ((char *) buffer, "%d", dim);
302  | 	  free (buffer);
303  | 
304  | 	  buffer = xmlGetProp (thenode, BAD_CAST "xwidth");
305  | 	  sscanf ((char *) buffer, "%d", px);
306  | 	  free (buffer);
307  | 
308  | 	  if (*dim > 1)
309  | 	    {
310  | 	      buffer = xmlGetProp (thenode, BAD_CAST "ywidth");
311  | 	      sscanf ((char *) buffer, "%d", py);
312  | 	      free (buffer);
313  | 
314  | 	      if (*dim > 2)
315  | 		{
316  | 		  buffer = xmlGetProp (thenode, BAD_CAST "zwidth");
317  | 		  sscanf ((char *) buffer, "%d", pz);
318  | 		  free (buffer);
319  | 		}
320  | 	      else
321  | 		*pz = 1;
322  | 	    }
323  | 	  else
324  | 	    *py = 1;
325  | 	}
326  | 
327  |       else if (!strncasecmp ((char *) thenode->name, "GlobalSize", 10))
328  | 	{
329  | 	  buffer = xmlGetProp (thenode, BAD_CAST "xwidth");
330  | 	  sscanf ((char *) buffer, "%d", nx);
331  | 	  free (buffer);
332  | 	  /*+ For GlobalSize, since there's no *size attribute (for an 0.1
333  | 	    version document), assume 1. +*/
334  | 	  buffer = xmlGetProp (thenode, BAD_CAST "xsize");
335  | 	  if (wx != PETSC_NULL)
336  | 	    {
337  | 	      if (buffer)
338  | #ifdef PETSC_USE_SINGLE
339  | 		sscanf ((char *) buffer, "%f", wx);
340  | #else
341  | 		sscanf ((char *) buffer, "%lf", wx);
342  | #endif
343  | 	      else
344  | 		*wx = 1.;
345  | 	    }
346  | 	  if (buffer)
347  | 	    free (buffer);
348  | 
349  | 	  if (*dim > 1)
350  | 	    {
351  | 	      buffer = xmlGetProp (thenode, BAD_CAST "ywidth");
352  | 	      sscanf ((char *) buffer, "%d", ny);
353  | 	      free (buffer);
354  | 	      buffer = xmlGetProp (thenode, BAD_CAST "ysize");
355  | 	      if (wy != PETSC_NULL)
356  | 		{
357  | 		  if (buffer)
358  | #ifdef PETSC_USE_SINGLE
359  | 		    sscanf ((char *) buffer, "%f", wy);
360  | #else
361  | 		    sscanf ((char *) buffer, "%lf", wy);
362  | #endif
363  | 		  else
364  | 		    *wy = 1.;
365  | 		}
366  | 	      if (buffer)
367  | 		free (buffer);
368  | 
369  | 	      if (*dim > 2)
370  | 		{
371  | 		  buffer = xmlGetProp (thenode, BAD_CAST "zwidth");
372  | 		  sscanf ((char *) buffer, "%d", nz);
373  | 		  free (buffer);
374  | 		  buffer = xmlGetProp (thenode, BAD_CAST "zsize");
375  | 		  if (wz != PETSC_NULL)
376  | 		    {
377  | 		      if (buffer)
378  | #ifdef PETSC_USE_SINGLE
379  | 			sscanf ((char *) buffer, "%f", wz);
380  | #else
381  | 			sscanf ((char *) buffer, "%lf", wz);
382  | #endif
383  | 		      else
384  | 			*wz = 1.;
385  | 		    }
386  | 		  if (buffer)
387  | 		    free (buffer);
388  | 		}
389  | 	      else
390  | 		*nz = 1;
391  | 	    }
392  | 	  else
393  | 	    *ny = *nz = 1;
394  | 
395  | 	  buffer = xmlGetProp (thenode, BAD_CAST "fields");
396  | 	  sscanf ((char *) buffer, "%d", dof);
397  | 	  free (buffer);
398  | 
399  | 	  if (*dof > field)
400  | 	    {
401  | 	      if (!(*fieldnames = (char **) realloc
402  | 		    (*fieldnames, *dof * sizeof (char *))))
403  | 		SETERRQ (PETSC_ERR_MEM, "Can't allocate field names");
404  | 	      if (fieldtypes != PETSC_NULL)
405  | 		if (!(*fieldtypes = (field_plot_type *) realloc
406  | 		      (*fieldtypes, *dof * sizeof(field_plot_type))))
407  | 		SETERRQ (PETSC_ERR_MEM, "Can't allocate field types");
408  | 	      if (!(*fieldmin = (PetscScalar *) realloc
409  | 		    (*fieldmin, *dof * sizeof(PetscScalar))))
410  | 		SETERRQ (PETSC_ERR_MEM, "Can't allocate field params");
411  | 	      if (!(*fieldmax = (PetscScalar *) realloc
412  | 		    (*fieldmax, *dof * sizeof(PetscScalar))))
413  | 		SETERRQ (PETSC_ERR_MEM, "Can't allocate field params");
414  | 	    }
415  | 	}
416  | 
417  |       else if (!strncasecmp ((char *) thenode->name, "LocalSize", 9))
418  | 	{
419  | 	  buffer = xmlGetProp (thenode, BAD_CAST "xwidth");
420  | 	  sscanf ((char *) buffer, "%d", xm);
421  | 	  free (buffer);
422  | 
423  | 	  if (*dim > 1)
424  | 	    {
425  | 	      buffer = xmlGetProp (thenode, BAD_CAST "ywidth");
426  | 	      sscanf ((char *) buffer, "%d", ym);
427  | 	      free (buffer);
428  | 
429  | 	      if (*dim > 2)
430  | 		{
431  | 		  buffer = xmlGetProp (thenode, BAD_CAST "zwidth");
432  | 		  sscanf ((char *) buffer, "%d", zm);
433  | 		  free (buffer);
434  | 		}
435  | 	      else
436  | 		*zm = 1;
437  | 	    }
438  | 	  else
439  | 	    *ym = 1;
440  | 	}
441  | 
442  |       else if (!strncasecmp ((char *) thenode->name, "Stencil", 7))
443  | 	{
444  | 	  buffer = xmlGetProp (thenode, BAD_CAST "width");
445  | 	  sscanf ((char *) buffer, "%d", sw);
446  | 	  free (buffer);
447  | 
448  | 	  buffer = xmlGetProp (thenode, BAD_CAST "type");
449  | 	  sscanf ((char *) buffer, "%d", st);
450  | 	  free (buffer);
451  | 
452  | 	  buffer = xmlGetProp (thenode, BAD_CAST "periodic");
453  | 	  sscanf ((char *) buffer, "%d", wrap);
454  | 	  free (buffer);
455  | 	}
456  | 
457  |       else if (!strncasecmp ((char *) thenode->name, "Field", 5))
458  | 	{
459  | 	  if (!*fieldnames || !*fieldmax || !*fieldmin)
460  | 	    {
461  | 	      printf ("Warning: reading a Field, but the number of them is unknown!\n");
462  | 	    }
463  | 
464  | 	  if (field >= *dof)
465  | 	    {
466  | 	      printf ("Warning: more <Field> tags than declared degrees of freedom.\nThis may be because tags are out of order.\n");
467  | 	      *fieldnames = (char **) realloc
468  | 		(*fieldnames, (field+1) * sizeof (char *));
469  | 	      if (fieldtypes != PETSC_NULL)
470  | 		*fieldtypes = (field_plot_type *) realloc
471  | 		  (*fieldtypes, (field+1) * sizeof (field_plot_type));
472  | 	      *fieldmin = (PetscScalar *) realloc
473  | 		(*fieldmin, (field+1) * sizeof (PetscScalar));
474  | 	      *fieldmax = (PetscScalar *) realloc
475  | 		(*fieldmax, (field+1) * sizeof (PetscScalar));
476  | 	    }
477  | 
478  | 	  (*fieldnames) [field] =
479  | 	    (char *) xmlGetProp (thenode, BAD_CAST "name");
480  | 	  /*+ If the type attribute is missing from the Field node (as it is in
481  | 	    version 0.1 documents), assume
482  | 	    +latex+{\tt FIELD\_SCALAR}.
483  | 	    +html+ <tt>FIELD_SCALAR</tt>. +*/
484  | 	  buffer = xmlGetProp (thenode, BAD_CAST "type");
485  | 	  if (fieldtypes)
486  | 	    {
487  | 	      if (buffer)
488  | 		sscanf ((char *) buffer, "0x%x", *fieldtypes + field);
489  | 	      else
490  | 		(*fieldtypes) [field] = FIELD_SCALAR;
491  | 	    }
492  | 	  if (buffer)
493  | 	    free(buffer);
494  | 	  buffer = xmlGetProp (thenode, BAD_CAST "min");
495  | #ifdef PETSC_USE_SINGLE
496  | 	  sscanf ((char *) buffer, "%f", *fieldmin + field);
497  | #else
498  | 	  sscanf ((char *) buffer, "%lf", *fieldmin + field);
499  | #endif
500  | 	  free (buffer);
501  | 	  buffer = xmlGetProp (thenode, BAD_CAST "max");
502  | #ifdef PETSC_USE_SINGLE
503  | 	  sscanf ((char *) buffer, "%f", *fieldmax + field);
504  | #else
505  | 	  sscanf ((char *) buffer, "%lf", *fieldmax + field);
506  | #endif
507  | 	  free (buffer);
508  | 	  field++;
509  | 	}
510  | 
511  |       else if (!strncasecmp ((char *) thenode->name, "User", 4))
512  | 	{
513  | 	  (*usermetacount)++;
514  | 	  (*usermetanames) = (char **) realloc
515  | 	    (*usermetanames, (*usermetacount) * sizeof (char *));
516  | 	  (*usermetadata) = (char **) realloc
517  | 	    (*usermetadata, (*usermetacount) * sizeof (char *));
518  | 	  (*usermetanames) [(*usermetacount)-1] =
519  | 	    (char *) xmlGetProp (thenode, BAD_CAST "name");
520  | 	  (*usermetadata) [(*usermetacount)-1] =
521  | 	    (char *) xmlGetProp (thenode, BAD_CAST "value");
522  | 	}
523  |     }
524  | 
525  |   /* This is another way to do things, which would be a whole lot easier, if it
526  |      worked... (See Debian's libxml bug list.)
527  |   thenode = xmlStringGetNodeList (thedoc, "GlobalCPUs");
528  |   printf ("thenode = 0x%lx\n", thenode);
529  |   if (!thenode)
530  |     return 1;
531  |   thenode = xmlStringGetNodeList (thedoc, "GlobalSize");
532  |   if (!thenode)
533  |     return 1;
534  |   thenode = xmlStringGetNodeList (thedoc, "LocalSize");
535  |   if (!thenode)
536  |     return 1;
537  |   thenode = xmlStringGetNodeList (thedoc, "Stencil");
538  |   if (!thenode)
539  |     return 1;
540  | 
541  |   i = 0;
542  |   thenode = xmlStringGetNodeList (thedoc, "Field");
543  |   if (!thenode)
544  |     return 1;
545  |   while (thenode)
546  |     {
547  |       i++;
548  |       thenode = thenode -> next;
549  |     }
550  | 
551  |   thenode = xmlStringGetNodeList (thedoc, "User");
552  |   while (thenode)
553  |     {
554  |       thenode = thenode -> next;
555  |       } */
556  | 
557  |   free (version);
558  |   xmlFreeDoc (thedoc);
559  | 
560  |   return 0;
561  | }
562  | 
563  | 
564  | #undef __FUNCT__
565  | #define __FUNCT__ "IlluMultiParseData"
566  | 
567  | /*++++++++++++++++++++++++++++++++++++++
568  |   This function reads in the data stored by IlluMultiStoreData(), complete with
569  |   int/gzip compression.
570  | 
571  |   int IlluMultiParseData It returns zero or an error code.
572  | 
573  |   MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD.
574  | 
575  |   PetscScalar *globalarray Array into which to load the (local) data.
576  | 
577  |   char *basename Base file name.
578  | 
579  |   int rank CPU number to read data for.
580  | 
581  |   int compressed Data compression: if zero then no compression (fastest), 1-9
582  |   then gzip compression level, 10-15 then gzip --best.  If 16-31 then save
583  |   guint32s representing relative values between min and max for each field,
584  |   compressed according to this value minus 16.  Likewise for 32-47 and
585  |   guint16s, and 48-63 for guint8s.  Yes, these alternative formats lose
586  |   information and can't be used for accurate checkpointing, but they should
587  |   retain enough data for visualization (except perhaps for the guint8s, which
588  |   are possibly acceptable for vectors but likely not contours).
589  | 
590  |   int gridpoints Number of gridpoints to read data for.
591  | 
592  |   int dof Degrees of freedom at each node, a.k.a. number of field variables.
593  | 
594  |   int wrongendian Tells whether the data are stored in the opposite endian
595  |   format from this platform, and thus must be switched when the data are
596  |   streamed in.
597  | 
598  |   PetscScalar *fieldmin Minimum value of each field variable.
599  | 
600  |   PetscScalar *fieldmax Maximum value of each field variable.
601  |   ++++++++++++++++++++++++++++++++++++++*/
602  | 
603  | static int IlluMultiParseData
604  | (MPI_Comm comm, PetscScalar *globalarray, char *basename, int rank,
605  |  int compressed, int gridpoints, int dof, int wrongendian,
606  |  PetscScalar *fieldmin, PetscScalar *fieldmax)
607  | {
608  |   int i;
609  |   char filename [1000];
610  |   FILE *infile;
611  |   size_t readoutput;
612  | 
613  |   if (compressed & COMPRESS_GZIP_MASK)
614  |     {
615  |       if (snprintf (filename, 999, "gunzip -c < %s.cpu%.4d.data",
616  | 		    basename, rank) > 999)
617  | 	SETERRQ1 (PETSC_ERR_ARG_SIZ,
618  | 		  "Base file name too long (%d chars, 960 max)",
619  | 		  strlen (basename));
620  |       if (!(infile = popen (filename, "r")))
621  | 	SETERRQ4 (PETSC_ERR_FILE_OPEN,
622  | 		  "CPU %d: error %d (%s) opening input pipe \"%s\"",
623  | 		  rank, errno, strerror (errno), filename);
624  |     }
625  |   else
626  |     {
627  |       if (snprintf (filename, 999, "%s.cpu%.4d.data", basename, rank) > 999)
628  | 	SETERRQ1 (PETSC_ERR_ARG_SIZ,
629  | 		  "Base file name too long (%d chars, 960 max)",
630  | 		  strlen (basename));
631  |       if (!(infile = fopen (filename, "r")))
632  | 	SETERRQ4 (PETSC_ERR_FILE_OPEN,
633  | 		  "CPU %d: error %d (%s) opening input file \"%s\"",
634  | 		  rank, errno, strerror (errno), filename);
635  |     }
636  | 
637  |   /* Read and store the data */
638  |   /* TODO: make this deal with float and complex */
639  |   switch (compressed & COMPRESS_INT_MASK)
640  |     {
641  |       /* Yes, this way of doing it is a bit redundant, but it seems better than
642  | 	 the multitude of subconditionals which would be required if I did it
643  | 	 the other way. */
644  |     case COMPRESS_INT_LONG:
645  |       {
646  | 	guint32 *storage;
647  | 	if (!(storage = (guint32 *) malloc
648  | 	      (gridpoints * dof * sizeof (guint32))))
649  | 	  SETERRQ (PETSC_ERR_MEM,
650  | 		   "Can't allocate data decompression buffer");
651  | 	readoutput = fread (storage, sizeof (guint32), gridpoints*dof, infile);
652  | 
653  | 	if (wrongendian)
654  | 	  {
655  | 	    for (i=0; i<gridpoints*dof; i++)
656  | 	      storage[i] = GUINT32_SWAP_LE_BE_CONSTANT (storage[i]);
657  | 	  }
658  | 	for (i=0; i<gridpoints*dof; i++)
659  | 	  globalarray[i] = fieldmin [i%dof] +
660  | 	    ((fieldmax [i%dof] - fieldmin [i%dof]) * storage[i]) / 4294967295.;
661  | 	free (storage);
662  | 	break;
663  |       }
664  |     case COMPRESS_INT_SHORT:
665  |       {
666  | 	guint16 *storage;
667  | 	if (!(storage = (guint16 *) malloc
668  | 	      (gridpoints * dof * sizeof (guint16))))
669  | 	  SETERRQ (PETSC_ERR_MEM,
670  | 		   "Can't allocate data decompression buffer");
671  | 	readoutput = fread (storage, sizeof (guint16), gridpoints*dof, infile);
672  | 
673  | 	if (wrongendian)
674  | 	  {
675  | 	    for (i=0; i<gridpoints*dof; i++)
676  | 	      storage[i] = GUINT16_SWAP_LE_BE_CONSTANT (storage[i]);
677  | 	  }
678  | 	for (i=0; i<gridpoints*dof; i++)
679  | 	  globalarray[i] = fieldmin [i%dof] +
680  | 	    ((fieldmax [i%dof] - fieldmin [i%dof]) * storage[i]) / 65535.;
681  | 	free (storage);
682  | 	break;
683  |       }
684  |     case COMPRESS_INT_CHAR:
685  |       {
686  | 	guint8 *storage;
687  | 	if (!(storage = (guint8 *) malloc
688  | 	      (gridpoints * dof * sizeof (guint8))))
689  | 	  SETERRQ (PETSC_ERR_MEM,
690  | 		   "Can't allocate data decompression buffer");
691  | 	readoutput = fread (storage, sizeof (guint8), gridpoints*dof, infile);
692  | 
693  | 	for (i=0; i<gridpoints*dof; i++)
694  | 	  globalarray[i] = fieldmin [i%dof] +
695  | 	    ((fieldmax [i%dof] - fieldmin [i%dof]) * storage[i]) / 255.;
696  | 	free (storage);
697  | 	break;
698  |       }
699  |     default:
700  |       {
701  | 	/* TODO: check for different size data, complex */
702  | 	readoutput = fread (globalarray, sizeof (PetscScalar), gridpoints*dof,
703  | 			    infile);
704  | 
705  | 	if (wrongendian)
706  | 	  {
707  | 	    if (sizeof (PetscScalar) == 8)
708  | 	      {
709  | 		for (i=0; i<gridpoints*dof; i++)
710  | 		  globalarray[i]= GUINT64_SWAP_LE_BE_CONSTANT (globalarray[i]);
711  | 	      }
712  | 	    else if (sizeof (PetscScalar) == 4)
713  | 	      {
714  | 		for (i=0; i<gridpoints*dof; i++)
715  | 		  globalarray[i]= GUINT32_SWAP_LE_BE_CONSTANT (globalarray[i]);
716  | 	      }
717  | 	    else
718  | 	      SETERRQ1 (PETSC_ERR_ARG_UNKNOWN_TYPE,
719  | 			"Unknown PetscScalar size %d (4-8 supported)",
720  | 			sizeof (PetscScalar));
721  | 	  }
722  | 
723  |       }
724  |     }
725  | 
726  |   if (compressed & COMPRESS_GZIP_MASK)
727  |     pclose (infile);
728  |   else
729  |     fclose (infile);
730  | 
731  |   if (readoutput < gridpoints*dof)
732  |     {
733  |       int readerr = ferror (infile);
734  |       SETERRQ5 (PETSC_ERR_FILE_READ,
735  | 		"CPU %d: error %d (%s) during read, got %d of %d items",
736  | 		rank, readerr, strerror (readerr), readoutput, gridpoints*dof);
737  |     }
738  | 
739  |   return 0;
740  | }
741  | 
742  | 
743  | #undef __FUNCT__
744  | #define __FUNCT__ "IlluMultiStoreXML"
745  | 
746  | /*++++++++++++++++++++++++++++++++++++++
747  |   This function opens, stores and closes the XML metadata file for IlluMulti
748  |   format storage.  It is called by IlluMultiSave().
749  | 
750  |   int IlluMultiStoreXML It returns zero or an error code.
751  | 
752  |   MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD.
753  | 
754  |   char *basename Base file name.
755  | 
756  |   int rank CPU number to store data for.
757  | 
758  |   int compressed Data compression: if zero then no compression (fastest), 1-9
759  |   then gzip compression level, 10-15 then gzip --best.  If 16-31 then save
760  |   guint32s representing relative values between min and max for each field,
761  |   compressed according to this value minus 16.  Likewise for 32-47 and
762  |   guint16s, and 48-63 for guint8s.  Yes, these alternative formats lose
763  |   information and can't be used for accurate checkpointing, but they should
764  |   retain enough data for visualization (except perhaps for the guint8s, which
765  |   are possibly acceptable for vectors but certainly not contours).
766  | 
767  |   int dim Dimensionality of the space.
768  | 
769  |   int px Number of processors in the
770  |   +latex+$x$-direction.
771  |   +html+ <i>x</i>-direction.
772  | 
773  |   int py Number of processors in the
774  |   +latex+$y$-direction.
775  |   +html+ <i>y</i>-direction.
776  | 
777  |   int pz Number of processors in the
778  |   +latex+$z$-direction.
779  |   +html+ <i>z</i>-direction.
780  | 
781  |   int nx Number of grid points over the entire array in the
782  |   +latex+$x$-direction.
783  |   +html+ <i>x</i>-direction.
784  | 
785  |   int ny Number of grid points over the entire array in the
786  |   +latex+$y$-direction.
787  |   +html+ <i>y</i>-direction.
788  | 
789  |   int nz Number of grid points over the entire array in the
790  |   +latex+$z$-direction.
791  |   +html+ <i>z</i>-direction.
792  | 
793  |   PetscScalar wx Physical overall width in the
794  |   +latex+$x$-direction.
795  |   +html+ <i>x</i>-direction.
796  | 
797  |   PetscScalar wy Physical overall width in the
798  |   +latex+$y$-direction.
799  |   +html+ <i>y</i>-direction.
800  | 
801  |   PetscScalar wz Physical overall width in the
802  |   +latex+$z$-direction.
803  |   +html+ <i>z</i>-direction.
804  | 
805  |   int xm Number of grid points over the local part of the array in the
806  |   +latex+$x$-direction.
807  |   +html+ <i>x</i>-direction.
808  | 
809  |   int ym Number of grid points over the local part of the array in the
810  |   +latex+$y$-direction.
811  |   +html+ <i>y</i>-direction.
812  | 
813  |   int zm Number of grid points over the local part of the array in the
814  |   +latex+$z$-direction.
815  |   +html+ <i>z</i>-direction.
816  | 
817  |   int dof Degrees of freedom at each node, a.k.a. number of field variables.
818  | 
819  |   int sw Stencil width.
820  | 
821  |   int st Stencil type, given by the
822  |   +latex+{\tt PETSc enum} values.
823  |   +html+ <tt>PETSc enum</tt> values.
824  | 
825  |   int wrap Periodic type, given by the
826  |   +latex+{\tt PETSc enum} values.
827  |   +html+ <tt>PETSc enum</tt> values.
828  | 
829  |   char **fieldnames Names of the field variables.
830  | 
831  |   field_plot_type *fieldtypes Data (plot) types for field variables.
832  | 
833  |   PetscReal *fieldmin Minimum value of each field variable.
834  | 
835  |   PetscReal *fieldmax Maximum value of each field variable.
836  | 
837  |   int usermetacount Number of user metadata parameters.
838  | 
839  |   char **usermetanames User metadata parameter names.
840  | 
841  |   char **usermetadata User metadata parameter strings.
842  |   ++++++++++++++++++++++++++++++++++++++*/
843  | 
844  | static int IlluMultiStoreXML
845  | (MPI_Comm comm, char *basename, int rank, int compressed,
846  |  int dim, int px,int py,int pz, int nx,int ny,int nz,
847  |  PetscScalar wx,PetscScalar wy,PetscScalar wz, int xm,int ym,int zm,
848  |  int dof, int sw, int st, int wrap, char **fieldnames,
849  |  field_plot_type *fieldtypes, PetscReal *fieldmin, PetscReal *fieldmax,
850  |  int usermetacount, char **usermetanames, char **usermetadata)
851  | {
852  |   xmlDocPtr thedoc;
853  |   xmlNodePtr thenode;
854  |   FILE *outfile;
855  |   char filename [1000];
856  |   xmlChar buffer [1000];
857  |   int i;
858  | 
859  |   /*+The XML tags in the .meta file consist of:
860  |     +latex+\begin{center} \begin{tabular}{|l|l|} \hline
861  |     +html+ <center><table BORDER>
862  |     +latex+{\tt IlluMulti} & Primary tag, with attributes {\tt version},
863  |     +latex+\\ & {\tt endian} ({\tt big} or {\tt little}) and
864  |     +latex+{\tt compression} \\ & (none, 1-9, best, float*, long*, short* or
865  |     +latex+char*\footnote{longnone, long1-long9 or longbest compresses each
866  |     +latex+double to a 32-bit unsigned int, with 0 representing the minimum for
867  |     +latex+that field and 2$^{32}-1$ representing the maximum; likewise
868  |     +latex+shortnone, short1-short9, shortbest uses 16-bit unsigned ints, and
869  |     +latex+char* uses 8-bit unsigned ints.  float is there to indicate that
870  |     +latex+PetscScalar is 4 bytes at save time, loading should adjust
871  |     +latex+accordingly; that's not fully supported just yet.  At some point
872  |     +latex+I'll have to figure out how to represent complex.}). \\ \hline
873  |     +html+ <tr><td><tt>IlluMulti</tt></td><td>Primary tag, with attributes
874  |     +html+ <tt>version</tt>,<br><tt>endian</tt> (<tt>big</tt> or
875  |     +html+ <tt>little</tt>) and <tt>compression</tt><br>(none, 1-9, best;
876  |     +html+ long*, short* or char*)</td></tr>
877  |     +*/
878  | 
879  |   thedoc = xmlNewDoc (BAD_CAST "1.0");
880  |   thedoc->children = xmlNewDocNode (thedoc,NULL, BAD_CAST"IlluMulti", NULL);
881  |   xmlSetProp (thedoc->children, BAD_CAST "version", BAD_CAST "0.2");
882  | #ifdef WORDS_BIGENDIAN
883  |   xmlSetProp (thedoc->children, BAD_CAST "endian", BAD_CAST "big");
884  | #else
885  |   xmlSetProp (thedoc->children, BAD_CAST "endian", BAD_CAST "little");
886  | #endif
887  |   if ((compressed & COMPRESS_INT_MASK) == COMPRESS_INT_LONG)
888  |     strcpy ((char *) buffer, "long");
889  |   else if ((compressed & COMPRESS_INT_MASK) == COMPRESS_INT_SHORT)
890  |     strcpy ((char *) buffer, "short");
891  |   else if ((compressed & COMPRESS_INT_MASK) == COMPRESS_INT_CHAR)
892  |     strcpy ((char *) buffer, "char");
893  |   /* Note: this doesn't really support complex types yet... */
894  |   else if (sizeof (PetscScalar) == 4)
895  |     strcpy ((char *) buffer, "float");
896  |   else
897  |     *buffer = '\0';
898  |   if ((compressed & COMPRESS_GZIP_MASK) == 0)
899  |     strcat ((char *) buffer, "none");
900  |   else if ((compressed & COMPRESS_GZIP_MASK) >= 1 &&
901  | 	   (compressed & COMPRESS_GZIP_MASK) <= 9)
902  |     sprintf ((char *) buffer + strlen ((char *) buffer), "%d",
903  | 	     compressed & COMPRESS_GZIP_MASK);
904  |   else
905  |     strcat ((char *) buffer, "best");
906  |   xmlSetProp (thedoc->children, BAD_CAST "compression", buffer);
907  | 
908  |   /*+
909  |     +latex+{\tt GlobalCPUs} & Number of CPUs in each direction, with
910  |     +latex+\\ & attributes {\tt dimensions}, {\tt xwidth}, {\tt ywidth} and
911  |     +latex+{\tt zwidth} \\ \hline
912  |     +html+ <tr><td><tt>GlobalCPUs</tt></td><td>Number of CPUs in each
913  |     +html+ direction, with<br>attributes <tt>dimensions</tt>, <tt>xwidth</tt>,
914  |     +html+ <tt>ywidth</tt> and <tt>zwidth</tt></td></tr>
915  |     +*/
916  | 
917  |   thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "GlobalCPUs", NULL);
918  |   snprintf ((char *) buffer, 999, "%d", dim);
919  |   xmlSetProp (thenode, BAD_CAST "dimensions", buffer);
920  |   snprintf ((char *) buffer, 999, "%d", px);
921  |   xmlSetProp (thenode, BAD_CAST "xwidth", buffer);
922  |   if (dim > 1)
923  |     {
924  |       snprintf ((char *) buffer, 999, "%d", py);
925  |       xmlSetProp (thenode, BAD_CAST "ywidth", buffer);
926  |       if (dim > 2)
927  | 	{
928  | 	  snprintf ((char *) buffer, 999, "%d", pz);
929  | 	  xmlSetProp (thenode, BAD_CAST "zwidth", buffer);
930  | 	}
931  |     }
932  | 
933  |   /*+
934  |     +latex+{\tt GlobalSize} & Size of the entire distributed array, with
935  |     +latex+\\ & attributes {\tt xwidth}, {\tt ywidth},
936  |     +latex+{\tt zwidth}, {\tt xsize}**, {\tt ysize}**, {\tt zsize}** and
937  |     +latex+{\tt fields} \\ \hline
938  |     +html+ <tr><td><tt>GlobalSize</tt></td><td>Size of the entire distributed
939  |     +html+ array, with<br>attributes <tt>xwidth</tt>, <tt>ywidth</tt>,
940  |     +html+ <tt>zwidth</tt>, <tt>xsize</tt>**, <tt>ysize</tt>**,
941  |     +html+ <tt>zsize</tt>** and <tt>fields</tt></td></tr>
942  |     +*/
943  | 
944  |   thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "GlobalSize", NULL);
945  |   snprintf ((char *) buffer, 999, "%d", nx);
946  |   xmlSetProp (thenode, BAD_CAST "xwidth", buffer);
947  |   snprintf ((char *) buffer, 999, "%g", wx);
948  |   xmlSetProp (thenode, BAD_CAST "xsize", buffer);
949  |   if (dim > 1)
950  |     {
951  |       snprintf ((char *) buffer, 999, "%d", ny);
952  |       xmlSetProp (thenode, BAD_CAST "ywidth", buffer);
953  |       snprintf ((char *) buffer, 999, "%g", wy);
954  |       xmlSetProp (thenode, BAD_CAST "ysize", buffer);
955  |       if (dim > 2)
956  | 	{
957  | 	  snprintf ((char *) buffer, 999, "%d", nz);
958  | 	  xmlSetProp (thenode, BAD_CAST "zwidth", buffer);
959  | 	  snprintf ((char *) buffer, 999, "%g", wz);
960  | 	  xmlSetProp (thenode, BAD_CAST "zsize", buffer);
961  | 	}
962  |     }
963  |   snprintf ((char *) buffer, 999, "%d", dof);
964  |   xmlSetProp (thenode, BAD_CAST "fields", buffer);
965  | 
966  |   /*+
967  |     +latex+{\tt LocalSize} & Size of the entire local part of the array,
968  |     +latex+\\ & with attributes {\tt xwidth}, {\tt ywidth} and {\tt zwidth}
969  |     +latex+\\ \hline
970  |     +html+ <tr><td><tt>LocalSize</tt></td><td>Size of the local part of the
971  |     +html+ array, with<br>attributes <tt>xwidth</tt>, <tt>ywidth</tt> and
972  |     +html+ <tt>zwidth</tt></td></tr>
973  |     +*/
974  | 
975  |   thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "LocalSize", NULL);
976  |   snprintf ((char *) buffer, 999, "%d", xm);
977  |   xmlSetProp (thenode, BAD_CAST "xwidth", buffer);
978  |   if (dim > 1)
979  |     {
980  |       snprintf ((char *) buffer, 999, "%d", ym);
981  |       xmlSetProp (thenode, BAD_CAST "ywidth", buffer);
982  |       if (dim > 2)
983  | 	{
984  | 	  snprintf ((char *) buffer, 999, "%d", zm);
985  | 	  xmlSetProp (thenode, BAD_CAST "zwidth", buffer);
986  | 	}
987  |     }
988  | 
989  |   /*+
990  |     +latex+{\tt Stencil} & Stencil and periodic data, with attributes
991  |     +latex+\\ & {\tt width}, {\tt type} and {\tt periodic} (using {\tt PETSc
992  |     +latex+enum} values) \\ \hline
993  |     +html+ <tr><td><tt>Stencil</tt></td><td>Stencil and periodic data, with
994  |     +html+ attributes<br><tt>width</tt>, <tt>type</tt> and <tt>periodic</tt>
995  |     +html+ (using <tt>PETSc enum</tt> values)</td></tr>
996  |     +*/
997  | 
998  |   thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "Stencil", NULL);
999  |   snprintf ((char *) buffer, 999, "%d", sw);
1000 |   xmlSetProp (thenode, BAD_CAST "width", buffer);
1001 |   snprintf ((char *) buffer, 999, "%d", (int) st);
1002 |   xmlSetProp (thenode, BAD_CAST "type", buffer);
1003 |   snprintf ((char *) buffer, 999, "%d", (int) wrap);
1004 |   xmlSetProp (thenode, BAD_CAST "periodic", buffer);
1005 | 
1006 |   /*+
1007 |     +latex+{\tt Field} & Data on each field, attributes {\tt name},
1008 |     +latex+{\tt type}**, {\tt min} and {\tt max} \\ \hline
1009 |     +html+ <tr><td><tt>Field</tt></td><td>Data on each field, attributes
1010 |     +html+ <tt>name</tt>, <tt>type</tt>*, <tt>min</tt> and
1011 |     +html+ <tt>max</tt></td></tr>
1012 |     +*/
1013 | 
1014 |   for (i=0; i<dof; i++)
1015 |     {
1016 |       thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "Field", NULL);
1017 |       xmlSetProp (thenode, BAD_CAST "name", BAD_CAST fieldnames [i]);
1018 |       snprintf ((char *) buffer, 999, "0x%.2x", fieldtypes [i]);
1019 |       xmlSetProp (thenode, BAD_CAST "type", buffer);
1020 |       snprintf ((char *) buffer, 999, "%g", fieldmin [i]);
1021 |       xmlSetProp (thenode, BAD_CAST "min", buffer);
1022 |       snprintf ((char *) buffer, 999, "%g", fieldmax [i]);
1023 |       xmlSetProp (thenode, BAD_CAST "max", buffer);
1024 |     }
1025 | 
1026 |   /*+
1027 |     +latex+{\tt User} & User parameters, attributes {\tt name} and {\tt value}
1028 |     +latex+\\ \hline \end{tabular} \end{center}
1029 |     +html+ <tr><td><tt>User</tt></td><td>User parameters, attributes
1030 |     +html+ <tt>name</tt> and <tt>value</tt></td></tr></table></center>
1031 |     *Lossy compression to smaller data types.
1032 |     +latex+\par
1033 |     +html+ <br>
1034 |     **Represents new attribute for IlluMulti 0.2 file format.
1035 |     +*/
1036 | 
1037 |   for (i=0; i<usermetacount; i++)
1038 |     {
1039 |       thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "User", NULL);
1040 |       xmlSetProp (thenode, BAD_CAST "name", BAD_CAST usermetanames [i]);
1041 |       xmlSetProp (thenode, BAD_CAST "value", BAD_CAST usermetadata [i]);
1042 |     }
1043 | 
1044 |   if (snprintf ((char *)filename, 999, "%s.cpu%.4d.meta", basename, rank) >999)
1045 |     SETERRQ1 (PETSC_ERR_ARG_SIZ, "Base file name too long (%d chars, 960 max)",
1046 | 	      strlen (basename));
1047 |   if (!(outfile = fopen (filename, "w")))
1048 |     SETERRQ4 (PETSC_ERR_FILE_OPEN,
1049 | 	      "CPU %d: error %d (%s) opening output file \"%s\"",
1050 | 	      rank, errno, strerror (errno), filename);
1051 |   xmlDocDump (outfile, thedoc);
1052 |   xmlFreeDoc (thedoc);
1053 |   fclose(outfile);
1054 | 
1055 |   return 0;
1056 | }
1057 | 
1058 | 
1059 | #undef __FUNCT__
1060 | #define __FUNCT__ "IlluMultiStoreData"
1061 | 
1062 | /*++++++++++++++++++++++++++++++++++++++
1063 |   This function stores the data file.
1064 | 
1065 |   int IlluMultiStoreData It returns zero or an error code.
1066 | 
1067 |   MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD.
1068 | 
1069 |   PetscScalar *globalarray Array from which to save the (local) data.
1070 | 
1071 |   char *basename Base file name.
1072 | 
1073 |   int rank CPU number to read data for.
1074 | 
1075 |   int compressed Data compression: if zero then no compression (fastest), 1-9
1076 |   then gzip compression level, 10-15 then gzip --best.  If 16-31 then save
1077 |   guint32s representing relative values between min and max for each field,
1078 |   compressed according to this value minus 16.  Likewise for 32-47 and
1079 |   guint16s, and 48-63 for guint8s.  Yes, these alternative formats lose
1080 |   information and can't be used for accurate checkpointing, but they should
1081 |   retain enough data for visualization (except perhaps for the guint8s, which
1082 |   are possibly acceptable for vectors but likely not contours).
1083 | 
1084 |   int gridpoints Number of gridpoints to store data for.
1085 | 
1086 |   int dof Degrees of freedom at each node, a.k.a. number of field variables.
1087 | 
1088 |   PetscScalar *fieldmin Minimum value of each field variable.
1089 | 
1090 |   PetscScalar *fieldmax Maximum value of each field variable.
1091 |   ++++++++++++++++++++++++++++++++++++++*/
1092 | 
1093 | static int IlluMultiStoreData
1094 | (MPI_Comm comm, PetscScalar *globalarray, char *basename, int rank,
1095 |  int compressed, int gridpoints, int dof, PetscScalar *fieldmin,
1096 |  PetscScalar *fieldmax)
1097 | {
1098 |   int i;
1099 |   char filename [1000];
1100 |   FILE *outfile;
1101 |   size_t writeout;
1102 | 
1103 |   if (compressed & COMPRESS_GZIP_MASK)
1104 |     {
1105 |       if ((compressed & COMPRESS_GZIP_MASK) >= 1 &&
1106 | 	  (compressed & COMPRESS_GZIP_MASK) <= 9)
1107 | 	{
1108 | 	  if (snprintf (filename, 999, "gzip -c -%d > %s.cpu%.4d.data",
1109 | 			compressed & COMPRESS_GZIP_MASK, basename, rank) > 999)
1110 | 	    SETERRQ1 (PETSC_ERR_ARG_SIZ,
1111 | 		      "Base file name too long (%d chars, 960 max)",
1112 | 		      strlen (basename));
1113 | 	}
1114 |       else
1115 | 	{
1116 | 	  if (snprintf (filename,999, "gzip -c --best > %s.cpu%.4d.data",
1117 | 			basename,rank) > 999)
1118 | 	    SETERRQ1 (PETSC_ERR_ARG_SIZ,
1119 | 		      "Base file name too long (%d chars, 960 max)",
1120 | 		      strlen (basename));
1121 | 	}
1122 |       if (!(outfile = popen (filename, "w")))
1123 | 	SETERRQ4 (PETSC_ERR_FILE_OPEN,
1124 | 		  "CPU %d: error %d (%s) opening output pipe \"%s\"",
1125 | 		  rank, errno, strerror (errno), filename);
1126 |     }
1127 |   else
1128 |     {
1129 |       if (snprintf (filename, 999, "%s.cpu%.4d.data", basename, rank) > 999)
1130 | 	SETERRQ1 (PETSC_ERR_ARG_SIZ,
1131 | 		  "Base file name too long (%d chars, 960 max)",
1132 | 		  strlen (basename));
1133 |       if (!(outfile = fopen (filename, "w")))
1134 | 	SETERRQ4 (PETSC_ERR_FILE_OPEN,
1135 | 		  "CPU %d: error %d (%s) opening output file \"%s\"",
1136 | 		  rank, errno, strerror (errno), filename);
1137 |     }
1138 | 
1139 |   switch (compressed & COMPRESS_INT_MASK)
1140 |     {
1141 |     case COMPRESS_INT_LONG:
1142 |       {
1143 | 	guint32 *storage;
1144 | 	if (!(storage = (guint32 *) malloc
1145 | 	      (gridpoints * dof * sizeof (guint32))))
1146 | 	  SETERRQ (PETSC_ERR_MEM, "Can't allocate data compression buffer");
1147 | 	for (i=0; i<gridpoints*dof; i++)
1148 | 	  storage [i] = (guint32)
1149 | 	    (4294967295. * (globalarray [i] - fieldmin [i%dof]) /
1150 | 	     (fieldmax [i%dof] - fieldmin [i%dof]));
1151 | 	writeout = fwrite (storage, sizeof (guint32), gridpoints*dof, outfile);
1152 | 	free (storage);
1153 | 	break;
1154 |       }
1155 |     case COMPRESS_INT_SHORT:
1156 |       {
1157 | 	guint16 *storage;
1158 | 	if (!(storage = (guint16 *) malloc
1159 | 	      (gridpoints * dof * sizeof (guint16))))
1160 | 	  SETERRQ (PETSC_ERR_MEM, "Can't allocate data compression buffer");
1161 | 	for (i=0; i<gridpoints*dof; i++)
1162 | 	  storage [i] = (guint16)
1163 | 	    (65535. * (globalarray [i] - fieldmin [i%dof]) /
1164 | 	     (fieldmax [i%dof] - fieldmin [i%dof]));
1165 | 	writeout = fwrite (storage, sizeof (guint16),  gridpoints*dof,outfile);
1166 | 	free (storage);
1167 | 	break;
1168 |       }
1169 |     case COMPRESS_INT_CHAR:
1170 |       {
1171 | 	guint8 *storage;
1172 | 	if (!(storage = (guint8 *) malloc
1173 | 	      (gridpoints * dof * sizeof (guint8))))
1174 | 	  SETERRQ (PETSC_ERR_MEM, "Can't allocate data compression buffer");
1175 | 	for (i=0; i<gridpoints*dof; i++)
1176 | 	  storage [i] = (guint8)
1177 | 	    (255. * (globalarray [i] - fieldmin [i%dof]) /
1178 | 	     (fieldmax [i%dof] - fieldmin [i%dof]));
1179 | 	writeout = fwrite (storage, sizeof (guint8),  gridpoints*dof, outfile);
1180 | 	free (storage);
1181 | 	break;
1182 |       }
1183 |     default:
1184 |       writeout = fwrite (globalarray, sizeof (PetscScalar), gridpoints*dof,
1185 | 			 outfile);
1186 |     }
1187 | 
1188 |   if (compressed & COMPRESS_GZIP_MASK)
1189 |     pclose (outfile);
1190 |   else
1191 |     fclose (outfile);
1192 | 
1193 |   if (writeout < gridpoints*dof)
1194 |     {
1195 |       int writerr = ferror (outfile);
1196 |       SETERRQ5 (PETSC_ERR_FILE_READ,
1197 | 		"CPU %d: error %d (%s) during write, sent %d of %d items",
1198 | 		rank, writerr, strerror (writerr), writeout, gridpoints*dof);
1199 |     }
1200 | 
1201 |   return 0;
1202 | }
1203 | 
1204 | 
1205 | #undef __FUNCT__
1206 | #define __FUNCT__ "checkagree"
1207 | 
1208 | /*++++++++++++++++++++++++++++++++++++++
1209 |   Ancillary routine for
1210 |   +latex+{\tt IlluMultiRead()}:
1211 |   +html+ <tt>IlluMultiRead()</tt>:
1212 |   checks agreement of parameters and reports disagreement if necessary.
1213 | 
1214 |   int checkagree Returns 0 if they agree, 1 otherwise.
1215 | 
1216 |   MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD.
1217 | 
1218 |   int da Integer parameter from the existing DA.
1219 | 
1220 |   int file Integer parameter read from the file.
1221 | 
1222 |   char *parameter Parameter name for reporting.
1223 |   ++++++++++++++++++++++++++++++++++++++*/
1224 | 
1225 | static inline int checkagree (MPI_Comm comm, int da, int file, char *parameter)
1226 | {
1227 |   int ierr;
1228 | 
1229 |   if (da == file)
1230 |     return 0;
1231 |   SETERRQ3 (PETSC_ERR_ARG_OUTOFRANGE,
1232 | 	    "Disagreement in parameter %s: da has %d, file %d\n",
1233 | 	    parameter, da, file);
1234 | }
1235 | 
1236 | 
1237 | #undef __FUNCT__
1238 | #define __FUNCT__ "IlluMultiRead"
1239 | 
1240 | /*++++++++++++++++++++++++++++++++++++++
1241 |   This reads the data into an existing distributed array and vector, checking
1242 |   that the sizes are right etc.
1243 | 
1244 |   int IlluMultiRead It returns zero or an error code.
1245 | 
1246 |   MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD.
1247 | 
1248 |   DA theda Distributed array object controlling the data to read.
1249 | 
1250 |   Vec X Vector into which to read the data.
1251 | 
1252 |   char *basename Base file name.
1253 | 
1254 |   int *usermetacount Pointer to an int where we put the number of user metadata
1255 |   parameters loaded.
1256 | 
1257 |   char ***usermetanames Pointer to a char ** where the loaded parameter names
1258 |   are stored.  This is
1259 |   +latex+{\tt malloc}ed by this function, so a call to {\tt free()}
1260 |   +html+ <tt>malloc</tt>ed by this function, so a call to <tt>free()</tt>
1261 |   is needed to free up its data.
1262 | 
1263 |   char ***usermetadata Pointer to a char ** where the loaded parameter strings
1264 |   are stored.  This is
1265 |   +latex+{\tt malloc}ed by this function, so a call to {\tt free()}
1266 |   +html+ <tt>malloc</tt>ed by this function, so a call to <tt>free()</tt>
1267 |   is needed to free up its data.
1268 |   ++++++++++++++++++++++++++++++++++++++*/
1269 | 
1270 | int IlluMultiRead
1271 | (MPI_Comm comm, DA theda, Vec X, char *basename,
1272 |  int *usermetacount, char ***usermetanames, char ***usermetadata)
1273 | {
1274 |   int dim, px,py,pz, nx,ny,nz, xm,ym,zm, dof, sw, size, rank, ierr;
1275 |   DAPeriodicType wrap, fwrap;
1276 |   DAStencilType st, fst;
1277 |   int fdim, fpx,fpy,fpz, fnx,fny,fnz, fxm,fym,fzm, fdof, fsw, fsize = 1;
1278 |   int compressed, wrongendian;
1279 |   char filename[1000], **fieldnames;
1280 |   FILE *infile;
1281 |   PetscScalar *globalarray, *fieldmin, *fieldmax;
1282 | 
1283 |   if (!comm)
1284 |     comm = PETSC_COMM_WORLD;
1285 | 
1286 |   /*+ First it gets the properties of the distributed array for comparison with
1287 |     the metadata. +*/
1288 |   ierr = DAGetInfo (theda, &dim, &nx,&ny,&nz, &px,&py,&pz, &dof, &sw, &wrap,
1289 | 		    &st); CHKERRQ (ierr);
1290 |   ierr = DAGetCorners (theda, PETSC_NULL,PETSC_NULL,PETSC_NULL, &xm,&ym,&zm);
1291 |   CHKERRQ (ierr);
1292 | 
1293 |   /*+ Next it parses the XML metadata file into the document tree, and reads
1294 |     its content into the appropriate structures, comparing parameters with
1295 |     those of the existing distributed array structure. +*/
1296 |   ierr = MPI_Comm_size (comm, &size); CHKERRQ (ierr);
1297 |   ierr = MPI_Comm_rank (comm, &rank); CHKERRQ (ierr);
1298 | 
1299 |   DPRINTF ("Parsing XML metadata file from basename %s\n", basename);
1300 |   if (ierr = IlluMultiParseXML
1301 |       (comm, basename, rank, &compressed, &wrongendian, &fdim, &fpx,&fpy,&fpz,
1302 |        &fnx,&fny,&fnz, PETSC_NULL,PETSC_NULL,PETSC_NULL, &fxm,&fym,&fzm, &fdof,
1303 |        &fsw, &fst, &fwrap, &fieldnames, PETSC_NULL, &fieldmin, &fieldmax,
1304 |        usermetacount, usermetanames, usermetadata))
1305 |     return ierr;
1306 | 
1307 |   /* The size>1 checks are because we support loading multiple CPUs' data into
1308 |      a 1-CPU distributed array, as long as the global dimensions are right. */
1309 |   DPRINTF ("Checking size agreement between DA and data to read\n",0);
1310 |   fsize = fpx * ((dim > 1) ? fpy : 1) * ((dim > 2) ? fpz : 1);
1311 |   if (ierr = checkagree (comm, dim, fdim, "dimensions")) return ierr;
1312 |   if (size > 1 || fsize == 1) {
1313 |     if (ierr = checkagree (comm, px, fpx, "cpu xwidth")) return ierr;
1314 |     if (dim > 1) {
1315 |       if (ierr = checkagree (comm, py, fpy, "cpu ywidth")) return ierr;
1316 |       if (dim > 2) {
1317 | 	if (ierr = checkagree (comm, pz, fpz, "cpu zwidth")) return ierr; }}}
1318 |   if (ierr = checkagree (comm, nx, fnx, "global xwidth")) return ierr;
1319 |   if (dim > 1) {
1320 |     if (ierr = checkagree (comm, ny, fny, "global ywidth")) return ierr;
1321 |     if (dim > 2) {
1322 |       if (ierr = checkagree (comm, nz, fnz, "global zwidth")) return ierr; }}
1323 |   if (size > 1 || fsize == 1) {
1324 |     if (ierr = checkagree (comm, xm, fxm, "local xwidth")) return ierr;
1325 |     if (dim > 1) {
1326 |       if (ierr = checkagree (comm, ym, fym, "local ywidth")) return ierr;
1327 |       if (dim > 2) {
1328 | 	if (ierr = checkagree (comm, zm, fzm, "local zwidth")) return ierr; }}}
1329 |   if (ierr = checkagree (comm, dof, fdof, "degrees of freedom")) return ierr;
1330 |   if (ierr = checkagree (comm, sw, fsw, "stencil width")) return ierr;
1331 |   if (ierr = checkagree (comm, (int)st, (int)fst, "stencil type")) return ierr;
1332 |   if (ierr = checkagree (comm, (int)wrap, (int)fwrap, "periodic type"))
1333 |     return ierr;
1334 | 
1335 |   /*+ Then it streams in the data in one big slurp. +*/
1336 |   ierr = VecGetArray (X, &globalarray); CHKERRQ (ierr);
1337 |   if (size > 1 || fsize == 1) /* Default condition */
1338 |     {
1339 |       DPRINTF ("Reading data\n",0);
1340 |       ierr = IlluMultiParseData
1341 | 	(comm, globalarray, basename, rank, compressed,
1342 | 	 xm * ((dim>1)?ym:1) * ((dim>2)?zm:1), dof, wrongendian, fieldmin,
1343 | 	 fieldmax);
1344 |       if (ierr)
1345 | 	{
1346 | 	  xm = VecRestoreArray (X, &globalarray); CHKERRQ (xm);
1347 | 	  return ierr;
1348 | 	}
1349 |     }
1350 |   else /* Getting distributed data into a single array */
1351 |     {
1352 |       int i,j,k, xs,ys,zs;
1353 |       PetscScalar *storage;
1354 | 
1355 |       /* Incrementally read in data to storage, store it in globalarray */
1356 |       /* First loop through the stored CPUs */
1357 |       for     (pz=0, zs=0; pz<((dim>2)?fpz:1); pz++, zs+=fzm)
1358 | 	for   (py=0, ys=0; py<((dim>1)?fpy:1); py++, ys+=fym)
1359 | 	  for (px=0, xs=0; px<fpx;            px++, xs+=fxm, rank++)
1360 | 	    {
1361 | 	      int gridpoints;
1362 | 
1363 | 	      DPRINTF ("Parsing XML metadata file for CPU %d\n", rank);
1364 | 	      if (ierr = IlluMultiParseXML
1365 | 		  (comm, basename, rank, &compressed, &wrongendian, &fdim,
1366 | 		   &fpx,&fpy,&fpz, &fnx,&fny,&fnz,
1367 | 		   PETSC_NULL,PETSC_NULL,PETSC_NULL,
1368 | 		   &fxm,&fym,&fzm, &fdof, &fsw, &fst, &fwrap,
1369 | 		   &fieldnames, PETSC_NULL, &fieldmin, &fieldmax,
1370 | 		   usermetacount, usermetanames, usermetadata))
1371 | 		return ierr;
1372 | 	      gridpoints = fxm * ((dim>1)?fym:1) * ((dim>2)?fzm:1);
1373 | 
1374 | 	      /* This allocate/read/copy/free storage business is annoying.
1375 | 		 Replacing it with "zero-copy" would involve changing the
1376 | 		 IlluMultiParseData() API to allow loading into part of the
1377 | 		 local array.  But I'll leave that for future expansion, when
1378 | 		 this will be able to do arbitrary m->n CPU loading as long as
1379 | 		 the global sizes match. */
1380 | 	      storage = (PetscScalar *) malloc
1381 | 		(gridpoints * dof * sizeof (PetscScalar));
1382 | 	      DPRINTF ("Reading data for CPU %d\n", rank);
1383 | 	      ierr = IlluMultiParseData
1384 | 		(comm, storage, basename, rank, compressed, gridpoints, dof,
1385 | 		 wrongendian, fieldmin, fieldmax);
1386 | 
1387 | 	      fxm *= dof; /* so fxm is the number of PetscScalars to copy */
1388 | 	      i=1;
1389 | 	      /* Now distribute */
1390 | 	      for (k=0; k<((dim>2)?fzm:1); k++)
1391 | 		for (j=0; j<((dim>1)?fym:1); j++)
1392 | 		  BLAScopy_ (&fxm, storage + (k*fym + j)*fxm /* *dof */, &i,
1393 | 			   globalarray + (((zs+k)*ny + ys+j)*nx + xs)*dof, &i);
1394 | 
1395 | 	      free (storage);
1396 | 	      fxm /= dof;
1397 | 	    }
1398 |     }
1399 | 
1400 |   ierr = VecRestoreArray (X, &globalarray); CHKERRQ (ierr);
1401 | 
1402 |   return 0;
1403 | }
1404 | 
1405 | 
1406 | #undef __FUNCT__
1407 | #define __FUNCT__ "IlluMultiLoad"
1408 | 
1409 | /*++++++++++++++++++++++++++++++++++++++
1410 |   This creates a new distributed array of the appropriate size and loads the
1411 |   data into the vector contained in it (as retrieved by
1412 |   +latex+{\tt DAGetVector()}).
1413 |   +html+ <tt>DAGetVector()</tt>).
1414 |   It also reads the user metadata parameters into arrays stored at the supplied
1415 |   pointers.
1416 | 
1417 |   int IlluMultiLoad It returns zero or an error code.
1418 | 
1419 |   MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD.
1420 | 
1421 |   char *basename Base file name.
1422 | 
1423 |   DA *theda Pointer to a DA object (to be created by this function).
1424 | 
1425 |   PetscScalar *wx Physical overall width in the
1426 |   +latex+$x$-direction.
1427 |   +html+ <i>x</i>-direction.
1428 | 
1429 |   PetscScalar *wy Physical overall width in the
1430 |   +latex+$y$-direction.
1431 |   +html+ <i>y</i>-direction.
1432 | 
1433 |   PetscScalar *wz Physical overall width in the
1434 |   +latex+$z$-direction.
1435 |   +html+ <i>z</i>-direction.
1436 | 
1437 |   field_plot_type **fieldtypes Data (plot) types for field variables.
1438 | 
1439 |   int *usermetacount Pointer to an int where we put the number of user metadata
1440 |   parameters loaded.
1441 | 
1442 |   char ***usermetanames Pointer to a char ** where the loaded parameter names
1443 |   are stored.  This is
1444 |   +latex+{\tt malloc}ed by this function, so a call to {\tt free()}
1445 |   +html+ <tt>malloc</tt>ed by this function, so a call to <tt>free()</tt>
1446 |   is needed to free up its data.
1447 | 
1448 |   char ***usermetadata Pointer to a char ** where the loaded parameter strings
1449 |   are stored.  This is
1450 |   +latex+{\tt malloc}ed by this function, so a call to {\tt free()}
1451 |   +html+ <tt>malloc</tt>ed by this function, so a call to <tt>free()</tt>
1452 |   is needed to free up its data.
1453 |   ++++++++++++++++++++++++++++++++++++++*/
1454 | 
1455 | int IlluMultiLoad
1456 | (MPI_Comm comm, char *basename, DA *theda, PetscScalar *wx,PetscScalar *wy,
1457 |  PetscScalar *wz, field_plot_type **fieldtypes, int *usermetacount,
1458 |  char ***usermetanames, char ***usermetadata)
1459 | {
1460 |   int dim, px,py,pz, nx,ny,nz, dof, sw, fxm,fym,fzm, rank,size, ierr;
1461 |   int compressed, wrongendian, fpx,fpy,fpz, fsize, xm,ym,zm;
1462 |   DAPeriodicType wrap;
1463 |   DAStencilType st;
1464 |   char filename[1000], **fieldnames;
1465 |   xmlChar *buffer;
1466 |   FILE *infile;
1467 |   xmlDocPtr thedoc;
1468 |   xmlNodePtr thenode;
1469 |   PetscScalar *globalarray, *fieldmin, *fieldmax;
1470 |   Vec X;
1471 | 
1472 |   if (!comm)
1473 |     comm = PETSC_COMM_WORLD;
1474 | 
1475 |   ierr = MPI_Comm_rank (comm, &rank); CHKERRQ (ierr);
1476 |   ierr = MPI_Comm_size (comm, &size); CHKERRQ (ierr);
1477 | 
1478 |   /*+ First it gets the parameters from the XML file. +*/
1479 |   DPRINTF ("Parsing XML metadata file from basename %s\n", basename);
1480 |   if (ierr = IlluMultiParseXML
1481 |       (comm, basename, rank, &compressed, &wrongendian, &dim, &fpx,&fpy,&fpz,
1482 |        &nx,&ny,&nz, wx,wy,wz, &fxm,&fym,&fzm, &dof, &sw, &st, &wrap,
1483 |        &fieldnames, fieldtypes, &fieldmin, &fieldmax,
1484 |        usermetacount, usermetanames, usermetadata))
1485 |     return ierr;
1486 | 
1487 |   fsize = fpx * ((dim > 1) ? fpy : 1) * ((dim > 2) ? fpz : 1);
1488 |   if (size == fsize) { /* Default condition: n->n */
1489 |     px = fpx; py = fpy; pz = fpz; }
1490 |   else if (size == 1)  /* Special condition: n->1 */
1491 |     px = py = pz = 1;
1492 |   else                 /* Error: incorrect number of CPUs */
1493 |     SETERRQ5 (PETSC_ERR_ARG_SIZ,
1494 | 	      "Incorrect CPU count: current %d, stored %dx%dx%d=%d\n",
1495 | 	      size, fpx,fpy,fpz, fsize);
1496 | 
1497 |   /*+ Next it creates a distributed array based on those parameters, and sets
1498 |     the names of its fields. +*/
1499 |   switch (dim)
1500 |     {
1501 |     case 1:
1502 |       {
1503 | 	DPRINTF ("Creating %d-%d 1-D DA\n", nx, dof);
1504 | 	ierr = DACreate1d (comm, wrap, nx, dof, sw, PETSC_NULL,
1505 | 			   theda); CHKERRQ (ierr);
1506 | 	break;
1507 |       }
1508 |     case 2:
1509 |       {
1510 | 	DPRINTF ("Creating %dx%d-%d 2-D DA\n", nx,ny, dof);
1511 | 	ierr = DACreate2d (comm, wrap, st, nx,ny, px,py, dof, sw,
1512 | 			   PETSC_NULL, PETSC_NULL, theda); CHKERRQ (ierr);
1513 | 	break;
1514 |       }
1515 |     case 3:
1516 |       {
1517 | 	DPRINTF ("Creating %dx%dx%d-%d 3-D DA\n", nx,ny,nz, dof);
1518 | 	ierr = DACreate3d (comm, wrap, st, nx,ny,nz, px,py,pz, dof,
1519 | 			   sw, PETSC_NULL, PETSC_NULL, PETSC_NULL, theda);
1520 | 	CHKERRQ (ierr);
1521 | 	break;
1522 |       }
1523 |     default:
1524 |       SETERRQ1 (PETSC_ERR_ARG_OUTOFRANGE,
1525 | 		"Unsupported number of dimensions %d\n", dim);
1526 |     }
1527 | 
1528 |   ierr = DAGetCorners (*theda, PETSC_NULL, PETSC_NULL, PETSC_NULL,
1529 | 		       &xm,&ym,&zm); CHKERRQ (ierr);
1530 |   if(size==fsize && (xm != fxm || ym != fym || zm != fzm))
1531 |     SETERRQ6 (PETSC_ERR_ARG_SIZ,
1532 | 	      "Local array size mismatch: DA %dx%dx%d, stored %dx%dx%d\n",
1533 | 	      xm,ym,zm, fxm,fym,fzm);
1534 | 
1535 |   for (px=0; px<dof; px++)
1536 |     DASetFieldName (*theda, px, fieldnames [px]);
1537 | 
1538 |   /*+ Then it streams the data into the distributed array's vector in one big
1539 |     slurp. +*/
1540 |   DPRINTF ("Getting global vector and array\n",0);
1541 |   ierr = DAGetGlobalVector (*theda, &X); CHKERRQ (ierr);
1542 |   ierr = VecGetArray (X, &globalarray); CHKERRQ (ierr);
1543 |   if (size > 1 || fsize == 1) /* Default condition */
1544 |     {
1545 |       DPRINTF ("Loading data in parallel\n",0);
1546 |       ierr = IlluMultiParseData
1547 | 	(comm, globalarray, basename, rank, compressed,
1548 | 	 fxm * ((dim>1)?fym:1) * ((dim>2)?fzm:1), dof, wrongendian, fieldmin,
1549 | 	 fieldmax);
1550 |       if (ierr)
1551 | 	{
1552 | 	  fxm = VecRestoreArray (X, &globalarray); CHKERRQ (fxm);
1553 | 	  return ierr;
1554 | 	}
1555 |     }
1556 |   else /* Getting distributed data into a single array */
1557 |     {
1558 |       int i,j,k, xs,ys,zs;
1559 |       PetscScalar *storage;
1560 | 
1561 |       /* Incrementally read in data to storage, store it in globalarray */
1562 |       /* First loop through the stored CPUs */
1563 |       DPRINTF ("Loading data into a single array on 1 CPU\n",0);
1564 |       for     (pz=0, zs=0; pz<((dim>2)?fpz:1); pz++, zs+=fzm)
1565 | 	for   (py=0, ys=0; py<((dim>1)?fpy:1); py++, ys+=fym)
1566 | 	  for (px=0, xs=0; px<fpx;             px++, xs+=fxm, rank++)
1567 | 	    {
1568 | 	      int gridpoints, fdim, fnx,fny,fnz, fdof, fsw;
1569 | 	      DAPeriodicType fwrap;
1570 | 	      DAStencilType fst;
1571 | 
1572 | 	      if (ierr = IlluMultiParseXML
1573 | 		  (comm, basename, rank, &compressed, &wrongendian, &fdim,
1574 | 		   &fpx,&fpy,&fpz, &fnx,&fny,&fnz,
1575 | 		   PETSC_NULL,PETSC_NULL,PETSC_NULL,
1576 | 		   &fxm,&fym,&fzm, &fdof, &fsw, &fst, &fwrap,
1577 | 		   &fieldnames, PETSC_NULL, &fieldmin, &fieldmax,
1578 | 		   usermetacount, usermetanames, usermetadata))
1579 | 		return ierr;
1580 | 	      gridpoints = fxm * ((dim>1)?fym:1) * ((dim>2)?fzm:1);
1581 | 
1582 | 	      /* This allocate/read/copy/free storage business is annoying.
1583 | 		 Replacing it with "zero-copy" would involve changing the
1584 | 		 IlluMultiParseData() API to allow loading into part of the
1585 | 		 local array.  But I'll leave that for future expansion, when
1586 | 		 this will be able to do arbitrary m->n CPU loading as long as
1587 | 		 the global sizes match. */
1588 | 	      storage = (PetscScalar *) malloc
1589 | 		(gridpoints * dof * sizeof (PetscScalar));
1590 | 	      ierr = IlluMultiParseData
1591 | 		(comm, storage, basename, rank, compressed, gridpoints, dof,
1592 | 		 wrongendian, fieldmin, fieldmax);
1593 | 
1594 | 	      fxm *= dof; /* so fxm is the number of PetscScalars to copy */
1595 | 	      i=1;
1596 | 	      /* Now distribute */
1597 | 	      for (k=0; k<((dim>2)?fzm:1); k++)
1598 | 		for (j=0; j<((dim>1)?fym:1); j++)
1599 | 		  BLAScopy_ (&fxm, storage + (k*fym + j)*fxm /* *dof */, &i,
1600 | 			   globalarray + (((zs+k)*ny + ys+j)*nx + xs)*dof, &i);
1601 | 
1602 | 	      free (storage);
1603 | 	      fxm /= dof;
1604 | 	    }
1605 |     }
1606 | 
1607 |   DPRINTF ("Done.\n",0);
1608 |   ierr = VecRestoreArray (X, &globalarray); CHKERRQ (ierr);
1609 |   ierr = DARestoreGlobalVector (*theda, &X); CHKERRQ (ierr);
1610 | 
1611 |   return 0;
1612 | }
1613 | 
1614 | 
1615 | #undef __FUNCT__
1616 | #define __FUNCT__ "IlluMultiSave"
1617 | 
1618 | /*++++++++++++++++++++++++++++++++++++++
1619 |   This saves the vector X in multiple files, two per process.
1620 | 
1621 |   int IlluMultiSave it returns zero or an error code.
1622 | 
1623 |   MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD.
1624 | 
1625 |   DA theda Distributed array object controlling data saved.
1626 | 
1627 |   Vec X Vector whose data are actually being saved.
1628 | 
1629 |   char *basename Base file name.
1630 | 
1631 |   int usermetacount Number of user metadata parameters.
1632 | 
1633 |   char **usermetanames User metadata parameter names.
1634 | 
1635 |   char **usermetadata User metadata parameter strings.
1636 | 
1637 |   int compressed Data compression: if zero then no compression (fastest), 1-9
1638 |   then gzip compression level, 10-15 then gzip --best.  If 16-31 then save
1639 |   guint32s representing relative values between min and max for each field,
1640 |   compressed according to this value minus 16.  Likewise for 32-47 and
1641 |   guint16s, and 48-63 for guint8s.  Yes, these alternative formats lose
1642 |   information and can't be used for accurate checkpointing, but they should
1643 |   retain enough data for visualization (except perhaps for the guint8s, which
1644 |   are possibly acceptable for vectors but certainly not contours).
1645 |   ++++++++++++++++++++++++++++++++++++++*/
1646 | 
1647 | int IlluMultiSave
1648 | (MPI_Comm comm, DA theda, Vec X, char *basename, PetscScalar wx,PetscScalar wy,
1649 |  PetscScalar wz, field_plot_type *fieldtypes, int usermetacount,
1650 |  char **usermetanames, char **usermetadata, int compressed)
1651 | {
1652 |   int dim, nx,ny,nz, px,py,pz, dof, sw, xs,ys,zs, xm,ym,zm, rank, i, ierr;
1653 |   DAPeriodicType wrap;
1654 |   DAStencilType st;
1655 |   FILE *outfile;
1656 |   char filename[1000], **fieldnames;
1657 |   PetscReal *fieldmin, *fieldmax;
1658 |   PetscScalar *globalarray;
1659 |   guint64 *array64;
1660 |   guint32 *array32;
1661 |   guint16 *array16;
1662 |   guint8 *array8;
1663 | 
1664 |   if (!comm)
1665 |     comm = PETSC_COMM_WORLD;
1666 | 
1667 |   /*+First a check to verify a supported value of
1668 |     +latex+{\tt compressed},
1669 |     +html+ <tt>compressed</tt>,
1670 |     but no fancy guint* compression for complex!
1671 |     +*/
1672 | #ifdef PETSC_USE_COMPLEX
1673 |   if (compressed < 0 || compressed > COMPRESS_GZIP_MASK)
1674 |     SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Unsupported compression type code 0x%x",
1675 | 	     compressed);
1676 | #else
1677 |   if (compressed < 0 || compressed > (COMPRESS_GZIP_MASK | COMPRESS_INT_MASK))
1678 |     SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Unsupported compression type code 0x%x",
1679 | 	     compressed);
1680 | #endif
1681 | 
1682 |   /*+Then get the distributed array parameters and processor number, and store
1683 |     all this data in the XML .meta file. +*/
1684 |   ierr = DAGetInfo (theda, &dim, &nx,&ny,&nz, &px,&py,&pz, &dof, &sw, &wrap,
1685 | 		    &st); CHKERRQ (ierr);
1686 |   ierr = DAGetCorners (theda, &xs,&ys,&zs, &xm,&ym,&zm); CHKERRQ (ierr);
1687 |   ierr = MPI_Comm_rank (comm, &rank); CHKERRQ (ierr);
1688 | 
1689 |   if (!(fieldnames = (char **) malloc (dof * sizeof (char *))))
1690 |     SETERRQ (PETSC_ERR_MEM, "Can't allocate field names");
1691 |   if (!(fieldmin = (PetscScalar *) malloc (2 * dof * sizeof (PetscScalar))))
1692 |     SETERRQ (PETSC_ERR_MEM, "Can't allocate field params");
1693 |   fieldmax = fieldmin + dof;
1694 |   for (i=0; i<dof; i++)
1695 |     {
1696 |       ierr = DAGetFieldName (theda, i, fieldnames + i); CHKERRQ (ierr);
1697 |       ierr = VecStrideMin (X, i, PETSC_NULL, fieldmin + i); CHKERRQ (ierr);
1698 |       ierr = VecStrideMax (X, i, PETSC_NULL, fieldmax + i); CHKERRQ (ierr);
1699 |     }
1700 | 
1701 |   if (ierr = IlluMultiStoreXML
1702 |       (comm, basename, rank, compressed, dim, px,py,pz, nx,ny,nz, wx,wy,wz,
1703 |        xm,ym,zm, dof, sw, (int) st, (int) wrap, fieldnames,fieldtypes,
1704 |        fieldmin,fieldmax, usermetacount, usermetanames, usermetadata))
1705 |     return ierr;
1706 | 
1707 |   /*+ Finally, the data just stream out to the data file or gzip pipe in one
1708 |     big lump. +*/
1709 |   ierr = VecGetArray (X, &globalarray); CHKERRQ (ierr);
1710 |   IlluMultiStoreData (comm, globalarray, basename, rank, compressed,
1711 | 		      xm * ((dim>1)?ym:1) * ((dim>2)?zm:1), dof,
1712 | 		      fieldmin, fieldmax);
1713 |   ierr = VecRestoreArray (X, &globalarray); CHKERRQ (ierr);
1714 | 
1715 |   free (fieldmin);
1716 |   free (fieldnames);
1717 | 
1718 |   return 0;
1719 | }