1    | /***************************************
2    |   $Header: /cvsroot/petscgraphics/illumulti.c,v 1.34 2006/01/27 03:04:12 hazelsct Exp $
3    | 
4    |   This file contains the functions
5    |   +latex+{\tt IlluMultiSave()}, {\tt IlluMultiLoad()} and {\tt IlluMultiRead()}
6    |   +html+ <tt>IlluMultiSave()</tt>, <tt>IlluMultiLoad()</tt> and
7    |   +html+ <tt>IlluMultiRead()</tt>
8    |   designed to handle distributed storage and retrieval of data on local drives
9    |   of machines in a Beowulf cluster.  This should allow rapid loading of
10   |   timestep data for "playback" from what is essentially a giant RAID-0 array of
11   |   distributed disks, at enormously higher speeds than via NFS from a hard drive
12   |   or RAID array on the head node.  The danger of course is that if one node's
13   |   disk goes down, you don't have a valid data set any more, but that's the
14   |   nature of RAID-0, right?
15   |   +latex+\par
16   |   +html+ <p>
17   |   The filenames saved are:
18   |   +latex+\begin{itemize} \item {\tt $<$basename$>$.cpu\#\#\#\#.meta}, where
19   |   +latex+{\tt \#\#\#\#}
20   |   +html+ <ul><li><tt>&lt;basename&gt.cpu####.meta</tt>, where <tt>####</tt>
21   |   is replaced by the CPU number (more than four digits if there are more than
22   |   9999 CPUs :-), which has the metadata for the whole thing in XML format
23   |   (written by
24   |   +latex+{\tt GNOME libxml}),
25   |   +html+ <tt>GNOME libxml</tt>),
26   |   as described in the notes on the
27   |   +latex+{\tt IlluMultiStoreXML()} function.
28   |   +html+ <tt>IlluMultiStoreXML()</tt> function.
29   |   +latex+\item {\tt $<$basename$>$.cpu\#\#\#\#.data}
30   |   +html+ </li><li><tt>&lt;basename&gt.cpu####.data</tt>
31   |   which is simply a stream of the raw data, optionally compressed by
32   |   +latex+{\tt gzip}. \end{itemize}
33   |   +html+ <tt>gzip</tt>.</li></ul>
34   |   If one were saving timesteps, one might include a timestep number in the
35   |   basename, and also timestep and simulation time in the metadata.  The
36   |   metadata can also hold simulation parameters, etc.
37   |   +latex+\par
38   |   +html+ <p>
39   |   This supports 1-D, 2-D and 3-D distributed arrays.  As an extra feature, you
40   |   can load a multi-CPU distributed array scattered over lots of files into a
41   |   single CPU, to facilitate certain modes of data visualization.
42   | ***************************************/
43   | 
44   | 
45   | #include "illuminator.h"     /* Also includes petscda.h */
46   | #include <glib.h>            /* for guint*, GUINT*_SWAP_LE_BE */
47   | #include <petscblaslapack.h> /* for BLAScopy_() */
48   | #include <libxml/tree.h>     /* To build the XML tree to store */
49   | #include <libxml/parser.h>   /* XML parsing */
50   | #include <stdio.h>           /* FILE, etc. */
51   | #include <errno.h>           /* errno */
52   | #include <string.h>          /* strncpy(), etc. */
53   | #include <stdlib.h>          /* malloc() and free() */
54   | 
55   | /* Build with -DDEBUG for debugging output */
56   | #undef DPRINTF
57   | #ifdef DEBUG
58   | #define DPRINTF(fmt, args...) ierr = PetscPrintf (comm, "%s: " fmt, __FUNCT__, args); CHKERRQ (ierr)
59   | #else
60   | #define DPRINTF(fmt, args...)
61   | #endif
62   | 
63   | 
64   | #undef __FUNCT__
65   | #define __FUNCT__ "IlluMultiParseXML"
66   | 
67   | /*++++++++++++++++++++++++++++++++++++++
68   |   This function reads in the XML metadata document and returns the various
69   |   parameter values in the addresses pointed to by the arguments.  It is called
70   |   by IlluMultiLoad() and IlluMultiRead().
71   | 
72   |   int IlluMultiParseXML It returns zero or an error code.
73   | 
74   |   MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD.
75   | 
76   |   char *basename Base file name.
77   | 
78   |   int rank CPU number to read data for.
79   | 
80   |   int *compressed Data compression: if zero then no compression (fastest), 1-9
81   |   then gzip compression level, 10-15 then gzip --best.  If 16-31 then save
82   |   guint32s representing relative values between min and max for each field,
83   |   compressed according to this value minus 16.  Likewise for 32-47 and
84   |   guint16s, and 48-63 for guint8s.  Yes, these alternative formats lose
85   |   information and can't be used for accurate checkpointing, but they should
86   |   retain enough data for visualization (except perhaps for the guint8s, which
87   |   are possibly acceptable for vectors but likely not contours).
88   | 
89   |   int *wrongendian Tells whether the data are stored in the opposite endian
90   |   format from this platform, and thus must be switched when the data are
91   |   streamed in.
92   | 
93   |   int *dim Dimensionality of the space.
94   | 
95   |   int *px Number of processors in the
96   |   +latex+$x$-direction.
97   |   +html+ <i>x</i>-direction.
98   | 
99   |   int *py Number of processors in the
100  |   +latex+$y$-direction.
101  |   +html+ <i>y</i>-direction.
102  | 
103  |   int *pz Number of processors in the
104  |   +latex+$z$-direction.
105  |   +html+ <i>z</i>-direction.
106  | 
107  |   int *nx Number of grid points over the entire array in the
108  |   +latex+$x$-direction.
109  |   +html+ <i>x</i>-direction.
110  | 
111  |   int *ny Number of grid points over the entire array in the
112  |   +latex+$y$-direction.
113  |   +html+ <i>y</i>-direction.
114  | 
115  |   int *nz Number of grid points over the entire array in the
116  |   +latex+$z$-direction.
117  |   +html+ <i>z</i>-direction.
118  | 
119  |   PetscScalar *wx Physical overall width in the
120  |   +latex+$x$-direction, {\tt PETSC\_NULL} if not needed.
121  |   +html+ <i>x</i>-direction, <tt>PETSC_NULL</tt> if not needed.
122  | 
123  |   PetscScalar *wy Physical overall width in the
124  |   +latex+$y$-direction, {\tt PETSC\_NULL} if not needed.
125  |   +html+ <i>y</i>-direction, <tt>PETSC_NULL</tt> if not needed.
126  | 
127  |   PetscScalar *wz Physical overall width in the
128  |   +latex+$z$-direction, {\tt PETSC\_NULL} if not needed.
129  |   +html+ <i>z</i>-direction, <tt>PETSC_NULL</tt> if not needed.
130  | 
131  |   int *xm Number of grid points over the local part of the array in the
132  |   +latex+$x$-direction.
133  |   +html+ <i>x</i>-direction.
134  | 
135  |   int *ym Number of grid points over the local part of the array in the
136  |   +latex+$y$-direction.
137  |   +html+ <i>y</i>-direction.
138  | 
139  |   int *zm Number of grid points over the local part of the array in the
140  |   +latex+$z$-direction.
141  |   +html+ <i>z</i>-direction.
142  | 
143  |   int *dof Degrees of freedom at each node, a.k.a. number of field variables.
144  | 
145  |   int *sw Stencil width.
146  | 
147  |   DAStencilType *st Stencil type, given by the
148  |   +latex+{\tt PETSc enum} values.
149  |   +html+ <tt>PETSc enum</tt> values.
150  | 
151  |   DAPeriodicType *wrap Periodic type, given by the
152  |   +latex+{\tt PETSc enum} values.
153  |   +html+ <tt>PETSc enum</tt> values.
154  | 
155  |   char ***fieldnames Names of the field variables.
156  | 
157  |   field_plot_type **fieldtypes Data (plot) types for field variables,
158  |   +latex+{\tt PETSC\_NULL} if not needed.
159  |   +html+ <tt>PETSC_NULL</tt> if not needed.
160  | 
161  |   PetscScalar **fieldmin Minimum value of each field variable.
162  | 
163  |   PetscScalar **fieldmax Maximum value of each field variable.
164  | 
165  |   int *usermetacount Number of user metadata parameters.
166  | 
167  |   char ***usermetanames User metadata parameter names.
168  | 
169  |   char ***usermetadata User metadata parameter strings.
170  |   ++++++++++++++++++++++++++++++++++++++*/
171  | 
172  | static int IlluMultiParseXML
173  | (MPI_Comm comm, char *basename, int rank, int *compressed, int *wrongendian,
174  |  int *dim, int *px,int *py,int *pz, int *nx,int *ny,int *nz,
175  |  PetscScalar *wx,PetscScalar *wy,PetscScalar *wz, int *xm,int *ym,int *zm,
176  |  int *dof, int *sw, DAStencilType *st, DAPeriodicType *wrap,
177  |  char ***fieldnames, field_plot_type **fieldtypes, PetscScalar **fieldmin,
178  |  PetscScalar **fieldmax,
179  |  int *usermetacount, char ***usermetanames, char ***usermetadata)
180  | {
181  |   xmlDocPtr thedoc;
182  |   xmlNodePtr thenode;
183  |   char filename [1000];
184  |   xmlChar *buffer, *version;
185  |   int field=0, ierr;
186  | 
187  |   if (snprintf (filename, 999, "%s.cpu%.4d.meta", basename, rank) > 999)
188  |     return 1;
189  | 
190  |   DPRINTF ("Parsing XML file %s\n", filename);
191  |   thedoc = xmlParseFile (filename);
192  |   if (thedoc == NULL)
193  |     IllErrorHandler (PETSC_ERR_FILE_OPEN, "xmlParseFile returned NULL\n");
194  | 
195  |   version = xmlGetProp (thedoc -> children, BAD_CAST "version");
196  |   if (strncmp ((char *) version, "0.1", 3) &&
197  |       strncmp ((char *) version, "0.2", 3))
198  |     {
199  |       free (version);
200  |       IllErrorHandler (PETSC_ERR_FILE_UNEXPECTED,
201  | 		       "Version is neither 0.1 nor 0.2, can't parse\n");
202  |     }
203  | 
204  |   *wrongendian = 0;
205  |   buffer = xmlGetProp (thedoc -> children, BAD_CAST "endian");
206  | #ifdef WORDS_BIGENDIAN
207  |   if (strncasecmp ((char *) buffer, "big", 3))
208  |     *wrongendian = 1;
209  | #else
210  |   if (!strncasecmp ((char *) buffer, "big", 3))
211  |     *wrongendian = 1;
212  | #endif
213  |   free (buffer);
214  | 
215  |   buffer = xmlGetProp (thedoc -> children, BAD_CAST "compression");
216  |   /* Have to handle all of the various compressed string values */
217  |   /* TODO: make this deal with "float" and "complex" */
218  |   if (buffer[0] == 'l' || buffer[0] == 'L')
219  |     {
220  |       if (buffer[4] == 'n' || buffer[4] == 'N')
221  | 	*compressed = COMPRESS_INT_LONG | COMPRESS_GZIP_NONE;
222  |       else if (buffer[4] >= '1' && buffer[4] <= '9')
223  | 	{
224  | 	  sscanf ((char *) buffer+4, "%d", compressed);
225  | 	  *compressed |= COMPRESS_INT_LONG;
226  | 	}
227  |       else if (buffer[4] == 'b' || buffer[4] == 'B')
228  | 	*compressed = COMPRESS_INT_LONG | COMPRESS_GZIP_BEST;
229  |       else
230  | 	return 1;
231  |     }
232  |   else if (buffer[0] == 's' || buffer[0] == 'S')
233  |     {
234  |       if (buffer[5] == 'n' || buffer[5] == 'N')
235  | 	*compressed = COMPRESS_INT_SHORT | COMPRESS_GZIP_NONE;
236  |       else if (buffer[5] >= '1' && buffer[5] <= '9')
237  | 	{
238  | 	  sscanf ((char *) buffer+5, "%d", compressed);
239  | 	  *compressed |= COMPRESS_INT_SHORT;
240  | 	}
241  |       else if (buffer[5] == 'b' || buffer[5] == 'B')
242  | 	*compressed = COMPRESS_INT_SHORT | COMPRESS_GZIP_BEST;
243  |       else
244  | 	return 1;
245  |     }
246  |   else if (buffer[0] == 'c' || buffer[0] == 'C')
247  |     {
248  |       if (buffer[4] == 'n' || buffer[4] == 'N')
249  | 	*compressed = COMPRESS_INT_CHAR | COMPRESS_GZIP_NONE;
250  |       else if (buffer[4] >= '1' && buffer[4] <= '9')
251  | 	{
252  | 	  sscanf ((char *) buffer+4, "%d", compressed);
253  | 	  *compressed |= COMPRESS_INT_CHAR;
254  | 	}
255  |       else if (buffer[4] == 'b' || buffer[4] == 'B')
256  | 	*compressed = COMPRESS_INT_CHAR | COMPRESS_GZIP_BEST;
257  |       else
258  | 	return 1;
259  |     }
260  |   else
261  |     {
262  |       if (buffer[0] == 'n' || buffer[0] == 'N')
263  | 	*compressed = COMPRESS_INT_NONE | COMPRESS_GZIP_NONE;
264  |       else if (buffer[0] >= '1' && buffer[0] <= '9')
265  | 	{
266  | 	  sscanf ((char *) buffer, "%d", compressed);
267  | 	  *compressed |= COMPRESS_INT_NONE;
268  | 	}
269  |       else if (buffer[0] == 'b' || buffer[0] == 'B')
270  | 	*compressed = COMPRESS_INT_NONE | COMPRESS_GZIP_BEST;
271  |       else
272  | 	return 1;
273  |     }
274  |   free (buffer);
275  |   DPRINTF ("Root node: version %s, %s endian, compression %d|%d\n",
276  | 	   version, *wrongendian ? "switched" : "native",
277  | 	   *compressed & COMPRESS_INT_MASK, *compressed & COMPRESS_GZIP_MASK);
278  | 
279  |   *fieldnames = PETSC_NULL;
280  |   if (fieldtypes != PETSC_NULL)
281  |     *fieldtypes = PETSC_NULL;
282  |   *fieldmin = *fieldmax = PETSC_NULL;
283  |   *usermetacount = 0;
284  |   *usermetanames = *usermetadata = PETSC_NULL;
285  | 
286  |   /* The main parsing loop; because xmlStringGetNodeList() doesn't work. */
287  |   for (thenode = thedoc->children->xmlChildrenNode;
288  |        thenode;
289  |        thenode = thenode->next)
290  |     {
291  |       DPRINTF ("Looping through nodes, this is %s, fieldnames 0x%lx\n",
292  | 	       thenode->name, *fieldnames);
293  |       if (!strncasecmp ((char *) thenode->name, "GlobalCPUs", 10))
294  | 	{
295  | 	  buffer = xmlGetProp (thenode, BAD_CAST "dimensions");
296  | 	  sscanf ((char *) buffer, "%d", dim);
297  | 	  free (buffer);
298  | 
299  | 	  buffer = xmlGetProp (thenode, BAD_CAST "xwidth");
300  | 	  sscanf ((char *) buffer, "%d", px);
301  | 	  free (buffer);
302  | 
303  | 	  if (*dim > 1)
304  | 	    {
305  | 	      buffer = xmlGetProp (thenode, BAD_CAST "ywidth");
306  | 	      sscanf ((char *) buffer, "%d", py);
307  | 	      free (buffer);
308  | 
309  | 	      if (*dim > 2)
310  | 		{
311  | 		  buffer = xmlGetProp (thenode, BAD_CAST "zwidth");
312  | 		  sscanf ((char *) buffer, "%d", pz);
313  | 		  free (buffer);
314  | 		}
315  | 	      else
316  | 		*pz = 1;
317  | 	    }
318  | 	  else
319  | 	    *py = 1;
320  | 	}
321  | 
322  |       else if (!strncasecmp ((char *) thenode->name, "GlobalSize", 10))
323  | 	{
324  | 	  buffer = xmlGetProp (thenode, BAD_CAST "xwidth");
325  | 	  sscanf ((char *) buffer, "%d", nx);
326  | 	  free (buffer);
327  | 	  /*+ For GlobalSize, since there's no *size attribute (for an 0.1
328  | 	    version document), assume 1. +*/
329  | 	  buffer = xmlGetProp (thenode, BAD_CAST "xsize");
330  | 	  if (wx != PETSC_NULL)
331  | 	    {
332  | 	      if (buffer)
333  | #ifdef PETSC_USE_SINGLE
334  | 		sscanf ((char *) buffer, "%f", wx);
335  | #else
336  | 		sscanf ((char *) buffer, "%lf", wx);
337  | #endif
338  | 	      else
339  | 		*wx = 1.;
340  | 	    }
341  | 	  if (buffer)
342  | 	    free (buffer);
343  | 
344  | 	  if (*dim > 1)
345  | 	    {
346  | 	      buffer = xmlGetProp (thenode, BAD_CAST "ywidth");
347  | 	      sscanf ((char *) buffer, "%d", ny);
348  | 	      free (buffer);
349  | 	      buffer = xmlGetProp (thenode, BAD_CAST "ysize");
350  | 	      if (wy != PETSC_NULL)
351  | 		{
352  | 		  if (buffer)
353  | #ifdef PETSC_USE_SINGLE
354  | 		    sscanf ((char *) buffer, "%f", wy);
355  | #else
356  | 		    sscanf ((char *) buffer, "%lf", wy);
357  | #endif
358  | 		  else
359  | 		    *wy = 1.;
360  | 		}
361  | 	      if (buffer)
362  | 		free (buffer);
363  | 
364  | 	      if (*dim > 2)
365  | 		{
366  | 		  buffer = xmlGetProp (thenode, BAD_CAST "zwidth");
367  | 		  sscanf ((char *) buffer, "%d", nz);
368  | 		  free (buffer);
369  | 		  buffer = xmlGetProp (thenode, BAD_CAST "zsize");
370  | 		  if (wz != PETSC_NULL)
371  | 		    {
372  | 		      if (buffer)
373  | #ifdef PETSC_USE_SINGLE
374  | 			sscanf ((char *) buffer, "%f", wz);
375  | #else
376  | 			sscanf ((char *) buffer, "%lf", wz);
377  | #endif
378  | 		      else
379  | 			*wz = 1.;
380  | 		    }
381  | 		  if (buffer)
382  | 		    free (buffer);
383  | 		}
384  | 	      else
385  | 		*nz = 1;
386  | 	    }
387  | 	  else
388  | 	    *ny = *nz = 1;
389  | 
390  | 	  buffer = xmlGetProp (thenode, BAD_CAST "fields");
391  | 	  sscanf ((char *) buffer, "%d", dof);
392  | 	  free (buffer);
393  | 
394  | 	  if (*dof > field)
395  | 	    {
396  | 	      if (!(*fieldnames = (char **) realloc
397  | 		    (*fieldnames, *dof * sizeof (char *))))
398  | 		return 1;
399  | 	      if (fieldtypes != PETSC_NULL)
400  | 		if (!(*fieldtypes = (field_plot_type *) realloc
401  | 		      (*fieldtypes, *dof * sizeof(field_plot_type))))
402  | 		  return 1;
403  | 	      if (!(*fieldmin = (PetscScalar *) realloc
404  | 		    (*fieldmin, *dof * sizeof(PetscScalar))))
405  | 		return 1;
406  | 	      if (!(*fieldmax = (PetscScalar *) realloc
407  | 		    (*fieldmax, *dof * sizeof(PetscScalar))))
408  | 		return 1;
409  | 	    }
410  | 	}
411  | 
412  |       else if (!strncasecmp ((char *) thenode->name, "LocalSize", 9))
413  | 	{
414  | 	  buffer = xmlGetProp (thenode, BAD_CAST "xwidth");
415  | 	  sscanf ((char *) buffer, "%d", xm);
416  | 	  free (buffer);
417  | 
418  | 	  if (*dim > 1)
419  | 	    {
420  | 	      buffer = xmlGetProp (thenode, BAD_CAST "ywidth");
421  | 	      sscanf ((char *) buffer, "%d", ym);
422  | 	      free (buffer);
423  | 
424  | 	      if (*dim > 2)
425  | 		{
426  | 		  buffer = xmlGetProp (thenode, BAD_CAST "zwidth");
427  | 		  sscanf ((char *) buffer, "%d", zm);
428  | 		  free (buffer);
429  | 		}
430  | 	      else
431  | 		*zm = 1;
432  | 	    }
433  | 	  else
434  | 	    *ym = 1;
435  | 	}
436  | 
437  |       else if (!strncasecmp ((char *) thenode->name, "Stencil", 7))
438  | 	{
439  | 	  buffer = xmlGetProp (thenode, BAD_CAST "width");
440  | 	  sscanf ((char *) buffer, "%d", sw);
441  | 	  free (buffer);
442  | 
443  | 	  buffer = xmlGetProp (thenode, BAD_CAST "type");
444  | 	  sscanf ((char *) buffer, "%d", st);
445  | 	  free (buffer);
446  | 
447  | 	  buffer = xmlGetProp (thenode, BAD_CAST "periodic");
448  | 	  sscanf ((char *) buffer, "%d", wrap);
449  | 	  free (buffer);
450  | 	}
451  | 
452  |       else if (!strncasecmp ((char *) thenode->name, "Field", 5))
453  | 	{
454  | 	  if (!*fieldnames || !*fieldmax || !*fieldmin)
455  | 	    {
456  | 	      printf ("Warning: reading a Field, but the number of them is unknown!\n");
457  | 	    }
458  | 
459  | 	  if (field >= *dof)
460  | 	    {
461  | 	      printf ("Warning: more <Field> tags than declared degrees of freedom.\nThis may be because tags are out of order.\n");
462  | 	      *fieldnames = (char **) realloc
463  | 		(*fieldnames, (field+1) * sizeof (char *));
464  | 	      if (fieldtypes != PETSC_NULL)
465  | 		*fieldtypes = (field_plot_type *) realloc
466  | 		  (*fieldtypes, (field+1) * sizeof (field_plot_type));
467  | 	      *fieldmin = (PetscScalar *) realloc
468  | 		(*fieldmin, (field+1) * sizeof (PetscScalar));
469  | 	      *fieldmax = (PetscScalar *) realloc
470  | 		(*fieldmax, (field+1) * sizeof (PetscScalar));
471  | 	    }
472  | 
473  | 	  (*fieldnames) [field] =
474  | 	    (char *) xmlGetProp (thenode, BAD_CAST "name");
475  | 	  /*+ If the type attribute is missing from the Field node (as it is in
476  | 	    version 0.1 documents), assume
477  | 	    +latex+{\tt FIELD\_SCALAR}.
478  | 	    +html+ <tt>FIELD_SCALAR</tt>. +*/
479  | 	  buffer = xmlGetProp (thenode, BAD_CAST "type");
480  | 	  if (fieldtypes)
481  | 	    {
482  | 	      if (buffer)
483  | 		sscanf ((char *) buffer, "0x%x", *fieldtypes + field);
484  | 	      else
485  | 		(*fieldtypes) [field] = FIELD_SCALAR;
486  | 	    }
487  | 	  if (buffer)
488  | 	    free(buffer);
489  | 	  buffer = xmlGetProp (thenode, BAD_CAST "min");
490  | #ifdef PETSC_USE_SINGLE
491  | 	  sscanf ((char *) buffer, "%f", *fieldmin + field);
492  | #else
493  | 	  sscanf ((char *) buffer, "%lf", *fieldmin + field);
494  | #endif
495  | 	  free (buffer);
496  | 	  buffer = xmlGetProp (thenode, BAD_CAST "max");
497  | #ifdef PETSC_USE_SINGLE
498  | 	  sscanf ((char *) buffer, "%f", *fieldmax + field);
499  | #else
500  | 	  sscanf ((char *) buffer, "%lf", *fieldmax + field);
501  | #endif
502  | 	  free (buffer);
503  | 	  field++;
504  | 	}
505  | 
506  |       else if (!strncasecmp ((char *) thenode->name, "User", 4))
507  | 	{
508  | 	  (*usermetacount)++;
509  | 	  (*usermetanames) = (char **) realloc
510  | 	    (*usermetanames, (*usermetacount) * sizeof (char *));
511  | 	  (*usermetadata) = (char **) realloc
512  | 	    (*usermetadata, (*usermetacount) * sizeof (char *));
513  | 	  (*usermetanames) [(*usermetacount)-1] =
514  | 	    (char *) xmlGetProp (thenode, BAD_CAST "name");
515  | 	  (*usermetadata) [(*usermetacount)-1] =
516  | 	    (char *) xmlGetProp (thenode, BAD_CAST "value");
517  | 	}
518  |     }
519  | 
520  |   /* This is another way to do things, which would be a whole lot easier, if it
521  |      worked... (See Debian's libxml bug list.)
522  |   thenode = xmlStringGetNodeList (thedoc, "GlobalCPUs");
523  |   printf ("thenode = 0x%lx\n", thenode);
524  |   if (!thenode)
525  |     return 1;
526  |   thenode = xmlStringGetNodeList (thedoc, "GlobalSize");
527  |   if (!thenode)
528  |     return 1;
529  |   thenode = xmlStringGetNodeList (thedoc, "LocalSize");
530  |   if (!thenode)
531  |     return 1;
532  |   thenode = xmlStringGetNodeList (thedoc, "Stencil");
533  |   if (!thenode)
534  |     return 1;
535  | 
536  |   i = 0;
537  |   thenode = xmlStringGetNodeList (thedoc, "Field");
538  |   if (!thenode)
539  |     return 1;
540  |   while (thenode)
541  |     {
542  |       i++;
543  |       thenode = thenode -> next;
544  |     }
545  | 
546  |   thenode = xmlStringGetNodeList (thedoc, "User");
547  |   while (thenode)
548  |     {
549  |       thenode = thenode -> next;
550  |       } */
551  | 
552  |   free (version);
553  |   xmlFreeDoc (thedoc);
554  | 
555  |   return 0;
556  | }
557  | 
558  | 
559  | #undef __FUNCT__
560  | #define __FUNCT__ "IlluMultiParseData"
561  | 
562  | /*++++++++++++++++++++++++++++++++++++++
563  |   This function reads in the data stored by IlluMultiStoreData(), complete with
564  |   int/gzip compression.
565  | 
566  |   int IlluMultiParseData It returns zero or an error code.
567  | 
568  |   MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD.
569  | 
570  |   PetscScalar *globalarray Array into which to load the (local) data.
571  | 
572  |   char *basename Base file name.
573  | 
574  |   int rank CPU number to read data for.
575  | 
576  |   int compressed Data compression: if zero then no compression (fastest), 1-9
577  |   then gzip compression level, 10-15 then gzip --best.  If 16-31 then save
578  |   guint32s representing relative values between min and max for each field,
579  |   compressed according to this value minus 16.  Likewise for 32-47 and
580  |   guint16s, and 48-63 for guint8s.  Yes, these alternative formats lose
581  |   information and can't be used for accurate checkpointing, but they should
582  |   retain enough data for visualization (except perhaps for the guint8s, which
583  |   are possibly acceptable for vectors but likely not contours).
584  | 
585  |   int gridpoints Number of gridpoints to read data for.
586  | 
587  |   int dof Degrees of freedom at each node, a.k.a. number of field variables.
588  | 
589  |   int wrongendian Tells whether the data are stored in the opposite endian
590  |   format from this platform, and thus must be switched when the data are
591  |   streamed in.
592  | 
593  |   PetscScalar *fieldmin Minimum value of each field variable.
594  | 
595  |   PetscScalar *fieldmax Maximum value of each field variable.
596  |   ++++++++++++++++++++++++++++++++++++++*/
597  | 
598  | static int IlluMultiParseData
599  | (MPI_Comm comm, PetscScalar *globalarray, char *basename, int rank,
600  |  int compressed, int gridpoints, int dof, int wrongendian,
601  |  PetscScalar *fieldmin, PetscScalar *fieldmax)
602  | {
603  |   int i;
604  |   char filename [1000];
605  |   FILE *infile;
606  |   size_t readoutput;
607  | 
608  |   if (compressed & COMPRESS_GZIP_MASK)
609  |     {
610  |       if (snprintf (filename, 999, "gunzip -c < %s.cpu%.4d.data",
611  | 		    basename, rank) > 999)
612  | 	return 1;
613  |       if (!(infile = popen (filename, "r")))
614  | 	{
615  | 	  char openerror [1050];
616  | 	  snprintf (openerror, 1049,
617  | 		    "CPU %d: error %d (%s) opening input pipe \"%s\"",
618  | 		    rank, errno, strerror (errno), filename);
619  | 	  SETERRQ (PETSC_ERR_FILE_OPEN, openerror);
620  | 	}
621  |     }
622  |   else
623  |     {
624  |       if (snprintf (filename, 999, "%s.cpu%.4d.data", basename, rank) > 999)
625  | 	return 1;
626  |       if (!(infile = fopen (filename, "r")))
627  | 	{
628  | 	  char openerror [1050];
629  | 	  snprintf (openerror, 1049,
630  | 		    "CPU %d: error %d (%s) opening input file %s",
631  | 		    rank, errno, strerror (errno), filename);
632  | 	  SETERRQ (PETSC_ERR_FILE_OPEN, openerror);
633  | 	}
634  |     }
635  | 
636  |   /* Read and store the data */
637  |   /* TODO: make this deal with float and complex */
638  |   switch (compressed & COMPRESS_INT_MASK)
639  |     {
640  |       /* Yes, this way of doing it is a bit redundant, but it seems better than
641  | 	 the multitude of subconditionals which would be required if I did it
642  | 	 the other way. */
643  |     case COMPRESS_INT_LONG:
644  |       {
645  | 	guint32 *storage;
646  | 	if (!(storage = (guint32 *) malloc
647  | 	      (gridpoints * dof * sizeof (guint32))))
648  | 	  return 1;
649  | 	readoutput = fread (storage, sizeof (guint32), gridpoints*dof, infile);
650  | 
651  | 	if (wrongendian)
652  | 	  {
653  | 	    for (i=0; i<gridpoints*dof; i++)
654  | 	      storage[i] = GUINT32_SWAP_LE_BE_CONSTANT (storage[i]);
655  | 	  }
656  | 	for (i=0; i<gridpoints*dof; i++)
657  | 	  globalarray[i] = fieldmin [i%dof] +
658  | 	    ((fieldmax [i%dof] - fieldmin [i%dof]) * storage[i]) / 4294967295.;
659  | 	free (storage);
660  | 	break;
661  |       }
662  |     case COMPRESS_INT_SHORT:
663  |       {
664  | 	guint16 *storage;
665  | 	if (!(storage = (guint16 *) malloc
666  | 	      (gridpoints * dof * sizeof (guint16))))
667  | 	  return 1;
668  | 	readoutput = fread (storage, sizeof (guint16), gridpoints*dof, infile);
669  | 
670  | 	if (wrongendian)
671  | 	  {
672  | 	    for (i=0; i<gridpoints*dof; i++)
673  | 	      storage[i] = GUINT16_SWAP_LE_BE_CONSTANT (storage[i]);
674  | 	  }
675  | 	for (i=0; i<gridpoints*dof; i++)
676  | 	  globalarray[i] = fieldmin [i%dof] +
677  | 	    ((fieldmax [i%dof] - fieldmin [i%dof]) * storage[i]) / 65535.;
678  | 	free (storage);
679  | 	break;
680  |       }
681  |     case COMPRESS_INT_CHAR:
682  |       {
683  | 	guint8 *storage;
684  | 	if (!(storage = (guint8 *) malloc
685  | 	      (gridpoints * dof * sizeof (guint8))))
686  | 	  return 1;
687  | 	readoutput = fread (storage, sizeof (guint8), gridpoints*dof, infile);
688  | 
689  | 	for (i=0; i<gridpoints*dof; i++)
690  | 	  globalarray[i] = fieldmin [i%dof] +
691  | 	    ((fieldmax [i%dof] - fieldmin [i%dof]) * storage[i]) / 255.;
692  | 	free (storage);
693  | 	break;
694  |       }
695  |     default:
696  |       {
697  | 	/* TODO: check for different size data, complex */
698  | 	readoutput = fread (globalarray, sizeof (PetscScalar), gridpoints*dof,
699  | 			    infile);
700  | 
701  | 	if (wrongendian)
702  | 	  {
703  | 	    if (sizeof (PetscScalar) == 8)
704  | 	      {
705  | 		for (i=0; i<gridpoints*dof; i++)
706  | 		  globalarray[i]= GUINT64_SWAP_LE_BE_CONSTANT (globalarray[i]);
707  | 	      }
708  | 	    else if (sizeof (PetscScalar) == 4)
709  | 	      {
710  | 		for (i=0; i<gridpoints*dof; i++)
711  | 		  globalarray[i]= GUINT32_SWAP_LE_BE_CONSTANT (globalarray[i]);
712  | 	      }
713  | 	    else return 1;
714  | 	  }
715  | 
716  |       }
717  |     }
718  | 
719  |   if (compressed & COMPRESS_GZIP_MASK)
720  |     pclose (infile);
721  |   else
722  |     fclose (infile);
723  | 
724  |   if (readoutput < gridpoints*dof)
725  |     {
726  |       char readerror [1050];
727  |       int readerr = ferror (infile);
728  |       snprintf (readerror,1049, "CPU %d: error %d (%s) during read, got %d of %d items",
729  | 		rank, readerr, strerror (readerr), readoutput, gridpoints*dof);
730  |       SETERRQ (PETSC_ERR_FILE_READ, readerror);
731  |     }
732  | 
733  |   return 0;
734  | }
735  | 
736  | 
737  | #undef __FUNCT__
738  | #define __FUNCT__ "IlluMultiStoreXML"
739  | 
740  | /*++++++++++++++++++++++++++++++++++++++
741  |   This function opens, stores and closes the XML metadata file for IlluMulti
742  |   format storage.  It is called by IlluMultiSave().
743  | 
744  |   int IlluMultiStoreXML It returns zero or an error code.
745  | 
746  |   MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD.
747  | 
748  |   char *basename Base file name.
749  | 
750  |   int rank CPU number to store data for.
751  | 
752  |   int compressed Data compression: if zero then no compression (fastest), 1-9
753  |   then gzip compression level, 10-15 then gzip --best.  If 16-31 then save
754  |   guint32s representing relative values between min and max for each field,
755  |   compressed according to this value minus 16.  Likewise for 32-47 and
756  |   guint16s, and 48-63 for guint8s.  Yes, these alternative formats lose
757  |   information and can't be used for accurate checkpointing, but they should
758  |   retain enough data for visualization (except perhaps for the guint8s, which
759  |   are possibly acceptable for vectors but certainly not contours).
760  | 
761  |   int dim Dimensionality of the space.
762  | 
763  |   int px Number of processors in the
764  |   +latex+$x$-direction.
765  |   +html+ <i>x</i>-direction.
766  | 
767  |   int py Number of processors in the
768  |   +latex+$y$-direction.
769  |   +html+ <i>y</i>-direction.
770  | 
771  |   int pz Number of processors in the
772  |   +latex+$z$-direction.
773  |   +html+ <i>z</i>-direction.
774  | 
775  |   int nx Number of grid points over the entire array in the
776  |   +latex+$x$-direction.
777  |   +html+ <i>x</i>-direction.
778  | 
779  |   int ny Number of grid points over the entire array in the
780  |   +latex+$y$-direction.
781  |   +html+ <i>y</i>-direction.
782  | 
783  |   int nz Number of grid points over the entire array in the
784  |   +latex+$z$-direction.
785  |   +html+ <i>z</i>-direction.
786  | 
787  |   PetscScalar wx Physical overall width in the
788  |   +latex+$x$-direction.
789  |   +html+ <i>x</i>-direction.
790  | 
791  |   PetscScalar wy Physical overall width in the
792  |   +latex+$y$-direction.
793  |   +html+ <i>y</i>-direction.
794  | 
795  |   PetscScalar wz Physical overall width in the
796  |   +latex+$z$-direction.
797  |   +html+ <i>z</i>-direction.
798  | 
799  |   int xm Number of grid points over the local part of the array in the
800  |   +latex+$x$-direction.
801  |   +html+ <i>x</i>-direction.
802  | 
803  |   int ym Number of grid points over the local part of the array in the
804  |   +latex+$y$-direction.
805  |   +html+ <i>y</i>-direction.
806  | 
807  |   int zm Number of grid points over the local part of the array in the
808  |   +latex+$z$-direction.
809  |   +html+ <i>z</i>-direction.
810  | 
811  |   int dof Degrees of freedom at each node, a.k.a. number of field variables.
812  | 
813  |   int sw Stencil width.
814  | 
815  |   int st Stencil type, given by the
816  |   +latex+{\tt PETSc enum} values.
817  |   +html+ <tt>PETSc enum</tt> values.
818  | 
819  |   int wrap Periodic type, given by the
820  |   +latex+{\tt PETSc enum} values.
821  |   +html+ <tt>PETSc enum</tt> values.
822  | 
823  |   char **fieldnames Names of the field variables.
824  | 
825  |   field_plot_type *fieldtypes Data (plot) types for field variables.
826  | 
827  |   PetscReal *fieldmin Minimum value of each field variable.
828  | 
829  |   PetscReal *fieldmax Maximum value of each field variable.
830  | 
831  |   int usermetacount Number of user metadata parameters.
832  | 
833  |   char **usermetanames User metadata parameter names.
834  | 
835  |   char **usermetadata User metadata parameter strings.
836  |   ++++++++++++++++++++++++++++++++++++++*/
837  | 
838  | static int IlluMultiStoreXML
839  | (MPI_Comm comm, char *basename, int rank, int compressed,
840  |  int dim, int px,int py,int pz, int nx,int ny,int nz,
841  |  PetscScalar wx,PetscScalar wy,PetscScalar wz, int xm,int ym,int zm,
842  |  int dof, int sw, int st, int wrap, char **fieldnames,
843  |  field_plot_type *fieldtypes, PetscReal *fieldmin, PetscReal *fieldmax,
844  |  int usermetacount, char **usermetanames, char **usermetadata)
845  | {
846  |   xmlDocPtr thedoc;
847  |   xmlNodePtr thenode;
848  |   FILE *outfile;
849  |   char filename [1000];
850  |   xmlChar buffer [1000];
851  |   int i;
852  | 
853  |   /*+The XML tags in the .meta file consist of:
854  |     +latex+\begin{center} \begin{tabular}{|l|l|} \hline
855  |     +html+ <center><table BORDER>
856  |     +latex+{\tt IlluMulti} & Primary tag, with attributes {\tt version},
857  |     +latex+\\ & {\tt endian} ({\tt big} or {\tt little}) and
858  |     +latex+{\tt compression} \\ & (none, 1-9, best, float*, long*, short* or
859  |     +latex+char*\footnote{longnone, long1-long9 or longbest compresses each
860  |     +latex+double to a 32-bit unsigned int, with 0 representing the minimum for
861  |     +latex+that field and 2$^{32}-1$ representing the maximum; likewise
862  |     +latex+shortnone, short1-short9, shortbest uses 16-bit unsigned ints, and
863  |     +latex+char* uses 8-bit unsigned ints.  float is there to indicate that
864  |     +latex+PetscScalar is 4 bytes at save time, loading should adjust
865  |     +latex+accordingly; that's not fully supported just yet.  At some point
866  |     +latex+I'll have to figure out how to represent complex.}). \\ \hline
867  |     +html+ <tr><td><tt>IlluMulti</tt></td><td>Primary tag, with attributes
868  |     +html+ <tt>version</tt>,<br><tt>endian</tt> (<tt>big</tt> or
869  |     +html+ <tt>little</tt>) and <tt>compression</tt><br>(none, 1-9, best;
870  |     +html+ long*, short* or char*)</td></tr>
871  |     +*/
872  | 
873  |   thedoc = xmlNewDoc (BAD_CAST "1.0");
874  |   thedoc->children = xmlNewDocNode (thedoc,NULL, BAD_CAST"IlluMulti", NULL);
875  |   xmlSetProp (thedoc->children, BAD_CAST "version", BAD_CAST "0.2");
876  | #ifdef WORDS_BIGENDIAN
877  |   xmlSetProp (thedoc->children, BAD_CAST "endian", BAD_CAST "big");
878  | #else
879  |   xmlSetProp (thedoc->children, BAD_CAST "endian", BAD_CAST "little");
880  | #endif
881  |   if ((compressed & COMPRESS_INT_MASK) == COMPRESS_INT_LONG)
882  |     strcpy ((char *) buffer, "long");
883  |   else if ((compressed & COMPRESS_INT_MASK) == COMPRESS_INT_SHORT)
884  |     strcpy ((char *) buffer, "short");
885  |   else if ((compressed & COMPRESS_INT_MASK) == COMPRESS_INT_CHAR)
886  |     strcpy ((char *) buffer, "char");
887  |   /* Note: this doesn't really support complex types yet... */
888  |   else if (sizeof (PetscScalar) == 4)
889  |     strcpy ((char *) buffer, "float");
890  |   else
891  |     *buffer = '\0';
892  |   if ((compressed & COMPRESS_GZIP_MASK) == 0)
893  |     strcat ((char *) buffer, "none");
894  |   else if ((compressed & COMPRESS_GZIP_MASK) >= 1 &&
895  | 	   (compressed & COMPRESS_GZIP_MASK) <= 9)
896  |     sprintf ((char *) buffer + strlen ((char *) buffer), "%d",
897  | 	     compressed & COMPRESS_GZIP_MASK);
898  |   else
899  |     strcat ((char *) buffer, "best");
900  |   xmlSetProp (thedoc->children, BAD_CAST "compression", buffer);
901  | 
902  |   /*+
903  |     +latex+{\tt GlobalCPUs} & Number of CPUs in each direction, with
904  |     +latex+\\ & attributes {\tt dimensions}, {\tt xwidth}, {\tt ywidth} and
905  |     +latex+{\tt zwidth} \\ \hline
906  |     +html+ <tr><td><tt>GlobalCPUs</tt></td><td>Number of CPUs in each
907  |     +html+ direction, with<br>attributes <tt>dimensions</tt>, <tt>xwidth</tt>,
908  |     +html+ <tt>ywidth</tt> and <tt>zwidth</tt></td></tr>
909  |     +*/
910  | 
911  |   thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "GlobalCPUs", NULL);
912  |   snprintf ((char *) buffer, 999, "%d", dim);
913  |   xmlSetProp (thenode, BAD_CAST "dimensions", buffer);
914  |   snprintf ((char *) buffer, 999, "%d", px);
915  |   xmlSetProp (thenode, BAD_CAST "xwidth", buffer);
916  |   if (dim > 1)
917  |     {
918  |       snprintf ((char *) buffer, 999, "%d", py);
919  |       xmlSetProp (thenode, BAD_CAST "ywidth", buffer);
920  |       if (dim > 2)
921  | 	{
922  | 	  snprintf ((char *) buffer, 999, "%d", pz);
923  | 	  xmlSetProp (thenode, BAD_CAST "zwidth", buffer);
924  | 	}
925  |     }
926  | 
927  |   /*+
928  |     +latex+{\tt GlobalSize} & Size of the entire distributed array, with
929  |     +latex+\\ & attributes {\tt xwidth}, {\tt ywidth},
930  |     +latex+{\tt zwidth}, {\tt xsize}**, {\tt ysize}**, {\tt zsize}** and
931  |     +latex+{\tt fields} \\ \hline
932  |     +html+ <tr><td><tt>GlobalSize</tt></td><td>Size of the entire distributed
933  |     +html+ array, with<br>attributes <tt>xwidth</tt>, <tt>ywidth</tt>,
934  |     +html+ <tt>zwidth</tt>, <tt>xsize</tt>**, <tt>ysize</tt>**,
935  |     +html+ <tt>zsize</tt>** and <tt>fields</tt></td></tr>
936  |     +*/
937  | 
938  |   thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "GlobalSize", NULL);
939  |   snprintf ((char *) buffer, 999, "%d", nx);
940  |   xmlSetProp (thenode, BAD_CAST "xwidth", buffer);
941  |   snprintf ((char *) buffer, 999, "%g", wx);
942  |   xmlSetProp (thenode, BAD_CAST "xsize", buffer);
943  |   if (dim > 1)
944  |     {
945  |       snprintf ((char *) buffer, 999, "%d", ny);
946  |       xmlSetProp (thenode, BAD_CAST "ywidth", buffer);
947  |       snprintf ((char *) buffer, 999, "%g", wy);
948  |       xmlSetProp (thenode, BAD_CAST "ysize", buffer);
949  |       if (dim > 2)
950  | 	{
951  | 	  snprintf ((char *) buffer, 999, "%d", nz);
952  | 	  xmlSetProp (thenode, BAD_CAST "zwidth", buffer);
953  | 	  snprintf ((char *) buffer, 999, "%g", wz);
954  | 	  xmlSetProp (thenode, BAD_CAST "zsize", buffer);
955  | 	}
956  |     }
957  |   snprintf ((char *) buffer, 999, "%d", dof);
958  |   xmlSetProp (thenode, BAD_CAST "fields", buffer);
959  | 
960  |   /*+
961  |     +latex+{\tt LocalSize} & Size of the entire local part of the array,
962  |     +latex+\\ & with attributes {\tt xwidth}, {\tt ywidth} and {\tt zwidth}
963  |     +latex+\\ \hline
964  |     +html+ <tr><td><tt>LocalSize</tt></td><td>Size of the local part of the
965  |     +html+ array, with<br>attributes <tt>xwidth</tt>, <tt>ywidth</tt> and
966  |     +html+ <tt>zwidth</tt></td></tr>
967  |     +*/
968  | 
969  |   thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "LocalSize", NULL);
970  |   snprintf ((char *) buffer, 999, "%d", xm);
971  |   xmlSetProp (thenode, BAD_CAST "xwidth", buffer);
972  |   if (dim > 1)
973  |     {
974  |       snprintf ((char *) buffer, 999, "%d", ym);
975  |       xmlSetProp (thenode, BAD_CAST "ywidth", buffer);
976  |       if (dim > 2)
977  | 	{
978  | 	  snprintf ((char *) buffer, 999, "%d", zm);
979  | 	  xmlSetProp (thenode, BAD_CAST "zwidth", buffer);
980  | 	}
981  |     }
982  | 
983  |   /*+
984  |     +latex+{\tt Stencil} & Stencil and periodic data, with attributes
985  |     +latex+\\ & {\tt width}, {\tt type} and {\tt periodic} (using {\tt PETSc
986  |     +latex+enum} values) \\ \hline
987  |     +html+ <tr><td><tt>Stencil</tt></td><td>Stencil and periodic data, with
988  |     +html+ attributes<br><tt>width</tt>, <tt>type</tt> and <tt>periodic</tt>
989  |     +html+ (using <tt>PETSc enum</tt> values)</td></tr>
990  |     +*/
991  | 
992  |   thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "Stencil", NULL);
993  |   snprintf ((char *) buffer, 999, "%d", sw);
994  |   xmlSetProp (thenode, BAD_CAST "width", buffer);
995  |   snprintf ((char *) buffer, 999, "%d", (int) st);
996  |   xmlSetProp (thenode, BAD_CAST "type", buffer);
997  |   snprintf ((char *) buffer, 999, "%d", (int) wrap);
998  |   xmlSetProp (thenode, BAD_CAST "periodic", buffer);
999  | 
1000 |   /*+
1001 |     +latex+{\tt Field} & Data on each field, attributes {\tt name},
1002 |     +latex+{\tt type}**, {\tt min} and {\tt max} \\ \hline
1003 |     +html+ <tr><td><tt>Field</tt></td><td>Data on each field, attributes
1004 |     +html+ <tt>name</tt>, <tt>type</tt>*, <tt>min</tt> and
1005 |     +html+ <tt>max</tt></td></tr>
1006 |     +*/
1007 | 
1008 |   for (i=0; i<dof; i++)
1009 |     {
1010 |       thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "Field", NULL);
1011 |       xmlSetProp (thenode, BAD_CAST "name", BAD_CAST fieldnames [i]);
1012 |       snprintf ((char *) buffer, 999, "0x%.2x", fieldtypes [i]);
1013 |       xmlSetProp (thenode, BAD_CAST "type", buffer);
1014 |       snprintf ((char *) buffer, 999, "%g", fieldmin [i]);
1015 |       xmlSetProp (thenode, BAD_CAST "min", buffer);
1016 |       snprintf ((char *) buffer, 999, "%g", fieldmax [i]);
1017 |       xmlSetProp (thenode, BAD_CAST "max", buffer);
1018 |     }
1019 | 
1020 |   /*+
1021 |     +latex+{\tt User} & User parameters, attributes {\tt name} and {\tt value}
1022 |     +latex+\\ \hline \end{tabular} \end{center}
1023 |     +html+ <tr><td><tt>User</tt></td><td>User parameters, attributes
1024 |     +html+ <tt>name</tt> and <tt>value</tt></td></tr></table></center>
1025 |     *Lossy compression to smaller data types.
1026 |     +latex+\par
1027 |     +html+ <br>
1028 |     **Represents new attribute for IlluMulti 0.2 file format.
1029 |     +*/
1030 | 
1031 |   for (i=0; i<usermetacount; i++)
1032 |     {
1033 |       thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "User", NULL);
1034 |       xmlSetProp (thenode, BAD_CAST "name", BAD_CAST usermetanames [i]);
1035 |       xmlSetProp (thenode, BAD_CAST "value", BAD_CAST usermetadata [i]);
1036 |     }
1037 | 
1038 |   if (snprintf ((char *)filename, 999, "%s.cpu%.4d.meta", basename, rank) >999)
1039 |     return 1;
1040 |   if (!(outfile = fopen (filename, "w")))
1041 |     {
1042 |       char openerror [1050];
1043 |       snprintf (openerror,1049, "CPU %d: error %d (%s) opening output file %s",
1044 | 		rank, errno, strerror (errno), filename);
1045 |       SETERRQ (PETSC_ERR_FILE_OPEN, openerror);
1046 |     }
1047 |   xmlDocDump (outfile, thedoc);
1048 |   xmlFreeDoc (thedoc);
1049 |   fclose(outfile);
1050 | 
1051 |   return 0;
1052 | }
1053 | 
1054 | 
1055 | #undef __FUNCT__
1056 | #define __FUNCT__ "IlluMultiStoreData"
1057 | 
1058 | /*++++++++++++++++++++++++++++++++++++++
1059 |   This function stores the data file.
1060 | 
1061 |   int IlluMultiStoreData It returns zero or an error code.
1062 | 
1063 |   MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD.
1064 | 
1065 |   PetscScalar *globalarray Array from which to save the (local) data.
1066 | 
1067 |   char *basename Base file name.
1068 | 
1069 |   int rank CPU number to read data for.
1070 | 
1071 |   int compressed Data compression: if zero then no compression (fastest), 1-9
1072 |   then gzip compression level, 10-15 then gzip --best.  If 16-31 then save
1073 |   guint32s representing relative values between min and max for each field,
1074 |   compressed according to this value minus 16.  Likewise for 32-47 and
1075 |   guint16s, and 48-63 for guint8s.  Yes, these alternative formats lose
1076 |   information and can't be used for accurate checkpointing, but they should
1077 |   retain enough data for visualization (except perhaps for the guint8s, which
1078 |   are possibly acceptable for vectors but likely not contours).
1079 | 
1080 |   int gridpoints Number of gridpoints to store data for.
1081 | 
1082 |   int dof Degrees of freedom at each node, a.k.a. number of field variables.
1083 | 
1084 |   PetscScalar *fieldmin Minimum value of each field variable.
1085 | 
1086 |   PetscScalar *fieldmax Maximum value of each field variable.
1087 |   ++++++++++++++++++++++++++++++++++++++*/
1088 | 
1089 | static int IlluMultiStoreData
1090 | (MPI_Comm comm, PetscScalar *globalarray, char *basename, int rank,
1091 |  int compressed, int gridpoints, int dof, PetscScalar *fieldmin,
1092 |  PetscScalar *fieldmax)
1093 | {
1094 |   int i;
1095 |   char filename [1000];
1096 |   FILE *outfile;
1097 |   size_t writeout;
1098 | 
1099 |   if (compressed & COMPRESS_GZIP_MASK)
1100 |     {
1101 |       if ((compressed & COMPRESS_GZIP_MASK) >= 1 &&
1102 | 	  (compressed & COMPRESS_GZIP_MASK) <= 9)
1103 | 	{
1104 | 	  if (snprintf (filename, 999, "gzip -c -%d > %s.cpu%.4d.data",
1105 | 			compressed & COMPRESS_GZIP_MASK, basename, rank) > 999)
1106 | 	  return 1;
1107 | 	}
1108 |       else
1109 | 	{
1110 | 	  if (snprintf (filename,999, "gzip -c --best > %s.cpu%.4d.data",
1111 | 			basename,rank) > 999)
1112 | 	    return 1;
1113 | 	}
1114 |       if (!(outfile = popen (filename, "w")))
1115 | 	{
1116 | 	  char openerror [1050];
1117 | 	  snprintf (openerror, 1049,
1118 | 		    "CPU %d: error %d (%s) opening output pipe \"%s\"",
1119 | 		    rank, errno, strerror (errno), filename);
1120 | 	  SETERRQ (PETSC_ERR_FILE_OPEN, openerror);
1121 | 	}
1122 |     }
1123 |   else
1124 |     {
1125 |       if (snprintf (filename, 999, "%s.cpu%.4d.data", basename, rank) > 999)
1126 | 	return 1;
1127 |       if (!(outfile = fopen (filename, "w")))
1128 | 	{
1129 | 	  char openerror [1050];
1130 | 	  snprintf (openerror, 1049,
1131 | 		    "CPU %d: error %d (%s) opening output file %s",
1132 | 		    rank, errno, strerror (errno), filename);
1133 | 	  SETERRQ (PETSC_ERR_FILE_OPEN, openerror);
1134 | 	}
1135 |     }
1136 | 
1137 |   switch (compressed & COMPRESS_INT_MASK)
1138 |     {
1139 |     case COMPRESS_INT_LONG:
1140 |       {
1141 | 	guint32 *storage;
1142 | 	if (!(storage = (guint32 *) malloc
1143 | 	      (gridpoints * dof * sizeof (guint32))))
1144 | 	  return 1;
1145 | 	for (i=0; i<gridpoints*dof; i++)
1146 | 	  storage [i] = (guint32)
1147 | 	    (4294967295. * (globalarray [i] - fieldmin [i%dof]) /
1148 | 	     (fieldmax [i%dof] - fieldmin [i%dof]));
1149 | 	writeout = fwrite (storage, sizeof (guint32), gridpoints*dof, outfile);
1150 | 	free (storage);
1151 | 	break;
1152 |       }
1153 |     case COMPRESS_INT_SHORT:
1154 |       {
1155 | 	guint16 *storage;
1156 | 	if (!(storage = (guint16 *) malloc
1157 | 	      (gridpoints * dof * sizeof (guint16))))
1158 | 	  return 1;
1159 | 	for (i=0; i<gridpoints*dof; i++)
1160 | 	  storage [i] = (guint16)
1161 | 	    (65535. * (globalarray [i] - fieldmin [i%dof]) /
1162 | 	     (fieldmax [i%dof] - fieldmin [i%dof]));
1163 | 	writeout = fwrite (storage, sizeof (guint16),  gridpoints*dof,outfile);
1164 | 	free (storage);
1165 | 	break;
1166 |       }
1167 |     case COMPRESS_INT_CHAR:
1168 |       {
1169 | 	guint8 *storage;
1170 | 	if (!(storage = (guint8 *) malloc
1171 | 	      (gridpoints * dof * sizeof (guint8))))
1172 | 	  return 1;
1173 | 	for (i=0; i<gridpoints*dof; i++)
1174 | 	  storage [i] = (guint8)
1175 | 	    (255. * (globalarray [i] - fieldmin [i%dof]) /
1176 | 	     (fieldmax [i%dof] - fieldmin [i%dof]));
1177 | 	writeout = fwrite (storage, sizeof (guint8),  gridpoints*dof, outfile);
1178 | 	free (storage);
1179 | 	break;
1180 |       }
1181 |     default:
1182 |       writeout = fwrite (globalarray, sizeof (PetscScalar), gridpoints*dof,
1183 | 			 outfile);
1184 |     }
1185 | 
1186 |   if (compressed & COMPRESS_GZIP_MASK)
1187 |     pclose (outfile);
1188 |   else
1189 |     fclose (outfile);
1190 | 
1191 |   if (writeout < gridpoints*dof)
1192 |     {
1193 |       char writerror [1050];
1194 |       int writerr = ferror (outfile);
1195 |       snprintf (writerror,1049, "CPU %d: error %d (%s) during write, sent %d of %d items",
1196 | 		rank, writerr, strerror (writerr), writeout, gridpoints*dof);
1197 |       SETERRQ (PETSC_ERR_FILE_READ, writerror);
1198 |     }
1199 | 
1200 |   return 0;
1201 | }
1202 | 
1203 | 
1204 | #undef __FUNCT__
1205 | #define __FUNCT__ "checkagree"
1206 | 
1207 | /*++++++++++++++++++++++++++++++++++++++
1208 |   Ancillary routine for
1209 |   +latex+{\tt IlluMultiRead()}:
1210 |   +html+ <tt>IlluMultiRead()</tt>:
1211 |   checks agreement of parameters and reports disagreement if necessary.
1212 | 
1213 |   int checkagree Returns 0 if they agree, 1 otherwise.
1214 | 
1215 |   MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD.
1216 | 
1217 |   int da Integer parameter from the existing DA.
1218 | 
1219 |   int file Integer parameter read from the file.
1220 | 
1221 |   char *parameter Parameter name for reporting.
1222 |   ++++++++++++++++++++++++++++++++++++++*/
1223 | 
1224 | static inline int checkagree (MPI_Comm comm, int da, int file, char *parameter)
1225 | {
1226 |   int ierr;
1227 | 
1228 |   if (da == file)
1229 |     return 0;
1230 | 
1231 |   ierr = PetscPrintf (comm, "Disagreement in %s: da has %d, file %d\n",
1232 | 		      parameter, da, file); CHKERRQ (ierr);
1233 |   return 1;
1234 | }
1235 | 
1236 | 
1237 | #undef __FUNCT__
1238 | #define __FUNCT__ "IlluMultiRead"
1239 | 
1240 | /*++++++++++++++++++++++++++++++++++++++
1241 |   This reads the data into an existing distributed array and vector, checking
1242 |   that the sizes are right etc.
1243 | 
1244 |   int IlluMultiRead It returns zero or an error code.
1245 | 
1246 |   MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD.
1247 | 
1248 |   DA theda Distributed array object controlling the data to read.
1249 | 
1250 |   Vec X Vector into which to read the data.
1251 | 
1252 |   char *basename Base file name.
1253 | 
1254 |   int *usermetacount Pointer to an int where we put the number of user metadata
1255 |   parameters loaded.
1256 | 
1257 |   char ***usermetanames Pointer to a char ** where the loaded parameter names
1258 |   are stored.  This is
1259 |   +latex+{\tt malloc}ed by this function, so a call to {\tt free()}
1260 |   +html+ <tt>malloc</tt>ed by this function, so a call to <tt>free()</tt>
1261 |   is needed to free up its data.
1262 | 
1263 |   char ***usermetadata Pointer to a char ** where the loaded parameter strings
1264 |   are stored.  This is
1265 |   +latex+{\tt malloc}ed by this function, so a call to {\tt free()}
1266 |   +html+ <tt>malloc</tt>ed by this function, so a call to <tt>free()</tt>
1267 |   is needed to free up its data.
1268 |   ++++++++++++++++++++++++++++++++++++++*/
1269 | 
1270 | int IlluMultiRead
1271 | (MPI_Comm comm, DA theda, Vec X, char *basename,
1272 |  int *usermetacount, char ***usermetanames, char ***usermetadata)
1273 | {
1274 |   int dim, px,py,pz, nx,ny,nz, xm,ym,zm, dof, sw, size, rank, ierr;
1275 |   DAPeriodicType wrap, fwrap;
1276 |   DAStencilType st, fst;
1277 |   int fdim, fpx,fpy,fpz, fnx,fny,fnz, fxm,fym,fzm, fdof, fsw, fsize = 1;
1278 |   int compressed, wrongendian;
1279 |   char filename[1000], **fieldnames;
1280 |   FILE *infile;
1281 |   PetscScalar *globalarray, *fieldmin, *fieldmax;
1282 | 
1283 |   if (!comm)
1284 |     comm = PETSC_COMM_WORLD;
1285 | 
1286 |   /*+ First it gets the properties of the distributed array for comparison with
1287 |     the metadata. +*/
1288 |   ierr = DAGetInfo (theda, &dim, &nx,&ny,&nz, &px,&py,&pz, &dof, &sw, &wrap,
1289 | 		    &st); CHKERRQ (ierr);
1290 |   ierr = DAGetCorners (theda, PETSC_NULL,PETSC_NULL,PETSC_NULL, &xm,&ym,&zm);
1291 |   CHKERRQ (ierr);
1292 | 
1293 |   /*+ Next it parses the XML metadata file into the document tree, and reads
1294 |     its content into the appropriate structures, comparing parameters with
1295 |     those of the existing distributed array structure. +*/
1296 |   ierr = MPI_Comm_size (comm, &size); CHKERRQ (ierr);
1297 |   ierr = MPI_Comm_rank (comm, &rank); CHKERRQ (ierr);
1298 | 
1299 |   DPRINTF ("Parsing XML metadata file from basename %s\n", basename);
1300 |   if (ierr = IlluMultiParseXML
1301 |       (comm, basename, rank, &compressed, &wrongendian, &fdim, &fpx,&fpy,&fpz,
1302 |        &fnx,&fny,&fnz, PETSC_NULL,PETSC_NULL,PETSC_NULL, &fxm,&fym,&fzm, &fdof,
1303 |        &fsw, &fst, &fwrap, &fieldnames, PETSC_NULL, &fieldmin, &fieldmax,
1304 |        usermetacount, usermetanames, usermetadata))
1305 |     return ierr;
1306 | 
1307 |   /* The size>1 checks are because we support loading multiple CPUs' data into
1308 |      a 1-CPU distributed array, as long as the global dimensions are right. */
1309 |   DPRINTF ("Checking size agreement between DA and data to read\n",0);
1310 |   fsize = fpx * ((dim > 1) ? fpy : 1) * ((dim > 2) ? fpz : 1);
1311 |   if (ierr = checkagree (comm, dim, fdim, "dimensions")) return ierr;
1312 |   if (size > 1 || fsize == 1) {
1313 |     if (ierr = checkagree (comm, px, fpx, "cpu xwidth")) return ierr;
1314 |     if (dim > 1) {
1315 |       if (ierr = checkagree (comm, py, fpy, "cpu ywidth")) return ierr;
1316 |       if (dim > 2) {
1317 | 	if (ierr = checkagree (comm, pz, fpz, "cpu zwidth")) return ierr; }}}
1318 |   if (ierr = checkagree (comm, nx, fnx, "global xwidth")) return ierr;
1319 |   if (dim > 1) {
1320 |     if (ierr = checkagree (comm, ny, fny, "global ywidth")) return ierr;
1321 |     if (dim > 2) {
1322 |       if (ierr = checkagree (comm, nz, fnz, "global zwidth")) return ierr; }}
1323 |   if (size > 1 || fsize == 1) {
1324 |     if (ierr = checkagree (comm, xm, fxm, "local xwidth")) return ierr;
1325 |     if (dim > 1) {
1326 |       if (ierr = checkagree (comm, ym, fym, "local ywidth")) return ierr;
1327 |       if (dim > 2) {
1328 | 	if (ierr = checkagree (comm, zm, fzm, "local zwidth")) return ierr; }}}
1329 |   if (ierr = checkagree (comm, dof, fdof, "degrees of freedom")) return ierr;
1330 |   if (ierr = checkagree (comm, sw, fsw, "stencil width")) return ierr;
1331 |   if (ierr = checkagree (comm, (int)st, (int)fst, "stencil type")) return ierr;
1332 |   if (ierr = checkagree (comm, (int)wrap, (int)fwrap, "periodic type"))
1333 |     return ierr;
1334 | 
1335 |   /*+ Then it streams in the data in one big slurp. +*/
1336 |   ierr = VecGetArray (X, &globalarray); CHKERRQ (ierr);
1337 |   if (size > 1 || fsize == 1) /* Default condition */
1338 |     {
1339 |       DPRINTF ("Reading data\n",0);
1340 |       ierr = IlluMultiParseData
1341 | 	(comm, globalarray, basename, rank, compressed,
1342 | 	 xm * ((dim>1)?ym:1) * ((dim>2)?zm:1), dof, wrongendian, fieldmin,
1343 | 	 fieldmax);
1344 |       if (ierr)
1345 | 	{
1346 | 	  xm = VecRestoreArray (X, &globalarray); CHKERRQ (xm);
1347 | 	  return ierr;
1348 | 	}
1349 |     }
1350 |   else /* Getting distributed data into a single array */
1351 |     {
1352 |       int i,j,k, xs,ys,zs;
1353 |       PetscScalar *storage;
1354 | 
1355 |       /* Incrementally read in data to storage, store it in globalarray */
1356 |       /* First loop through the stored CPUs */
1357 |       for     (pz=0, zs=0; pz<((dim>2)?fpz:1); pz++, zs+=fzm)
1358 | 	for   (py=0, ys=0; py<((dim>1)?fpy:1); py++, ys+=fym)
1359 | 	  for (px=0, xs=0; px<fpx;            px++, xs+=fxm, rank++)
1360 | 	    {
1361 | 	      int gridpoints;
1362 | 
1363 | 	      DPRINTF ("Parsing XML metadata file for CPU %d\n", rank);
1364 | 	      if (ierr = IlluMultiParseXML
1365 | 		  (comm, basename, rank, &compressed, &wrongendian, &fdim,
1366 | 		   &fpx,&fpy,&fpz, &fnx,&fny,&fnz,
1367 | 		   PETSC_NULL,PETSC_NULL,PETSC_NULL,
1368 | 		   &fxm,&fym,&fzm, &fdof, &fsw, &fst, &fwrap,
1369 | 		   &fieldnames, PETSC_NULL, &fieldmin, &fieldmax,
1370 | 		   usermetacount, usermetanames, usermetadata))
1371 | 		return ierr;
1372 | 	      gridpoints = fxm * ((dim>1)?fym:1) * ((dim>2)?fzm:1);
1373 | 
1374 | 	      /* This allocate/read/copy/free storage business is annoying.
1375 | 		 Replacing it with "zero-copy" would involve changing the
1376 | 		 IlluMultiParseData() API to allow loading into part of the
1377 | 		 local array.  But I'll leave that for future expansion, when
1378 | 		 this will be able to do arbitrary m->n CPU loading as long as
1379 | 		 the global sizes match. */
1380 | 	      storage = (PetscScalar *) malloc
1381 | 		(gridpoints * dof * sizeof (PetscScalar));
1382 | 	      DPRINTF ("Reading data for CPU %d\n", rank);
1383 | 	      ierr = IlluMultiParseData
1384 | 		(comm, storage, basename, rank, compressed, gridpoints, dof,
1385 | 		 wrongendian, fieldmin, fieldmax);
1386 | 
1387 | 	      fxm *= dof; /* so fxm is the number of PetscScalars to copy */
1388 | 	      i=1;
1389 | 	      /* Now distribute */
1390 | 	      for (k=0; k<((dim>2)?fzm:1); k++)
1391 | 		for (j=0; j<((dim>1)?fym:1); j++)
1392 | 		  BLAScopy_ (&fxm, storage + (k*fym + j)*fxm /* *dof */, &i,
1393 | 			   globalarray + (((zs+k)*ny + ys+j)*nx + xs)*dof, &i);
1394 | 
1395 | 	      free (storage);
1396 | 	      fxm /= dof;
1397 | 	    }
1398 |     }
1399 | 
1400 |   ierr = VecRestoreArray (X, &globalarray); CHKERRQ (ierr);
1401 | 
1402 |   return 0;
1403 | }
1404 | 
1405 | 
1406 | #undef __FUNCT__
1407 | #define __FUNCT__ "IlluMultiLoad"
1408 | 
1409 | /*++++++++++++++++++++++++++++++++++++++
1410 |   This creates a new distributed array of the appropriate size and loads the
1411 |   data into the vector contained in it (as retrieved by
1412 |   +latex+{\tt DAGetVector()}).
1413 |   +html+ <tt>DAGetVector()</tt>).
1414 |   It also reads the user metadata parameters into arrays stored at the supplied
1415 |   pointers.
1416 | 
1417 |   int IlluMultiLoad It returns zero or an error code.
1418 | 
1419 |   MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD.
1420 | 
1421 |   char *basename Base file name.
1422 | 
1423 |   DA *theda Pointer to a DA object (to be created by this function).
1424 | 
1425 |   PetscScalar *wx Physical overall width in the
1426 |   +latex+$x$-direction.
1427 |   +html+ <i>x</i>-direction.
1428 | 
1429 |   PetscScalar *wy Physical overall width in the
1430 |   +latex+$y$-direction.
1431 |   +html+ <i>y</i>-direction.
1432 | 
1433 |   PetscScalar *wz Physical overall width in the
1434 |   +latex+$z$-direction.
1435 |   +html+ <i>z</i>-direction.
1436 | 
1437 |   field_plot_type **fieldtypes Data (plot) types for field variables.
1438 | 
1439 |   int *usermetacount Pointer to an int where we put the number of user metadata
1440 |   parameters loaded.
1441 | 
1442 |   char ***usermetanames Pointer to a char ** where the loaded parameter names
1443 |   are stored.  This is
1444 |   +latex+{\tt malloc}ed by this function, so a call to {\tt free()}
1445 |   +html+ <tt>malloc</tt>ed by this function, so a call to <tt>free()</tt>
1446 |   is needed to free up its data.
1447 | 
1448 |   char ***usermetadata Pointer to a char ** where the loaded parameter strings
1449 |   are stored.  This is
1450 |   +latex+{\tt malloc}ed by this function, so a call to {\tt free()}
1451 |   +html+ <tt>malloc</tt>ed by this function, so a call to <tt>free()</tt>
1452 |   is needed to free up its data.
1453 |   ++++++++++++++++++++++++++++++++++++++*/
1454 | 
1455 | int IlluMultiLoad
1456 | (MPI_Comm comm, char *basename, DA *theda, PetscScalar *wx,PetscScalar *wy,
1457 |  PetscScalar *wz, field_plot_type **fieldtypes, int *usermetacount,
1458 |  char ***usermetanames, char ***usermetadata)
1459 | {
1460 |   int dim, px,py,pz, nx,ny,nz, dof, sw, fxm,fym,fzm, rank,size, ierr;
1461 |   int compressed, wrongendian, fpx,fpy,fpz, fsize, xm,ym,zm;
1462 |   DAPeriodicType wrap;
1463 |   DAStencilType st;
1464 |   char filename[1000], **fieldnames;
1465 |   xmlChar *buffer;
1466 |   FILE *infile;
1467 |   xmlDocPtr thedoc;
1468 |   xmlNodePtr thenode;
1469 |   PetscScalar *globalarray, *fieldmin, *fieldmax;
1470 |   Vec X;
1471 | 
1472 |   if (!comm)
1473 |     comm = PETSC_COMM_WORLD;
1474 | 
1475 |   ierr = MPI_Comm_rank (comm, &rank); CHKERRQ (ierr);
1476 |   ierr = MPI_Comm_size (comm, &size); CHKERRQ (ierr);
1477 | 
1478 |   /*+ First it gets the parameters from the XML file. +*/
1479 |   DPRINTF ("Parsing XML metadata file from basename %s\n", basename);
1480 |   if (ierr = IlluMultiParseXML
1481 |       (comm, basename, rank, &compressed, &wrongendian, &dim, &fpx,&fpy,&fpz,
1482 |        &nx,&ny,&nz, wx,wy,wz, &fxm,&fym,&fzm, &dof, &sw, &st, &wrap,
1483 |        &fieldnames, fieldtypes, &fieldmin, &fieldmax,
1484 |        usermetacount, usermetanames, usermetadata))
1485 |     return ierr;
1486 | 
1487 |   fsize = fpx * ((dim > 1) ? fpy : 1) * ((dim > 2) ? fpz : 1);
1488 |   if (size == fsize) { /* Default condition: n->n */
1489 |     px = fpx; py = fpy; pz = fpz; }
1490 |   else if (size == 1)  /* Special condition: n->1 */
1491 |     px = py = pz = 1;
1492 |   else                 /* Error: incorrect number of CPUs */
1493 |     {
1494 |       char errstring[100];
1495 |       snprintf (errstring, 100,
1496 | 		"Incorrect CPU count: current %d, stored %dx%dx%d=%d\n",
1497 | 		size, fpx,fpy,fpz, fsize);
1498 |       SETERRQ (PETSC_ERR_ARG_SIZ, errstring);
1499 |     }
1500 | 
1501 |   /*+ Next it creates a distributed array based on those parameters, and sets
1502 |     the names of its fields. +*/
1503 |   switch (dim)
1504 |     {
1505 |     case 1:
1506 |       {
1507 | 	DPRINTF ("Creating %d-%d 1-D DA\n", nx, dof);
1508 | 	ierr = DACreate1d (comm, wrap, nx, dof, sw, PETSC_NULL,
1509 | 			   theda); CHKERRQ (ierr);
1510 | 	break;
1511 |       }
1512 |     case 2:
1513 |       {
1514 | 	DPRINTF ("Creating %dx%d-%d 2-D DA\n", nx,ny, dof);
1515 | 	ierr = DACreate2d (comm, wrap, st, nx,ny, px,py, dof, sw,
1516 | 			   PETSC_NULL, PETSC_NULL, theda); CHKERRQ (ierr);
1517 | 	break;
1518 |       }
1519 |     case 3:
1520 |       {
1521 | 	DPRINTF ("Creating %dx%dx%d-%d 3-D DA\n", nx,ny,nz, dof);
1522 | 	ierr = DACreate3d (comm, wrap, st, nx,ny,nz, px,py,pz, dof,
1523 | 			   sw, PETSC_NULL, PETSC_NULL, PETSC_NULL, theda);
1524 | 	CHKERRQ (ierr);
1525 | 	break;
1526 |       }
1527 |     default:
1528 |       return 1;
1529 |     }
1530 | 
1531 |   ierr = DAGetCorners (*theda, PETSC_NULL, PETSC_NULL, PETSC_NULL,
1532 | 		       &xm,&ym,&zm); CHKERRQ (ierr);
1533 |   if(size==fsize && (xm != fxm || ym != fym || zm != fzm))
1534 |     {
1535 |       char errstring[100];
1536 |       snprintf (errstring, 100,
1537 | 		"Local array size mismatch: DA %dx%dx%d, stored %dx%dx%d\n",
1538 | 		xm,ym,zm, fxm,fym,fzm);
1539 |       SETERRQ (PETSC_ERR_ARG_SIZ, errstring);
1540 |     }
1541 | 
1542 |   for (px=0; px<dof; px++)
1543 |     DASetFieldName (*theda, px, fieldnames [px]);
1544 | 
1545 |   /*+ Then it streams the data into the distributed array's vector in one big
1546 |     slurp. +*/
1547 |   DPRINTF ("Getting global vector and array\n",0);
1548 |   ierr = DAGetGlobalVector (*theda, &X); CHKERRQ (ierr);
1549 |   ierr = VecGetArray (X, &globalarray); CHKERRQ (ierr);
1550 |   if (size > 1 || fsize == 1) /* Default condition */
1551 |     {
1552 |       DPRINTF ("Loading data in parallel\n",0);
1553 |       ierr = IlluMultiParseData
1554 | 	(comm, globalarray, basename, rank, compressed,
1555 | 	 fxm * ((dim>1)?fym:1) * ((dim>2)?fzm:1), dof, wrongendian, fieldmin,
1556 | 	 fieldmax);
1557 |       if (ierr)
1558 | 	{
1559 | 	  fxm = VecRestoreArray (X, &globalarray); CHKERRQ (fxm);
1560 | 	  return ierr;
1561 | 	}
1562 |     }
1563 |   else /* Getting distributed data into a single array */
1564 |     {
1565 |       int i,j,k, xs,ys,zs;
1566 |       PetscScalar *storage;
1567 | 
1568 |       /* Incrementally read in data to storage, store it in globalarray */
1569 |       /* First loop through the stored CPUs */
1570 |       DPRINTF ("Loading data into a single array on 1 CPU\n",0);
1571 |       for     (pz=0, zs=0; pz<((dim>2)?fpz:1); pz++, zs+=fzm)
1572 | 	for   (py=0, ys=0; py<((dim>1)?fpy:1); py++, ys+=fym)
1573 | 	  for (px=0, xs=0; px<fpx;             px++, xs+=fxm, rank++)
1574 | 	    {
1575 | 	      int gridpoints, fdim, fnx,fny,fnz, fdof, fsw;
1576 | 	      DAPeriodicType fwrap;
1577 | 	      DAStencilType fst;
1578 | 
1579 | 	      if (ierr = IlluMultiParseXML
1580 | 		  (comm, basename, rank, &compressed, &wrongendian, &fdim,
1581 | 		   &fpx,&fpy,&fpz, &fnx,&fny,&fnz,
1582 | 		   PETSC_NULL,PETSC_NULL,PETSC_NULL,
1583 | 		   &fxm,&fym,&fzm, &fdof, &fsw, &fst, &fwrap,
1584 | 		   &fieldnames, PETSC_NULL, &fieldmin, &fieldmax,
1585 | 		   usermetacount, usermetanames, usermetadata))
1586 | 		return 1;
1587 | 	      gridpoints = fxm * ((dim>1)?fym:1) * ((dim>2)?fzm:1);
1588 | 
1589 | 	      /* This allocate/read/copy/free storage business is annoying.
1590 | 		 Replacing it with "zero-copy" would involve changing the
1591 | 		 IlluMultiParseData() API to allow loading into part of the
1592 | 		 local array.  But I'll leave that for future expansion, when
1593 | 		 this will be able to do arbitrary m->n CPU loading as long as
1594 | 		 the global sizes match. */
1595 | 	      storage = (PetscScalar *) malloc
1596 | 		(gridpoints * dof * sizeof (PetscScalar));
1597 | 	      ierr = IlluMultiParseData
1598 | 		(comm, storage, basename, rank, compressed, gridpoints, dof,
1599 | 		 wrongendian, fieldmin, fieldmax);
1600 | 
1601 | 	      fxm *= dof; /* so fxm is the number of PetscScalars to copy */
1602 | 	      i=1;
1603 | 	      /* Now distribute */
1604 | 	      for (k=0; k<((dim>2)?fzm:1); k++)
1605 | 		for (j=0; j<((dim>1)?fym:1); j++)
1606 | 		  BLAScopy_ (&fxm, storage + (k*fym + j)*fxm /* *dof */, &i,
1607 | 			   globalarray + (((zs+k)*ny + ys+j)*nx + xs)*dof, &i);
1608 | 
1609 | 	      free (storage);
1610 | 	      fxm /= dof;
1611 | 	    }
1612 |     }
1613 | 
1614 |   DPRINTF ("Done.\n",0);
1615 |   ierr = VecRestoreArray (X, &globalarray); CHKERRQ (ierr);
1616 |   ierr = DARestoreGlobalVector (*theda, &X); CHKERRQ (ierr);
1617 | 
1618 |   return 0;
1619 | }
1620 | 
1621 | 
1622 | #undef __FUNCT__
1623 | #define __FUNCT__ "IlluMultiSave"
1624 | 
1625 | /*++++++++++++++++++++++++++++++++++++++
1626 |   This saves the vector X in multiple files, two per process.
1627 | 
1628 |   int IlluMultiSave it returns zero or an error code.
1629 | 
1630 |   MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD.
1631 | 
1632 |   DA theda Distributed array object controlling data saved.
1633 | 
1634 |   Vec X Vector whose data are actually being saved.
1635 | 
1636 |   char *basename Base file name.
1637 | 
1638 |   int usermetacount Number of user metadata parameters.
1639 | 
1640 |   char **usermetanames User metadata parameter names.
1641 | 
1642 |   char **usermetadata User metadata parameter strings.
1643 | 
1644 |   int compressed Data compression: if zero then no compression (fastest), 1-9
1645 |   then gzip compression level, 10-15 then gzip --best.  If 16-31 then save
1646 |   guint32s representing relative values between min and max for each field,
1647 |   compressed according to this value minus 16.  Likewise for 32-47 and
1648 |   guint16s, and 48-63 for guint8s.  Yes, these alternative formats lose
1649 |   information and can't be used for accurate checkpointing, but they should
1650 |   retain enough data for visualization (except perhaps for the guint8s, which
1651 |   are possibly acceptable for vectors but certainly not contours).
1652 |   ++++++++++++++++++++++++++++++++++++++*/
1653 | 
1654 | int IlluMultiSave
1655 | (MPI_Comm comm, DA theda, Vec X, char *basename, PetscScalar wx,PetscScalar wy,
1656 |  PetscScalar wz, field_plot_type *fieldtypes, int usermetacount,
1657 |  char **usermetanames, char **usermetadata, int compressed)
1658 | {
1659 |   int dim, nx,ny,nz, px,py,pz, dof, sw, xs,ys,zs, xm,ym,zm, rank, i, ierr;
1660 |   DAPeriodicType wrap;
1661 |   DAStencilType st;
1662 |   FILE *outfile;
1663 |   char filename[1000], **fieldnames;
1664 |   PetscReal *fieldmin, *fieldmax;
1665 |   PetscScalar *globalarray;
1666 |   guint64 *array64;
1667 |   guint32 *array32;
1668 |   guint16 *array16;
1669 |   guint8 *array8;
1670 | 
1671 |   if (!comm)
1672 |     comm = PETSC_COMM_WORLD;
1673 | 
1674 |   /*+First a check to verify a supported value of
1675 |     +latex+{\tt compressed},
1676 |     +html+ <tt>compressed</tt>,
1677 |     but no fancy guint* compression for complex!
1678 |     +*/
1679 | #ifdef PETSC_USE_COMPLEX
1680 |   if (compressed < 0 || compressed > COMPRESS_GZIP_MASK)
1681 |     return 1;
1682 | #else
1683 |   if (compressed < 0 || compressed > (COMPRESS_GZIP_MASK | COMPRESS_INT_MASK))
1684 |     return 1;
1685 | #endif
1686 | 
1687 |   /*+Then get the distributed array parameters and processor number, and store
1688 |     all this data in the XML .meta file. +*/
1689 |   ierr = DAGetInfo (theda, &dim, &nx,&ny,&nz, &px,&py,&pz, &dof, &sw, &wrap,
1690 | 		    &st); CHKERRQ (ierr);
1691 |   ierr = DAGetCorners (theda, &xs,&ys,&zs, &xm,&ym,&zm); CHKERRQ (ierr);
1692 |   ierr = MPI_Comm_rank (comm, &rank); CHKERRQ (ierr);
1693 | 
1694 |   if (!(fieldnames = (char **) malloc (dof * sizeof (char *))))
1695 |     return 1;
1696 |   if (!(fieldmin = (PetscScalar *) malloc (2 * dof * sizeof (PetscScalar))))
1697 |     return 1;
1698 |   fieldmax = fieldmin + dof;
1699 |   for (i=0; i<dof; i++)
1700 |     {
1701 |       ierr = DAGetFieldName (theda, i, fieldnames + i); CHKERRQ (ierr);
1702 |       ierr = VecStrideMin (X, i, PETSC_NULL, fieldmin + i); CHKERRQ (ierr);
1703 |       ierr = VecStrideMax (X, i, PETSC_NULL, fieldmax + i); CHKERRQ (ierr);
1704 |     }
1705 | 
1706 |   if (ierr = IlluMultiStoreXML
1707 |       (comm, basename, rank, compressed, dim, px,py,pz, nx,ny,nz, wx,wy,wz,
1708 |        xm,ym,zm, dof, sw, (int) st, (int) wrap, fieldnames,fieldtypes,
1709 |        fieldmin,fieldmax, usermetacount, usermetanames, usermetadata))
1710 |     return ierr;
1711 | 
1712 |   /*+ Finally, the data just stream out to the data file or gzip pipe in one
1713 |     big lump. +*/
1714 |   ierr = VecGetArray (X, &globalarray); CHKERRQ (ierr);
1715 |   IlluMultiStoreData (comm, globalarray, basename, rank, compressed,
1716 | 		      xm * ((dim>1)?ym:1) * ((dim>2)?zm:1), dof,
1717 | 		      fieldmin, fieldmax);
1718 |   ierr = VecRestoreArray (X, &globalarray); CHKERRQ (ierr);
1719 | 
1720 |   free (fieldmin);
1721 |   free (fieldnames);
1722 | 
1723 |   return 0;
1724 | }