1 | /*************************************** 2 | $Header: /cvsroot/petscgraphics/illumulti.c,v 1.34 2006/01/27 03:04:12 hazelsct Exp $ 3 | 4 | This file contains the functions 5 | +latex+{\tt IlluMultiSave()}, {\tt IlluMultiLoad()} and {\tt IlluMultiRead()} 6 | +html+ <tt>IlluMultiSave()</tt>, <tt>IlluMultiLoad()</tt> and 7 | +html+ <tt>IlluMultiRead()</tt> 8 | designed to handle distributed storage and retrieval of data on local drives 9 | of machines in a Beowulf cluster. This should allow rapid loading of 10 | timestep data for "playback" from what is essentially a giant RAID-0 array of 11 | distributed disks, at enormously higher speeds than via NFS from a hard drive 12 | or RAID array on the head node. The danger of course is that if one node's 13 | disk goes down, you don't have a valid data set any more, but that's the 14 | nature of RAID-0, right? 15 | +latex+\par 16 | +html+ <p> 17 | The filenames saved are: 18 | +latex+\begin{itemize} \item {\tt $<$basename$>$.cpu\#\#\#\#.meta}, where 19 | +latex+{\tt \#\#\#\#} 20 | +html+ <ul><li><tt><basename>.cpu####.meta</tt>, where <tt>####</tt> 21 | is replaced by the CPU number (more than four digits if there are more than 22 | 9999 CPUs :-), which has the metadata for the whole thing in XML format 23 | (written by 24 | +latex+{\tt GNOME libxml}), 25 | +html+ <tt>GNOME libxml</tt>), 26 | as described in the notes on the 27 | +latex+{\tt IlluMultiStoreXML()} function. 28 | +html+ <tt>IlluMultiStoreXML()</tt> function. 29 | +latex+\item {\tt $<$basename$>$.cpu\#\#\#\#.data} 30 | +html+ </li><li><tt><basename>.cpu####.data</tt> 31 | which is simply a stream of the raw data, optionally compressed by 32 | +latex+{\tt gzip}. \end{itemize} 33 | +html+ <tt>gzip</tt>.</li></ul> 34 | If one were saving timesteps, one might include a timestep number in the 35 | basename, and also timestep and simulation time in the metadata. The 36 | metadata can also hold simulation parameters, etc. 37 | +latex+\par 38 | +html+ <p> 39 | This supports 1-D, 2-D and 3-D distributed arrays. As an extra feature, you 40 | can load a multi-CPU distributed array scattered over lots of files into a 41 | single CPU, to facilitate certain modes of data visualization. 42 | ***************************************/ 43 | 44 | 45 | #include "illuminator.h" /* Also includes petscda.h */ 46 | #include <glib.h> /* for guint*, GUINT*_SWAP_LE_BE */ 47 | #include <petscblaslapack.h> /* for BLAScopy_() */ 48 | #include <libxml/tree.h> /* To build the XML tree to store */ 49 | #include <libxml/parser.h> /* XML parsing */ 50 | #include <stdio.h> /* FILE, etc. */ 51 | #include <errno.h> /* errno */ 52 | #include <string.h> /* strncpy(), etc. */ 53 | #include <stdlib.h> /* malloc() and free() */ 54 | 55 | /* Build with -DDEBUG for debugging output */ 56 | #undef DPRINTF 57 | #ifdef DEBUG 58 | #define DPRINTF(fmt, args...) ierr = PetscPrintf (comm, "%s: " fmt, __FUNCT__, args); CHKERRQ (ierr) 59 | #else 60 | #define DPRINTF(fmt, args...) 61 | #endif 62 | 63 | 64 | #undef __FUNCT__ 65 | #define __FUNCT__ "IlluMultiParseXML" 66 | 67 | /*++++++++++++++++++++++++++++++++++++++ 68 | This function reads in the XML metadata document and returns the various 69 | parameter values in the addresses pointed to by the arguments. It is called 70 | by IlluMultiLoad() and IlluMultiRead(). 71 | 72 | int IlluMultiParseXML It returns zero or an error code. 73 | 74 | MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD. 75 | 76 | char *basename Base file name. 77 | 78 | int rank CPU number to read data for. 79 | 80 | int *compressed Data compression: if zero then no compression (fastest), 1-9 81 | then gzip compression level, 10-15 then gzip --best. If 16-31 then save 82 | guint32s representing relative values between min and max for each field, 83 | compressed according to this value minus 16. Likewise for 32-47 and 84 | guint16s, and 48-63 for guint8s. Yes, these alternative formats lose 85 | information and can't be used for accurate checkpointing, but they should 86 | retain enough data for visualization (except perhaps for the guint8s, which 87 | are possibly acceptable for vectors but likely not contours). 88 | 89 | int *wrongendian Tells whether the data are stored in the opposite endian 90 | format from this platform, and thus must be switched when the data are 91 | streamed in. 92 | 93 | int *dim Dimensionality of the space. 94 | 95 | int *px Number of processors in the 96 | +latex+$x$-direction. 97 | +html+ <i>x</i>-direction. 98 | 99 | int *py Number of processors in the 100 | +latex+$y$-direction. 101 | +html+ <i>y</i>-direction. 102 | 103 | int *pz Number of processors in the 104 | +latex+$z$-direction. 105 | +html+ <i>z</i>-direction. 106 | 107 | int *nx Number of grid points over the entire array in the 108 | +latex+$x$-direction. 109 | +html+ <i>x</i>-direction. 110 | 111 | int *ny Number of grid points over the entire array in the 112 | +latex+$y$-direction. 113 | +html+ <i>y</i>-direction. 114 | 115 | int *nz Number of grid points over the entire array in the 116 | +latex+$z$-direction. 117 | +html+ <i>z</i>-direction. 118 | 119 | PetscScalar *wx Physical overall width in the 120 | +latex+$x$-direction, {\tt PETSC\_NULL} if not needed. 121 | +html+ <i>x</i>-direction, <tt>PETSC_NULL</tt> if not needed. 122 | 123 | PetscScalar *wy Physical overall width in the 124 | +latex+$y$-direction, {\tt PETSC\_NULL} if not needed. 125 | +html+ <i>y</i>-direction, <tt>PETSC_NULL</tt> if not needed. 126 | 127 | PetscScalar *wz Physical overall width in the 128 | +latex+$z$-direction, {\tt PETSC\_NULL} if not needed. 129 | +html+ <i>z</i>-direction, <tt>PETSC_NULL</tt> if not needed. 130 | 131 | int *xm Number of grid points over the local part of the array in the 132 | +latex+$x$-direction. 133 | +html+ <i>x</i>-direction. 134 | 135 | int *ym Number of grid points over the local part of the array in the 136 | +latex+$y$-direction. 137 | +html+ <i>y</i>-direction. 138 | 139 | int *zm Number of grid points over the local part of the array in the 140 | +latex+$z$-direction. 141 | +html+ <i>z</i>-direction. 142 | 143 | int *dof Degrees of freedom at each node, a.k.a. number of field variables. 144 | 145 | int *sw Stencil width. 146 | 147 | DAStencilType *st Stencil type, given by the 148 | +latex+{\tt PETSc enum} values. 149 | +html+ <tt>PETSc enum</tt> values. 150 | 151 | DAPeriodicType *wrap Periodic type, given by the 152 | +latex+{\tt PETSc enum} values. 153 | +html+ <tt>PETSc enum</tt> values. 154 | 155 | char ***fieldnames Names of the field variables. 156 | 157 | field_plot_type **fieldtypes Data (plot) types for field variables, 158 | +latex+{\tt PETSC\_NULL} if not needed. 159 | +html+ <tt>PETSC_NULL</tt> if not needed. 160 | 161 | PetscScalar **fieldmin Minimum value of each field variable. 162 | 163 | PetscScalar **fieldmax Maximum value of each field variable. 164 | 165 | int *usermetacount Number of user metadata parameters. 166 | 167 | char ***usermetanames User metadata parameter names. 168 | 169 | char ***usermetadata User metadata parameter strings. 170 | ++++++++++++++++++++++++++++++++++++++*/ 171 | 172 | static int IlluMultiParseXML 173 | (MPI_Comm comm, char *basename, int rank, int *compressed, int *wrongendian, 174 | int *dim, int *px,int *py,int *pz, int *nx,int *ny,int *nz, 175 | PetscScalar *wx,PetscScalar *wy,PetscScalar *wz, int *xm,int *ym,int *zm, 176 | int *dof, int *sw, DAStencilType *st, DAPeriodicType *wrap, 177 | char ***fieldnames, field_plot_type **fieldtypes, PetscScalar **fieldmin, 178 | PetscScalar **fieldmax, 179 | int *usermetacount, char ***usermetanames, char ***usermetadata) 180 | { 181 | xmlDocPtr thedoc; 182 | xmlNodePtr thenode; 183 | char filename [1000]; 184 | xmlChar *buffer, *version; 185 | int field=0, ierr; 186 | 187 | if (snprintf (filename, 999, "%s.cpu%.4d.meta", basename, rank) > 999) 188 | return 1; 189 | 190 | DPRINTF ("Parsing XML file %s\n", filename); 191 | thedoc = xmlParseFile (filename); 192 | if (thedoc == NULL) 193 | IllErrorHandler (PETSC_ERR_FILE_OPEN, "xmlParseFile returned NULL\n"); 194 | 195 | version = xmlGetProp (thedoc -> children, BAD_CAST "version"); 196 | if (strncmp ((char *) version, "0.1", 3) && 197 | strncmp ((char *) version, "0.2", 3)) 198 | { 199 | free (version); 200 | IllErrorHandler (PETSC_ERR_FILE_UNEXPECTED, 201 | "Version is neither 0.1 nor 0.2, can't parse\n"); 202 | } 203 | 204 | *wrongendian = 0; 205 | buffer = xmlGetProp (thedoc -> children, BAD_CAST "endian"); 206 | #ifdef WORDS_BIGENDIAN 207 | if (strncasecmp ((char *) buffer, "big", 3)) 208 | *wrongendian = 1; 209 | #else 210 | if (!strncasecmp ((char *) buffer, "big", 3)) 211 | *wrongendian = 1; 212 | #endif 213 | free (buffer); 214 | 215 | buffer = xmlGetProp (thedoc -> children, BAD_CAST "compression"); 216 | /* Have to handle all of the various compressed string values */ 217 | /* TODO: make this deal with "float" and "complex" */ 218 | if (buffer[0] == 'l' || buffer[0] == 'L') 219 | { 220 | if (buffer[4] == 'n' || buffer[4] == 'N') 221 | *compressed = COMPRESS_INT_LONG | COMPRESS_GZIP_NONE; 222 | else if (buffer[4] >= '1' && buffer[4] <= '9') 223 | { 224 | sscanf ((char *) buffer+4, "%d", compressed); 225 | *compressed |= COMPRESS_INT_LONG; 226 | } 227 | else if (buffer[4] == 'b' || buffer[4] == 'B') 228 | *compressed = COMPRESS_INT_LONG | COMPRESS_GZIP_BEST; 229 | else 230 | return 1; 231 | } 232 | else if (buffer[0] == 's' || buffer[0] == 'S') 233 | { 234 | if (buffer[5] == 'n' || buffer[5] == 'N') 235 | *compressed = COMPRESS_INT_SHORT | COMPRESS_GZIP_NONE; 236 | else if (buffer[5] >= '1' && buffer[5] <= '9') 237 | { 238 | sscanf ((char *) buffer+5, "%d", compressed); 239 | *compressed |= COMPRESS_INT_SHORT; 240 | } 241 | else if (buffer[5] == 'b' || buffer[5] == 'B') 242 | *compressed = COMPRESS_INT_SHORT | COMPRESS_GZIP_BEST; 243 | else 244 | return 1; 245 | } 246 | else if (buffer[0] == 'c' || buffer[0] == 'C') 247 | { 248 | if (buffer[4] == 'n' || buffer[4] == 'N') 249 | *compressed = COMPRESS_INT_CHAR | COMPRESS_GZIP_NONE; 250 | else if (buffer[4] >= '1' && buffer[4] <= '9') 251 | { 252 | sscanf ((char *) buffer+4, "%d", compressed); 253 | *compressed |= COMPRESS_INT_CHAR; 254 | } 255 | else if (buffer[4] == 'b' || buffer[4] == 'B') 256 | *compressed = COMPRESS_INT_CHAR | COMPRESS_GZIP_BEST; 257 | else 258 | return 1; 259 | } 260 | else 261 | { 262 | if (buffer[0] == 'n' || buffer[0] == 'N') 263 | *compressed = COMPRESS_INT_NONE | COMPRESS_GZIP_NONE; 264 | else if (buffer[0] >= '1' && buffer[0] <= '9') 265 | { 266 | sscanf ((char *) buffer, "%d", compressed); 267 | *compressed |= COMPRESS_INT_NONE; 268 | } 269 | else if (buffer[0] == 'b' || buffer[0] == 'B') 270 | *compressed = COMPRESS_INT_NONE | COMPRESS_GZIP_BEST; 271 | else 272 | return 1; 273 | } 274 | free (buffer); 275 | DPRINTF ("Root node: version %s, %s endian, compression %d|%d\n", 276 | version, *wrongendian ? "switched" : "native", 277 | *compressed & COMPRESS_INT_MASK, *compressed & COMPRESS_GZIP_MASK); 278 | 279 | *fieldnames = PETSC_NULL; 280 | if (fieldtypes != PETSC_NULL) 281 | *fieldtypes = PETSC_NULL; 282 | *fieldmin = *fieldmax = PETSC_NULL; 283 | *usermetacount = 0; 284 | *usermetanames = *usermetadata = PETSC_NULL; 285 | 286 | /* The main parsing loop; because xmlStringGetNodeList() doesn't work. */ 287 | for (thenode = thedoc->children->xmlChildrenNode; 288 | thenode; 289 | thenode = thenode->next) 290 | { 291 | DPRINTF ("Looping through nodes, this is %s, fieldnames 0x%lx\n", 292 | thenode->name, *fieldnames); 293 | if (!strncasecmp ((char *) thenode->name, "GlobalCPUs", 10)) 294 | { 295 | buffer = xmlGetProp (thenode, BAD_CAST "dimensions"); 296 | sscanf ((char *) buffer, "%d", dim); 297 | free (buffer); 298 | 299 | buffer = xmlGetProp (thenode, BAD_CAST "xwidth"); 300 | sscanf ((char *) buffer, "%d", px); 301 | free (buffer); 302 | 303 | if (*dim > 1) 304 | { 305 | buffer = xmlGetProp (thenode, BAD_CAST "ywidth"); 306 | sscanf ((char *) buffer, "%d", py); 307 | free (buffer); 308 | 309 | if (*dim > 2) 310 | { 311 | buffer = xmlGetProp (thenode, BAD_CAST "zwidth"); 312 | sscanf ((char *) buffer, "%d", pz); 313 | free (buffer); 314 | } 315 | else 316 | *pz = 1; 317 | } 318 | else 319 | *py = 1; 320 | } 321 | 322 | else if (!strncasecmp ((char *) thenode->name, "GlobalSize", 10)) 323 | { 324 | buffer = xmlGetProp (thenode, BAD_CAST "xwidth"); 325 | sscanf ((char *) buffer, "%d", nx); 326 | free (buffer); 327 | /*+ For GlobalSize, since there's no *size attribute (for an 0.1 328 | version document), assume 1. +*/ 329 | buffer = xmlGetProp (thenode, BAD_CAST "xsize"); 330 | if (wx != PETSC_NULL) 331 | { 332 | if (buffer) 333 | #ifdef PETSC_USE_SINGLE 334 | sscanf ((char *) buffer, "%f", wx); 335 | #else 336 | sscanf ((char *) buffer, "%lf", wx); 337 | #endif 338 | else 339 | *wx = 1.; 340 | } 341 | if (buffer) 342 | free (buffer); 343 | 344 | if (*dim > 1) 345 | { 346 | buffer = xmlGetProp (thenode, BAD_CAST "ywidth"); 347 | sscanf ((char *) buffer, "%d", ny); 348 | free (buffer); 349 | buffer = xmlGetProp (thenode, BAD_CAST "ysize"); 350 | if (wy != PETSC_NULL) 351 | { 352 | if (buffer) 353 | #ifdef PETSC_USE_SINGLE 354 | sscanf ((char *) buffer, "%f", wy); 355 | #else 356 | sscanf ((char *) buffer, "%lf", wy); 357 | #endif 358 | else 359 | *wy = 1.; 360 | } 361 | if (buffer) 362 | free (buffer); 363 | 364 | if (*dim > 2) 365 | { 366 | buffer = xmlGetProp (thenode, BAD_CAST "zwidth"); 367 | sscanf ((char *) buffer, "%d", nz); 368 | free (buffer); 369 | buffer = xmlGetProp (thenode, BAD_CAST "zsize"); 370 | if (wz != PETSC_NULL) 371 | { 372 | if (buffer) 373 | #ifdef PETSC_USE_SINGLE 374 | sscanf ((char *) buffer, "%f", wz); 375 | #else 376 | sscanf ((char *) buffer, "%lf", wz); 377 | #endif 378 | else 379 | *wz = 1.; 380 | } 381 | if (buffer) 382 | free (buffer); 383 | } 384 | else 385 | *nz = 1; 386 | } 387 | else 388 | *ny = *nz = 1; 389 | 390 | buffer = xmlGetProp (thenode, BAD_CAST "fields"); 391 | sscanf ((char *) buffer, "%d", dof); 392 | free (buffer); 393 | 394 | if (*dof > field) 395 | { 396 | if (!(*fieldnames = (char **) realloc 397 | (*fieldnames, *dof * sizeof (char *)))) 398 | return 1; 399 | if (fieldtypes != PETSC_NULL) 400 | if (!(*fieldtypes = (field_plot_type *) realloc 401 | (*fieldtypes, *dof * sizeof(field_plot_type)))) 402 | return 1; 403 | if (!(*fieldmin = (PetscScalar *) realloc 404 | (*fieldmin, *dof * sizeof(PetscScalar)))) 405 | return 1; 406 | if (!(*fieldmax = (PetscScalar *) realloc 407 | (*fieldmax, *dof * sizeof(PetscScalar)))) 408 | return 1; 409 | } 410 | } 411 | 412 | else if (!strncasecmp ((char *) thenode->name, "LocalSize", 9)) 413 | { 414 | buffer = xmlGetProp (thenode, BAD_CAST "xwidth"); 415 | sscanf ((char *) buffer, "%d", xm); 416 | free (buffer); 417 | 418 | if (*dim > 1) 419 | { 420 | buffer = xmlGetProp (thenode, BAD_CAST "ywidth"); 421 | sscanf ((char *) buffer, "%d", ym); 422 | free (buffer); 423 | 424 | if (*dim > 2) 425 | { 426 | buffer = xmlGetProp (thenode, BAD_CAST "zwidth"); 427 | sscanf ((char *) buffer, "%d", zm); 428 | free (buffer); 429 | } 430 | else 431 | *zm = 1; 432 | } 433 | else 434 | *ym = 1; 435 | } 436 | 437 | else if (!strncasecmp ((char *) thenode->name, "Stencil", 7)) 438 | { 439 | buffer = xmlGetProp (thenode, BAD_CAST "width"); 440 | sscanf ((char *) buffer, "%d", sw); 441 | free (buffer); 442 | 443 | buffer = xmlGetProp (thenode, BAD_CAST "type"); 444 | sscanf ((char *) buffer, "%d", st); 445 | free (buffer); 446 | 447 | buffer = xmlGetProp (thenode, BAD_CAST "periodic"); 448 | sscanf ((char *) buffer, "%d", wrap); 449 | free (buffer); 450 | } 451 | 452 | else if (!strncasecmp ((char *) thenode->name, "Field", 5)) 453 | { 454 | if (!*fieldnames || !*fieldmax || !*fieldmin) 455 | { 456 | printf ("Warning: reading a Field, but the number of them is unknown!\n"); 457 | } 458 | 459 | if (field >= *dof) 460 | { 461 | printf ("Warning: more <Field> tags than declared degrees of freedom.\nThis may be because tags are out of order.\n"); 462 | *fieldnames = (char **) realloc 463 | (*fieldnames, (field+1) * sizeof (char *)); 464 | if (fieldtypes != PETSC_NULL) 465 | *fieldtypes = (field_plot_type *) realloc 466 | (*fieldtypes, (field+1) * sizeof (field_plot_type)); 467 | *fieldmin = (PetscScalar *) realloc 468 | (*fieldmin, (field+1) * sizeof (PetscScalar)); 469 | *fieldmax = (PetscScalar *) realloc 470 | (*fieldmax, (field+1) * sizeof (PetscScalar)); 471 | } 472 | 473 | (*fieldnames) [field] = 474 | (char *) xmlGetProp (thenode, BAD_CAST "name"); 475 | /*+ If the type attribute is missing from the Field node (as it is in 476 | version 0.1 documents), assume 477 | +latex+{\tt FIELD\_SCALAR}. 478 | +html+ <tt>FIELD_SCALAR</tt>. +*/ 479 | buffer = xmlGetProp (thenode, BAD_CAST "type"); 480 | if (fieldtypes) 481 | { 482 | if (buffer) 483 | sscanf ((char *) buffer, "0x%x", *fieldtypes + field); 484 | else 485 | (*fieldtypes) [field] = FIELD_SCALAR; 486 | } 487 | if (buffer) 488 | free(buffer); 489 | buffer = xmlGetProp (thenode, BAD_CAST "min"); 490 | #ifdef PETSC_USE_SINGLE 491 | sscanf ((char *) buffer, "%f", *fieldmin + field); 492 | #else 493 | sscanf ((char *) buffer, "%lf", *fieldmin + field); 494 | #endif 495 | free (buffer); 496 | buffer = xmlGetProp (thenode, BAD_CAST "max"); 497 | #ifdef PETSC_USE_SINGLE 498 | sscanf ((char *) buffer, "%f", *fieldmax + field); 499 | #else 500 | sscanf ((char *) buffer, "%lf", *fieldmax + field); 501 | #endif 502 | free (buffer); 503 | field++; 504 | } 505 | 506 | else if (!strncasecmp ((char *) thenode->name, "User", 4)) 507 | { 508 | (*usermetacount)++; 509 | (*usermetanames) = (char **) realloc 510 | (*usermetanames, (*usermetacount) * sizeof (char *)); 511 | (*usermetadata) = (char **) realloc 512 | (*usermetadata, (*usermetacount) * sizeof (char *)); 513 | (*usermetanames) [(*usermetacount)-1] = 514 | (char *) xmlGetProp (thenode, BAD_CAST "name"); 515 | (*usermetadata) [(*usermetacount)-1] = 516 | (char *) xmlGetProp (thenode, BAD_CAST "value"); 517 | } 518 | } 519 | 520 | /* This is another way to do things, which would be a whole lot easier, if it 521 | worked... (See Debian's libxml bug list.) 522 | thenode = xmlStringGetNodeList (thedoc, "GlobalCPUs"); 523 | printf ("thenode = 0x%lx\n", thenode); 524 | if (!thenode) 525 | return 1; 526 | thenode = xmlStringGetNodeList (thedoc, "GlobalSize"); 527 | if (!thenode) 528 | return 1; 529 | thenode = xmlStringGetNodeList (thedoc, "LocalSize"); 530 | if (!thenode) 531 | return 1; 532 | thenode = xmlStringGetNodeList (thedoc, "Stencil"); 533 | if (!thenode) 534 | return 1; 535 | 536 | i = 0; 537 | thenode = xmlStringGetNodeList (thedoc, "Field"); 538 | if (!thenode) 539 | return 1; 540 | while (thenode) 541 | { 542 | i++; 543 | thenode = thenode -> next; 544 | } 545 | 546 | thenode = xmlStringGetNodeList (thedoc, "User"); 547 | while (thenode) 548 | { 549 | thenode = thenode -> next; 550 | } */ 551 | 552 | free (version); 553 | xmlFreeDoc (thedoc); 554 | 555 | return 0; 556 | } 557 | 558 | 559 | #undef __FUNCT__ 560 | #define __FUNCT__ "IlluMultiParseData" 561 | 562 | /*++++++++++++++++++++++++++++++++++++++ 563 | This function reads in the data stored by IlluMultiStoreData(), complete with 564 | int/gzip compression. 565 | 566 | int IlluMultiParseData It returns zero or an error code. 567 | 568 | MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD. 569 | 570 | PetscScalar *globalarray Array into which to load the (local) data. 571 | 572 | char *basename Base file name. 573 | 574 | int rank CPU number to read data for. 575 | 576 | int compressed Data compression: if zero then no compression (fastest), 1-9 577 | then gzip compression level, 10-15 then gzip --best. If 16-31 then save 578 | guint32s representing relative values between min and max for each field, 579 | compressed according to this value minus 16. Likewise for 32-47 and 580 | guint16s, and 48-63 for guint8s. Yes, these alternative formats lose 581 | information and can't be used for accurate checkpointing, but they should 582 | retain enough data for visualization (except perhaps for the guint8s, which 583 | are possibly acceptable for vectors but likely not contours). 584 | 585 | int gridpoints Number of gridpoints to read data for. 586 | 587 | int dof Degrees of freedom at each node, a.k.a. number of field variables. 588 | 589 | int wrongendian Tells whether the data are stored in the opposite endian 590 | format from this platform, and thus must be switched when the data are 591 | streamed in. 592 | 593 | PetscScalar *fieldmin Minimum value of each field variable. 594 | 595 | PetscScalar *fieldmax Maximum value of each field variable. 596 | ++++++++++++++++++++++++++++++++++++++*/ 597 | 598 | static int IlluMultiParseData 599 | (MPI_Comm comm, PetscScalar *globalarray, char *basename, int rank, 600 | int compressed, int gridpoints, int dof, int wrongendian, 601 | PetscScalar *fieldmin, PetscScalar *fieldmax) 602 | { 603 | int i; 604 | char filename [1000]; 605 | FILE *infile; 606 | size_t readoutput; 607 | 608 | if (compressed & COMPRESS_GZIP_MASK) 609 | { 610 | if (snprintf (filename, 999, "gunzip -c < %s.cpu%.4d.data", 611 | basename, rank) > 999) 612 | return 1; 613 | if (!(infile = popen (filename, "r"))) 614 | { 615 | char openerror [1050]; 616 | snprintf (openerror, 1049, 617 | "CPU %d: error %d (%s) opening input pipe \"%s\"", 618 | rank, errno, strerror (errno), filename); 619 | SETERRQ (PETSC_ERR_FILE_OPEN, openerror); 620 | } 621 | } 622 | else 623 | { 624 | if (snprintf (filename, 999, "%s.cpu%.4d.data", basename, rank) > 999) 625 | return 1; 626 | if (!(infile = fopen (filename, "r"))) 627 | { 628 | char openerror [1050]; 629 | snprintf (openerror, 1049, 630 | "CPU %d: error %d (%s) opening input file %s", 631 | rank, errno, strerror (errno), filename); 632 | SETERRQ (PETSC_ERR_FILE_OPEN, openerror); 633 | } 634 | } 635 | 636 | /* Read and store the data */ 637 | /* TODO: make this deal with float and complex */ 638 | switch (compressed & COMPRESS_INT_MASK) 639 | { 640 | /* Yes, this way of doing it is a bit redundant, but it seems better than 641 | the multitude of subconditionals which would be required if I did it 642 | the other way. */ 643 | case COMPRESS_INT_LONG: 644 | { 645 | guint32 *storage; 646 | if (!(storage = (guint32 *) malloc 647 | (gridpoints * dof * sizeof (guint32)))) 648 | return 1; 649 | readoutput = fread (storage, sizeof (guint32), gridpoints*dof, infile); 650 | 651 | if (wrongendian) 652 | { 653 | for (i=0; i<gridpoints*dof; i++) 654 | storage[i] = GUINT32_SWAP_LE_BE_CONSTANT (storage[i]); 655 | } 656 | for (i=0; i<gridpoints*dof; i++) 657 | globalarray[i] = fieldmin [i%dof] + 658 | ((fieldmax [i%dof] - fieldmin [i%dof]) * storage[i]) / 4294967295.; 659 | free (storage); 660 | break; 661 | } 662 | case COMPRESS_INT_SHORT: 663 | { 664 | guint16 *storage; 665 | if (!(storage = (guint16 *) malloc 666 | (gridpoints * dof * sizeof (guint16)))) 667 | return 1; 668 | readoutput = fread (storage, sizeof (guint16), gridpoints*dof, infile); 669 | 670 | if (wrongendian) 671 | { 672 | for (i=0; i<gridpoints*dof; i++) 673 | storage[i] = GUINT16_SWAP_LE_BE_CONSTANT (storage[i]); 674 | } 675 | for (i=0; i<gridpoints*dof; i++) 676 | globalarray[i] = fieldmin [i%dof] + 677 | ((fieldmax [i%dof] - fieldmin [i%dof]) * storage[i]) / 65535.; 678 | free (storage); 679 | break; 680 | } 681 | case COMPRESS_INT_CHAR: 682 | { 683 | guint8 *storage; 684 | if (!(storage = (guint8 *) malloc 685 | (gridpoints * dof * sizeof (guint8)))) 686 | return 1; 687 | readoutput = fread (storage, sizeof (guint8), gridpoints*dof, infile); 688 | 689 | for (i=0; i<gridpoints*dof; i++) 690 | globalarray[i] = fieldmin [i%dof] + 691 | ((fieldmax [i%dof] - fieldmin [i%dof]) * storage[i]) / 255.; 692 | free (storage); 693 | break; 694 | } 695 | default: 696 | { 697 | /* TODO: check for different size data, complex */ 698 | readoutput = fread (globalarray, sizeof (PetscScalar), gridpoints*dof, 699 | infile); 700 | 701 | if (wrongendian) 702 | { 703 | if (sizeof (PetscScalar) == 8) 704 | { 705 | for (i=0; i<gridpoints*dof; i++) 706 | globalarray[i]= GUINT64_SWAP_LE_BE_CONSTANT (globalarray[i]); 707 | } 708 | else if (sizeof (PetscScalar) == 4) 709 | { 710 | for (i=0; i<gridpoints*dof; i++) 711 | globalarray[i]= GUINT32_SWAP_LE_BE_CONSTANT (globalarray[i]); 712 | } 713 | else return 1; 714 | } 715 | 716 | } 717 | } 718 | 719 | if (compressed & COMPRESS_GZIP_MASK) 720 | pclose (infile); 721 | else 722 | fclose (infile); 723 | 724 | if (readoutput < gridpoints*dof) 725 | { 726 | char readerror [1050]; 727 | int readerr = ferror (infile); 728 | snprintf (readerror,1049, "CPU %d: error %d (%s) during read, got %d of %d items", 729 | rank, readerr, strerror (readerr), readoutput, gridpoints*dof); 730 | SETERRQ (PETSC_ERR_FILE_READ, readerror); 731 | } 732 | 733 | return 0; 734 | } 735 | 736 | 737 | #undef __FUNCT__ 738 | #define __FUNCT__ "IlluMultiStoreXML" 739 | 740 | /*++++++++++++++++++++++++++++++++++++++ 741 | This function opens, stores and closes the XML metadata file for IlluMulti 742 | format storage. It is called by IlluMultiSave(). 743 | 744 | int IlluMultiStoreXML It returns zero or an error code. 745 | 746 | MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD. 747 | 748 | char *basename Base file name. 749 | 750 | int rank CPU number to store data for. 751 | 752 | int compressed Data compression: if zero then no compression (fastest), 1-9 753 | then gzip compression level, 10-15 then gzip --best. If 16-31 then save 754 | guint32s representing relative values between min and max for each field, 755 | compressed according to this value minus 16. Likewise for 32-47 and 756 | guint16s, and 48-63 for guint8s. Yes, these alternative formats lose 757 | information and can't be used for accurate checkpointing, but they should 758 | retain enough data for visualization (except perhaps for the guint8s, which 759 | are possibly acceptable for vectors but certainly not contours). 760 | 761 | int dim Dimensionality of the space. 762 | 763 | int px Number of processors in the 764 | +latex+$x$-direction. 765 | +html+ <i>x</i>-direction. 766 | 767 | int py Number of processors in the 768 | +latex+$y$-direction. 769 | +html+ <i>y</i>-direction. 770 | 771 | int pz Number of processors in the 772 | +latex+$z$-direction. 773 | +html+ <i>z</i>-direction. 774 | 775 | int nx Number of grid points over the entire array in the 776 | +latex+$x$-direction. 777 | +html+ <i>x</i>-direction. 778 | 779 | int ny Number of grid points over the entire array in the 780 | +latex+$y$-direction. 781 | +html+ <i>y</i>-direction. 782 | 783 | int nz Number of grid points over the entire array in the 784 | +latex+$z$-direction. 785 | +html+ <i>z</i>-direction. 786 | 787 | PetscScalar wx Physical overall width in the 788 | +latex+$x$-direction. 789 | +html+ <i>x</i>-direction. 790 | 791 | PetscScalar wy Physical overall width in the 792 | +latex+$y$-direction. 793 | +html+ <i>y</i>-direction. 794 | 795 | PetscScalar wz Physical overall width in the 796 | +latex+$z$-direction. 797 | +html+ <i>z</i>-direction. 798 | 799 | int xm Number of grid points over the local part of the array in the 800 | +latex+$x$-direction. 801 | +html+ <i>x</i>-direction. 802 | 803 | int ym Number of grid points over the local part of the array in the 804 | +latex+$y$-direction. 805 | +html+ <i>y</i>-direction. 806 | 807 | int zm Number of grid points over the local part of the array in the 808 | +latex+$z$-direction. 809 | +html+ <i>z</i>-direction. 810 | 811 | int dof Degrees of freedom at each node, a.k.a. number of field variables. 812 | 813 | int sw Stencil width. 814 | 815 | int st Stencil type, given by the 816 | +latex+{\tt PETSc enum} values. 817 | +html+ <tt>PETSc enum</tt> values. 818 | 819 | int wrap Periodic type, given by the 820 | +latex+{\tt PETSc enum} values. 821 | +html+ <tt>PETSc enum</tt> values. 822 | 823 | char **fieldnames Names of the field variables. 824 | 825 | field_plot_type *fieldtypes Data (plot) types for field variables. 826 | 827 | PetscReal *fieldmin Minimum value of each field variable. 828 | 829 | PetscReal *fieldmax Maximum value of each field variable. 830 | 831 | int usermetacount Number of user metadata parameters. 832 | 833 | char **usermetanames User metadata parameter names. 834 | 835 | char **usermetadata User metadata parameter strings. 836 | ++++++++++++++++++++++++++++++++++++++*/ 837 | 838 | static int IlluMultiStoreXML 839 | (MPI_Comm comm, char *basename, int rank, int compressed, 840 | int dim, int px,int py,int pz, int nx,int ny,int nz, 841 | PetscScalar wx,PetscScalar wy,PetscScalar wz, int xm,int ym,int zm, 842 | int dof, int sw, int st, int wrap, char **fieldnames, 843 | field_plot_type *fieldtypes, PetscReal *fieldmin, PetscReal *fieldmax, 844 | int usermetacount, char **usermetanames, char **usermetadata) 845 | { 846 | xmlDocPtr thedoc; 847 | xmlNodePtr thenode; 848 | FILE *outfile; 849 | char filename [1000]; 850 | xmlChar buffer [1000]; 851 | int i; 852 | 853 | /*+The XML tags in the .meta file consist of: 854 | +latex+\begin{center} \begin{tabular}{|l|l|} \hline 855 | +html+ <center><table BORDER> 856 | +latex+{\tt IlluMulti} & Primary tag, with attributes {\tt version}, 857 | +latex+\\ & {\tt endian} ({\tt big} or {\tt little}) and 858 | +latex+{\tt compression} \\ & (none, 1-9, best, float*, long*, short* or 859 | +latex+char*\footnote{longnone, long1-long9 or longbest compresses each 860 | +latex+double to a 32-bit unsigned int, with 0 representing the minimum for 861 | +latex+that field and 2$^{32}-1$ representing the maximum; likewise 862 | +latex+shortnone, short1-short9, shortbest uses 16-bit unsigned ints, and 863 | +latex+char* uses 8-bit unsigned ints. float is there to indicate that 864 | +latex+PetscScalar is 4 bytes at save time, loading should adjust 865 | +latex+accordingly; that's not fully supported just yet. At some point 866 | +latex+I'll have to figure out how to represent complex.}). \\ \hline 867 | +html+ <tr><td><tt>IlluMulti</tt></td><td>Primary tag, with attributes 868 | +html+ <tt>version</tt>,<br><tt>endian</tt> (<tt>big</tt> or 869 | +html+ <tt>little</tt>) and <tt>compression</tt><br>(none, 1-9, best; 870 | +html+ long*, short* or char*)</td></tr> 871 | +*/ 872 | 873 | thedoc = xmlNewDoc (BAD_CAST "1.0"); 874 | thedoc->children = xmlNewDocNode (thedoc,NULL, BAD_CAST"IlluMulti", NULL); 875 | xmlSetProp (thedoc->children, BAD_CAST "version", BAD_CAST "0.2"); 876 | #ifdef WORDS_BIGENDIAN 877 | xmlSetProp (thedoc->children, BAD_CAST "endian", BAD_CAST "big"); 878 | #else 879 | xmlSetProp (thedoc->children, BAD_CAST "endian", BAD_CAST "little"); 880 | #endif 881 | if ((compressed & COMPRESS_INT_MASK) == COMPRESS_INT_LONG) 882 | strcpy ((char *) buffer, "long"); 883 | else if ((compressed & COMPRESS_INT_MASK) == COMPRESS_INT_SHORT) 884 | strcpy ((char *) buffer, "short"); 885 | else if ((compressed & COMPRESS_INT_MASK) == COMPRESS_INT_CHAR) 886 | strcpy ((char *) buffer, "char"); 887 | /* Note: this doesn't really support complex types yet... */ 888 | else if (sizeof (PetscScalar) == 4) 889 | strcpy ((char *) buffer, "float"); 890 | else 891 | *buffer = '\0'; 892 | if ((compressed & COMPRESS_GZIP_MASK) == 0) 893 | strcat ((char *) buffer, "none"); 894 | else if ((compressed & COMPRESS_GZIP_MASK) >= 1 && 895 | (compressed & COMPRESS_GZIP_MASK) <= 9) 896 | sprintf ((char *) buffer + strlen ((char *) buffer), "%d", 897 | compressed & COMPRESS_GZIP_MASK); 898 | else 899 | strcat ((char *) buffer, "best"); 900 | xmlSetProp (thedoc->children, BAD_CAST "compression", buffer); 901 | 902 | /*+ 903 | +latex+{\tt GlobalCPUs} & Number of CPUs in each direction, with 904 | +latex+\\ & attributes {\tt dimensions}, {\tt xwidth}, {\tt ywidth} and 905 | +latex+{\tt zwidth} \\ \hline 906 | +html+ <tr><td><tt>GlobalCPUs</tt></td><td>Number of CPUs in each 907 | +html+ direction, with<br>attributes <tt>dimensions</tt>, <tt>xwidth</tt>, 908 | +html+ <tt>ywidth</tt> and <tt>zwidth</tt></td></tr> 909 | +*/ 910 | 911 | thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "GlobalCPUs", NULL); 912 | snprintf ((char *) buffer, 999, "%d", dim); 913 | xmlSetProp (thenode, BAD_CAST "dimensions", buffer); 914 | snprintf ((char *) buffer, 999, "%d", px); 915 | xmlSetProp (thenode, BAD_CAST "xwidth", buffer); 916 | if (dim > 1) 917 | { 918 | snprintf ((char *) buffer, 999, "%d", py); 919 | xmlSetProp (thenode, BAD_CAST "ywidth", buffer); 920 | if (dim > 2) 921 | { 922 | snprintf ((char *) buffer, 999, "%d", pz); 923 | xmlSetProp (thenode, BAD_CAST "zwidth", buffer); 924 | } 925 | } 926 | 927 | /*+ 928 | +latex+{\tt GlobalSize} & Size of the entire distributed array, with 929 | +latex+\\ & attributes {\tt xwidth}, {\tt ywidth}, 930 | +latex+{\tt zwidth}, {\tt xsize}**, {\tt ysize}**, {\tt zsize}** and 931 | +latex+{\tt fields} \\ \hline 932 | +html+ <tr><td><tt>GlobalSize</tt></td><td>Size of the entire distributed 933 | +html+ array, with<br>attributes <tt>xwidth</tt>, <tt>ywidth</tt>, 934 | +html+ <tt>zwidth</tt>, <tt>xsize</tt>**, <tt>ysize</tt>**, 935 | +html+ <tt>zsize</tt>** and <tt>fields</tt></td></tr> 936 | +*/ 937 | 938 | thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "GlobalSize", NULL); 939 | snprintf ((char *) buffer, 999, "%d", nx); 940 | xmlSetProp (thenode, BAD_CAST "xwidth", buffer); 941 | snprintf ((char *) buffer, 999, "%g", wx); 942 | xmlSetProp (thenode, BAD_CAST "xsize", buffer); 943 | if (dim > 1) 944 | { 945 | snprintf ((char *) buffer, 999, "%d", ny); 946 | xmlSetProp (thenode, BAD_CAST "ywidth", buffer); 947 | snprintf ((char *) buffer, 999, "%g", wy); 948 | xmlSetProp (thenode, BAD_CAST "ysize", buffer); 949 | if (dim > 2) 950 | { 951 | snprintf ((char *) buffer, 999, "%d", nz); 952 | xmlSetProp (thenode, BAD_CAST "zwidth", buffer); 953 | snprintf ((char *) buffer, 999, "%g", wz); 954 | xmlSetProp (thenode, BAD_CAST "zsize", buffer); 955 | } 956 | } 957 | snprintf ((char *) buffer, 999, "%d", dof); 958 | xmlSetProp (thenode, BAD_CAST "fields", buffer); 959 | 960 | /*+ 961 | +latex+{\tt LocalSize} & Size of the entire local part of the array, 962 | +latex+\\ & with attributes {\tt xwidth}, {\tt ywidth} and {\tt zwidth} 963 | +latex+\\ \hline 964 | +html+ <tr><td><tt>LocalSize</tt></td><td>Size of the local part of the 965 | +html+ array, with<br>attributes <tt>xwidth</tt>, <tt>ywidth</tt> and 966 | +html+ <tt>zwidth</tt></td></tr> 967 | +*/ 968 | 969 | thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "LocalSize", NULL); 970 | snprintf ((char *) buffer, 999, "%d", xm); 971 | xmlSetProp (thenode, BAD_CAST "xwidth", buffer); 972 | if (dim > 1) 973 | { 974 | snprintf ((char *) buffer, 999, "%d", ym); 975 | xmlSetProp (thenode, BAD_CAST "ywidth", buffer); 976 | if (dim > 2) 977 | { 978 | snprintf ((char *) buffer, 999, "%d", zm); 979 | xmlSetProp (thenode, BAD_CAST "zwidth", buffer); 980 | } 981 | } 982 | 983 | /*+ 984 | +latex+{\tt Stencil} & Stencil and periodic data, with attributes 985 | +latex+\\ & {\tt width}, {\tt type} and {\tt periodic} (using {\tt PETSc 986 | +latex+enum} values) \\ \hline 987 | +html+ <tr><td><tt>Stencil</tt></td><td>Stencil and periodic data, with 988 | +html+ attributes<br><tt>width</tt>, <tt>type</tt> and <tt>periodic</tt> 989 | +html+ (using <tt>PETSc enum</tt> values)</td></tr> 990 | +*/ 991 | 992 | thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "Stencil", NULL); 993 | snprintf ((char *) buffer, 999, "%d", sw); 994 | xmlSetProp (thenode, BAD_CAST "width", buffer); 995 | snprintf ((char *) buffer, 999, "%d", (int) st); 996 | xmlSetProp (thenode, BAD_CAST "type", buffer); 997 | snprintf ((char *) buffer, 999, "%d", (int) wrap); 998 | xmlSetProp (thenode, BAD_CAST "periodic", buffer); 999 | 1000 | /*+ 1001 | +latex+{\tt Field} & Data on each field, attributes {\tt name}, 1002 | +latex+{\tt type}**, {\tt min} and {\tt max} \\ \hline 1003 | +html+ <tr><td><tt>Field</tt></td><td>Data on each field, attributes 1004 | +html+ <tt>name</tt>, <tt>type</tt>*, <tt>min</tt> and 1005 | +html+ <tt>max</tt></td></tr> 1006 | +*/ 1007 | 1008 | for (i=0; i<dof; i++) 1009 | { 1010 | thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "Field", NULL); 1011 | xmlSetProp (thenode, BAD_CAST "name", BAD_CAST fieldnames [i]); 1012 | snprintf ((char *) buffer, 999, "0x%.2x", fieldtypes [i]); 1013 | xmlSetProp (thenode, BAD_CAST "type", buffer); 1014 | snprintf ((char *) buffer, 999, "%g", fieldmin [i]); 1015 | xmlSetProp (thenode, BAD_CAST "min", buffer); 1016 | snprintf ((char *) buffer, 999, "%g", fieldmax [i]); 1017 | xmlSetProp (thenode, BAD_CAST "max", buffer); 1018 | } 1019 | 1020 | /*+ 1021 | +latex+{\tt User} & User parameters, attributes {\tt name} and {\tt value} 1022 | +latex+\\ \hline \end{tabular} \end{center} 1023 | +html+ <tr><td><tt>User</tt></td><td>User parameters, attributes 1024 | +html+ <tt>name</tt> and <tt>value</tt></td></tr></table></center> 1025 | *Lossy compression to smaller data types. 1026 | +latex+\par 1027 | +html+ <br> 1028 | **Represents new attribute for IlluMulti 0.2 file format. 1029 | +*/ 1030 | 1031 | for (i=0; i<usermetacount; i++) 1032 | { 1033 | thenode = xmlNewChild (thedoc->children, NULL, BAD_CAST "User", NULL); 1034 | xmlSetProp (thenode, BAD_CAST "name", BAD_CAST usermetanames [i]); 1035 | xmlSetProp (thenode, BAD_CAST "value", BAD_CAST usermetadata [i]); 1036 | } 1037 | 1038 | if (snprintf ((char *)filename, 999, "%s.cpu%.4d.meta", basename, rank) >999) 1039 | return 1; 1040 | if (!(outfile = fopen (filename, "w"))) 1041 | { 1042 | char openerror [1050]; 1043 | snprintf (openerror,1049, "CPU %d: error %d (%s) opening output file %s", 1044 | rank, errno, strerror (errno), filename); 1045 | SETERRQ (PETSC_ERR_FILE_OPEN, openerror); 1046 | } 1047 | xmlDocDump (outfile, thedoc); 1048 | xmlFreeDoc (thedoc); 1049 | fclose(outfile); 1050 | 1051 | return 0; 1052 | } 1053 | 1054 | 1055 | #undef __FUNCT__ 1056 | #define __FUNCT__ "IlluMultiStoreData" 1057 | 1058 | /*++++++++++++++++++++++++++++++++++++++ 1059 | This function stores the data file. 1060 | 1061 | int IlluMultiStoreData It returns zero or an error code. 1062 | 1063 | MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD. 1064 | 1065 | PetscScalar *globalarray Array from which to save the (local) data. 1066 | 1067 | char *basename Base file name. 1068 | 1069 | int rank CPU number to read data for. 1070 | 1071 | int compressed Data compression: if zero then no compression (fastest), 1-9 1072 | then gzip compression level, 10-15 then gzip --best. If 16-31 then save 1073 | guint32s representing relative values between min and max for each field, 1074 | compressed according to this value minus 16. Likewise for 32-47 and 1075 | guint16s, and 48-63 for guint8s. Yes, these alternative formats lose 1076 | information and can't be used for accurate checkpointing, but they should 1077 | retain enough data for visualization (except perhaps for the guint8s, which 1078 | are possibly acceptable for vectors but likely not contours). 1079 | 1080 | int gridpoints Number of gridpoints to store data for. 1081 | 1082 | int dof Degrees of freedom at each node, a.k.a. number of field variables. 1083 | 1084 | PetscScalar *fieldmin Minimum value of each field variable. 1085 | 1086 | PetscScalar *fieldmax Maximum value of each field variable. 1087 | ++++++++++++++++++++++++++++++++++++++*/ 1088 | 1089 | static int IlluMultiStoreData 1090 | (MPI_Comm comm, PetscScalar *globalarray, char *basename, int rank, 1091 | int compressed, int gridpoints, int dof, PetscScalar *fieldmin, 1092 | PetscScalar *fieldmax) 1093 | { 1094 | int i; 1095 | char filename [1000]; 1096 | FILE *outfile; 1097 | size_t writeout; 1098 | 1099 | if (compressed & COMPRESS_GZIP_MASK) 1100 | { 1101 | if ((compressed & COMPRESS_GZIP_MASK) >= 1 && 1102 | (compressed & COMPRESS_GZIP_MASK) <= 9) 1103 | { 1104 | if (snprintf (filename, 999, "gzip -c -%d > %s.cpu%.4d.data", 1105 | compressed & COMPRESS_GZIP_MASK, basename, rank) > 999) 1106 | return 1; 1107 | } 1108 | else 1109 | { 1110 | if (snprintf (filename,999, "gzip -c --best > %s.cpu%.4d.data", 1111 | basename,rank) > 999) 1112 | return 1; 1113 | } 1114 | if (!(outfile = popen (filename, "w"))) 1115 | { 1116 | char openerror [1050]; 1117 | snprintf (openerror, 1049, 1118 | "CPU %d: error %d (%s) opening output pipe \"%s\"", 1119 | rank, errno, strerror (errno), filename); 1120 | SETERRQ (PETSC_ERR_FILE_OPEN, openerror); 1121 | } 1122 | } 1123 | else 1124 | { 1125 | if (snprintf (filename, 999, "%s.cpu%.4d.data", basename, rank) > 999) 1126 | return 1; 1127 | if (!(outfile = fopen (filename, "w"))) 1128 | { 1129 | char openerror [1050]; 1130 | snprintf (openerror, 1049, 1131 | "CPU %d: error %d (%s) opening output file %s", 1132 | rank, errno, strerror (errno), filename); 1133 | SETERRQ (PETSC_ERR_FILE_OPEN, openerror); 1134 | } 1135 | } 1136 | 1137 | switch (compressed & COMPRESS_INT_MASK) 1138 | { 1139 | case COMPRESS_INT_LONG: 1140 | { 1141 | guint32 *storage; 1142 | if (!(storage = (guint32 *) malloc 1143 | (gridpoints * dof * sizeof (guint32)))) 1144 | return 1; 1145 | for (i=0; i<gridpoints*dof; i++) 1146 | storage [i] = (guint32) 1147 | (4294967295. * (globalarray [i] - fieldmin [i%dof]) / 1148 | (fieldmax [i%dof] - fieldmin [i%dof])); 1149 | writeout = fwrite (storage, sizeof (guint32), gridpoints*dof, outfile); 1150 | free (storage); 1151 | break; 1152 | } 1153 | case COMPRESS_INT_SHORT: 1154 | { 1155 | guint16 *storage; 1156 | if (!(storage = (guint16 *) malloc 1157 | (gridpoints * dof * sizeof (guint16)))) 1158 | return 1; 1159 | for (i=0; i<gridpoints*dof; i++) 1160 | storage [i] = (guint16) 1161 | (65535. * (globalarray [i] - fieldmin [i%dof]) / 1162 | (fieldmax [i%dof] - fieldmin [i%dof])); 1163 | writeout = fwrite (storage, sizeof (guint16), gridpoints*dof,outfile); 1164 | free (storage); 1165 | break; 1166 | } 1167 | case COMPRESS_INT_CHAR: 1168 | { 1169 | guint8 *storage; 1170 | if (!(storage = (guint8 *) malloc 1171 | (gridpoints * dof * sizeof (guint8)))) 1172 | return 1; 1173 | for (i=0; i<gridpoints*dof; i++) 1174 | storage [i] = (guint8) 1175 | (255. * (globalarray [i] - fieldmin [i%dof]) / 1176 | (fieldmax [i%dof] - fieldmin [i%dof])); 1177 | writeout = fwrite (storage, sizeof (guint8), gridpoints*dof, outfile); 1178 | free (storage); 1179 | break; 1180 | } 1181 | default: 1182 | writeout = fwrite (globalarray, sizeof (PetscScalar), gridpoints*dof, 1183 | outfile); 1184 | } 1185 | 1186 | if (compressed & COMPRESS_GZIP_MASK) 1187 | pclose (outfile); 1188 | else 1189 | fclose (outfile); 1190 | 1191 | if (writeout < gridpoints*dof) 1192 | { 1193 | char writerror [1050]; 1194 | int writerr = ferror (outfile); 1195 | snprintf (writerror,1049, "CPU %d: error %d (%s) during write, sent %d of %d items", 1196 | rank, writerr, strerror (writerr), writeout, gridpoints*dof); 1197 | SETERRQ (PETSC_ERR_FILE_READ, writerror); 1198 | } 1199 | 1200 | return 0; 1201 | } 1202 | 1203 | 1204 | #undef __FUNCT__ 1205 | #define __FUNCT__ "checkagree" 1206 | 1207 | /*++++++++++++++++++++++++++++++++++++++ 1208 | Ancillary routine for 1209 | +latex+{\tt IlluMultiRead()}: 1210 | +html+ <tt>IlluMultiRead()</tt>: 1211 | checks agreement of parameters and reports disagreement if necessary. 1212 | 1213 | int checkagree Returns 0 if they agree, 1 otherwise. 1214 | 1215 | MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD. 1216 | 1217 | int da Integer parameter from the existing DA. 1218 | 1219 | int file Integer parameter read from the file. 1220 | 1221 | char *parameter Parameter name for reporting. 1222 | ++++++++++++++++++++++++++++++++++++++*/ 1223 | 1224 | static inline int checkagree (MPI_Comm comm, int da, int file, char *parameter) 1225 | { 1226 | int ierr; 1227 | 1228 | if (da == file) 1229 | return 0; 1230 | 1231 | ierr = PetscPrintf (comm, "Disagreement in %s: da has %d, file %d\n", 1232 | parameter, da, file); CHKERRQ (ierr); 1233 | return 1; 1234 | } 1235 | 1236 | 1237 | #undef __FUNCT__ 1238 | #define __FUNCT__ "IlluMultiRead" 1239 | 1240 | /*++++++++++++++++++++++++++++++++++++++ 1241 | This reads the data into an existing distributed array and vector, checking 1242 | that the sizes are right etc. 1243 | 1244 | int IlluMultiRead It returns zero or an error code. 1245 | 1246 | MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD. 1247 | 1248 | DA theda Distributed array object controlling the data to read. 1249 | 1250 | Vec X Vector into which to read the data. 1251 | 1252 | char *basename Base file name. 1253 | 1254 | int *usermetacount Pointer to an int where we put the number of user metadata 1255 | parameters loaded. 1256 | 1257 | char ***usermetanames Pointer to a char ** where the loaded parameter names 1258 | are stored. This is 1259 | +latex+{\tt malloc}ed by this function, so a call to {\tt free()} 1260 | +html+ <tt>malloc</tt>ed by this function, so a call to <tt>free()</tt> 1261 | is needed to free up its data. 1262 | 1263 | char ***usermetadata Pointer to a char ** where the loaded parameter strings 1264 | are stored. This is 1265 | +latex+{\tt malloc}ed by this function, so a call to {\tt free()} 1266 | +html+ <tt>malloc</tt>ed by this function, so a call to <tt>free()</tt> 1267 | is needed to free up its data. 1268 | ++++++++++++++++++++++++++++++++++++++*/ 1269 | 1270 | int IlluMultiRead 1271 | (MPI_Comm comm, DA theda, Vec X, char *basename, 1272 | int *usermetacount, char ***usermetanames, char ***usermetadata) 1273 | { 1274 | int dim, px,py,pz, nx,ny,nz, xm,ym,zm, dof, sw, size, rank, ierr; 1275 | DAPeriodicType wrap, fwrap; 1276 | DAStencilType st, fst; 1277 | int fdim, fpx,fpy,fpz, fnx,fny,fnz, fxm,fym,fzm, fdof, fsw, fsize = 1; 1278 | int compressed, wrongendian; 1279 | char filename[1000], **fieldnames; 1280 | FILE *infile; 1281 | PetscScalar *globalarray, *fieldmin, *fieldmax; 1282 | 1283 | if (!comm) 1284 | comm = PETSC_COMM_WORLD; 1285 | 1286 | /*+ First it gets the properties of the distributed array for comparison with 1287 | the metadata. +*/ 1288 | ierr = DAGetInfo (theda, &dim, &nx,&ny,&nz, &px,&py,&pz, &dof, &sw, &wrap, 1289 | &st); CHKERRQ (ierr); 1290 | ierr = DAGetCorners (theda, PETSC_NULL,PETSC_NULL,PETSC_NULL, &xm,&ym,&zm); 1291 | CHKERRQ (ierr); 1292 | 1293 | /*+ Next it parses the XML metadata file into the document tree, and reads 1294 | its content into the appropriate structures, comparing parameters with 1295 | those of the existing distributed array structure. +*/ 1296 | ierr = MPI_Comm_size (comm, &size); CHKERRQ (ierr); 1297 | ierr = MPI_Comm_rank (comm, &rank); CHKERRQ (ierr); 1298 | 1299 | DPRINTF ("Parsing XML metadata file from basename %s\n", basename); 1300 | if (ierr = IlluMultiParseXML 1301 | (comm, basename, rank, &compressed, &wrongendian, &fdim, &fpx,&fpy,&fpz, 1302 | &fnx,&fny,&fnz, PETSC_NULL,PETSC_NULL,PETSC_NULL, &fxm,&fym,&fzm, &fdof, 1303 | &fsw, &fst, &fwrap, &fieldnames, PETSC_NULL, &fieldmin, &fieldmax, 1304 | usermetacount, usermetanames, usermetadata)) 1305 | return ierr; 1306 | 1307 | /* The size>1 checks are because we support loading multiple CPUs' data into 1308 | a 1-CPU distributed array, as long as the global dimensions are right. */ 1309 | DPRINTF ("Checking size agreement between DA and data to read\n",0); 1310 | fsize = fpx * ((dim > 1) ? fpy : 1) * ((dim > 2) ? fpz : 1); 1311 | if (ierr = checkagree (comm, dim, fdim, "dimensions")) return ierr; 1312 | if (size > 1 || fsize == 1) { 1313 | if (ierr = checkagree (comm, px, fpx, "cpu xwidth")) return ierr; 1314 | if (dim > 1) { 1315 | if (ierr = checkagree (comm, py, fpy, "cpu ywidth")) return ierr; 1316 | if (dim > 2) { 1317 | if (ierr = checkagree (comm, pz, fpz, "cpu zwidth")) return ierr; }}} 1318 | if (ierr = checkagree (comm, nx, fnx, "global xwidth")) return ierr; 1319 | if (dim > 1) { 1320 | if (ierr = checkagree (comm, ny, fny, "global ywidth")) return ierr; 1321 | if (dim > 2) { 1322 | if (ierr = checkagree (comm, nz, fnz, "global zwidth")) return ierr; }} 1323 | if (size > 1 || fsize == 1) { 1324 | if (ierr = checkagree (comm, xm, fxm, "local xwidth")) return ierr; 1325 | if (dim > 1) { 1326 | if (ierr = checkagree (comm, ym, fym, "local ywidth")) return ierr; 1327 | if (dim > 2) { 1328 | if (ierr = checkagree (comm, zm, fzm, "local zwidth")) return ierr; }}} 1329 | if (ierr = checkagree (comm, dof, fdof, "degrees of freedom")) return ierr; 1330 | if (ierr = checkagree (comm, sw, fsw, "stencil width")) return ierr; 1331 | if (ierr = checkagree (comm, (int)st, (int)fst, "stencil type")) return ierr; 1332 | if (ierr = checkagree (comm, (int)wrap, (int)fwrap, "periodic type")) 1333 | return ierr; 1334 | 1335 | /*+ Then it streams in the data in one big slurp. +*/ 1336 | ierr = VecGetArray (X, &globalarray); CHKERRQ (ierr); 1337 | if (size > 1 || fsize == 1) /* Default condition */ 1338 | { 1339 | DPRINTF ("Reading data\n",0); 1340 | ierr = IlluMultiParseData 1341 | (comm, globalarray, basename, rank, compressed, 1342 | xm * ((dim>1)?ym:1) * ((dim>2)?zm:1), dof, wrongendian, fieldmin, 1343 | fieldmax); 1344 | if (ierr) 1345 | { 1346 | xm = VecRestoreArray (X, &globalarray); CHKERRQ (xm); 1347 | return ierr; 1348 | } 1349 | } 1350 | else /* Getting distributed data into a single array */ 1351 | { 1352 | int i,j,k, xs,ys,zs; 1353 | PetscScalar *storage; 1354 | 1355 | /* Incrementally read in data to storage, store it in globalarray */ 1356 | /* First loop through the stored CPUs */ 1357 | for (pz=0, zs=0; pz<((dim>2)?fpz:1); pz++, zs+=fzm) 1358 | for (py=0, ys=0; py<((dim>1)?fpy:1); py++, ys+=fym) 1359 | for (px=0, xs=0; px<fpx; px++, xs+=fxm, rank++) 1360 | { 1361 | int gridpoints; 1362 | 1363 | DPRINTF ("Parsing XML metadata file for CPU %d\n", rank); 1364 | if (ierr = IlluMultiParseXML 1365 | (comm, basename, rank, &compressed, &wrongendian, &fdim, 1366 | &fpx,&fpy,&fpz, &fnx,&fny,&fnz, 1367 | PETSC_NULL,PETSC_NULL,PETSC_NULL, 1368 | &fxm,&fym,&fzm, &fdof, &fsw, &fst, &fwrap, 1369 | &fieldnames, PETSC_NULL, &fieldmin, &fieldmax, 1370 | usermetacount, usermetanames, usermetadata)) 1371 | return ierr; 1372 | gridpoints = fxm * ((dim>1)?fym:1) * ((dim>2)?fzm:1); 1373 | 1374 | /* This allocate/read/copy/free storage business is annoying. 1375 | Replacing it with "zero-copy" would involve changing the 1376 | IlluMultiParseData() API to allow loading into part of the 1377 | local array. But I'll leave that for future expansion, when 1378 | this will be able to do arbitrary m->n CPU loading as long as 1379 | the global sizes match. */ 1380 | storage = (PetscScalar *) malloc 1381 | (gridpoints * dof * sizeof (PetscScalar)); 1382 | DPRINTF ("Reading data for CPU %d\n", rank); 1383 | ierr = IlluMultiParseData 1384 | (comm, storage, basename, rank, compressed, gridpoints, dof, 1385 | wrongendian, fieldmin, fieldmax); 1386 | 1387 | fxm *= dof; /* so fxm is the number of PetscScalars to copy */ 1388 | i=1; 1389 | /* Now distribute */ 1390 | for (k=0; k<((dim>2)?fzm:1); k++) 1391 | for (j=0; j<((dim>1)?fym:1); j++) 1392 | BLAScopy_ (&fxm, storage + (k*fym + j)*fxm /* *dof */, &i, 1393 | globalarray + (((zs+k)*ny + ys+j)*nx + xs)*dof, &i); 1394 | 1395 | free (storage); 1396 | fxm /= dof; 1397 | } 1398 | } 1399 | 1400 | ierr = VecRestoreArray (X, &globalarray); CHKERRQ (ierr); 1401 | 1402 | return 0; 1403 | } 1404 | 1405 | 1406 | #undef __FUNCT__ 1407 | #define __FUNCT__ "IlluMultiLoad" 1408 | 1409 | /*++++++++++++++++++++++++++++++++++++++ 1410 | This creates a new distributed array of the appropriate size and loads the 1411 | data into the vector contained in it (as retrieved by 1412 | +latex+{\tt DAGetVector()}). 1413 | +html+ <tt>DAGetVector()</tt>). 1414 | It also reads the user metadata parameters into arrays stored at the supplied 1415 | pointers. 1416 | 1417 | int IlluMultiLoad It returns zero or an error code. 1418 | 1419 | MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD. 1420 | 1421 | char *basename Base file name. 1422 | 1423 | DA *theda Pointer to a DA object (to be created by this function). 1424 | 1425 | PetscScalar *wx Physical overall width in the 1426 | +latex+$x$-direction. 1427 | +html+ <i>x</i>-direction. 1428 | 1429 | PetscScalar *wy Physical overall width in the 1430 | +latex+$y$-direction. 1431 | +html+ <i>y</i>-direction. 1432 | 1433 | PetscScalar *wz Physical overall width in the 1434 | +latex+$z$-direction. 1435 | +html+ <i>z</i>-direction. 1436 | 1437 | field_plot_type **fieldtypes Data (plot) types for field variables. 1438 | 1439 | int *usermetacount Pointer to an int where we put the number of user metadata 1440 | parameters loaded. 1441 | 1442 | char ***usermetanames Pointer to a char ** where the loaded parameter names 1443 | are stored. This is 1444 | +latex+{\tt malloc}ed by this function, so a call to {\tt free()} 1445 | +html+ <tt>malloc</tt>ed by this function, so a call to <tt>free()</tt> 1446 | is needed to free up its data. 1447 | 1448 | char ***usermetadata Pointer to a char ** where the loaded parameter strings 1449 | are stored. This is 1450 | +latex+{\tt malloc}ed by this function, so a call to {\tt free()} 1451 | +html+ <tt>malloc</tt>ed by this function, so a call to <tt>free()</tt> 1452 | is needed to free up its data. 1453 | ++++++++++++++++++++++++++++++++++++++*/ 1454 | 1455 | int IlluMultiLoad 1456 | (MPI_Comm comm, char *basename, DA *theda, PetscScalar *wx,PetscScalar *wy, 1457 | PetscScalar *wz, field_plot_type **fieldtypes, int *usermetacount, 1458 | char ***usermetanames, char ***usermetadata) 1459 | { 1460 | int dim, px,py,pz, nx,ny,nz, dof, sw, fxm,fym,fzm, rank,size, ierr; 1461 | int compressed, wrongendian, fpx,fpy,fpz, fsize, xm,ym,zm; 1462 | DAPeriodicType wrap; 1463 | DAStencilType st; 1464 | char filename[1000], **fieldnames; 1465 | xmlChar *buffer; 1466 | FILE *infile; 1467 | xmlDocPtr thedoc; 1468 | xmlNodePtr thenode; 1469 | PetscScalar *globalarray, *fieldmin, *fieldmax; 1470 | Vec X; 1471 | 1472 | if (!comm) 1473 | comm = PETSC_COMM_WORLD; 1474 | 1475 | ierr = MPI_Comm_rank (comm, &rank); CHKERRQ (ierr); 1476 | ierr = MPI_Comm_size (comm, &size); CHKERRQ (ierr); 1477 | 1478 | /*+ First it gets the parameters from the XML file. +*/ 1479 | DPRINTF ("Parsing XML metadata file from basename %s\n", basename); 1480 | if (ierr = IlluMultiParseXML 1481 | (comm, basename, rank, &compressed, &wrongendian, &dim, &fpx,&fpy,&fpz, 1482 | &nx,&ny,&nz, wx,wy,wz, &fxm,&fym,&fzm, &dof, &sw, &st, &wrap, 1483 | &fieldnames, fieldtypes, &fieldmin, &fieldmax, 1484 | usermetacount, usermetanames, usermetadata)) 1485 | return ierr; 1486 | 1487 | fsize = fpx * ((dim > 1) ? fpy : 1) * ((dim > 2) ? fpz : 1); 1488 | if (size == fsize) { /* Default condition: n->n */ 1489 | px = fpx; py = fpy; pz = fpz; } 1490 | else if (size == 1) /* Special condition: n->1 */ 1491 | px = py = pz = 1; 1492 | else /* Error: incorrect number of CPUs */ 1493 | { 1494 | char errstring[100]; 1495 | snprintf (errstring, 100, 1496 | "Incorrect CPU count: current %d, stored %dx%dx%d=%d\n", 1497 | size, fpx,fpy,fpz, fsize); 1498 | SETERRQ (PETSC_ERR_ARG_SIZ, errstring); 1499 | } 1500 | 1501 | /*+ Next it creates a distributed array based on those parameters, and sets 1502 | the names of its fields. +*/ 1503 | switch (dim) 1504 | { 1505 | case 1: 1506 | { 1507 | DPRINTF ("Creating %d-%d 1-D DA\n", nx, dof); 1508 | ierr = DACreate1d (comm, wrap, nx, dof, sw, PETSC_NULL, 1509 | theda); CHKERRQ (ierr); 1510 | break; 1511 | } 1512 | case 2: 1513 | { 1514 | DPRINTF ("Creating %dx%d-%d 2-D DA\n", nx,ny, dof); 1515 | ierr = DACreate2d (comm, wrap, st, nx,ny, px,py, dof, sw, 1516 | PETSC_NULL, PETSC_NULL, theda); CHKERRQ (ierr); 1517 | break; 1518 | } 1519 | case 3: 1520 | { 1521 | DPRINTF ("Creating %dx%dx%d-%d 3-D DA\n", nx,ny,nz, dof); 1522 | ierr = DACreate3d (comm, wrap, st, nx,ny,nz, px,py,pz, dof, 1523 | sw, PETSC_NULL, PETSC_NULL, PETSC_NULL, theda); 1524 | CHKERRQ (ierr); 1525 | break; 1526 | } 1527 | default: 1528 | return 1; 1529 | } 1530 | 1531 | ierr = DAGetCorners (*theda, PETSC_NULL, PETSC_NULL, PETSC_NULL, 1532 | &xm,&ym,&zm); CHKERRQ (ierr); 1533 | if(size==fsize && (xm != fxm || ym != fym || zm != fzm)) 1534 | { 1535 | char errstring[100]; 1536 | snprintf (errstring, 100, 1537 | "Local array size mismatch: DA %dx%dx%d, stored %dx%dx%d\n", 1538 | xm,ym,zm, fxm,fym,fzm); 1539 | SETERRQ (PETSC_ERR_ARG_SIZ, errstring); 1540 | } 1541 | 1542 | for (px=0; px<dof; px++) 1543 | DASetFieldName (*theda, px, fieldnames [px]); 1544 | 1545 | /*+ Then it streams the data into the distributed array's vector in one big 1546 | slurp. +*/ 1547 | DPRINTF ("Getting global vector and array\n",0); 1548 | ierr = DAGetGlobalVector (*theda, &X); CHKERRQ (ierr); 1549 | ierr = VecGetArray (X, &globalarray); CHKERRQ (ierr); 1550 | if (size > 1 || fsize == 1) /* Default condition */ 1551 | { 1552 | DPRINTF ("Loading data in parallel\n",0); 1553 | ierr = IlluMultiParseData 1554 | (comm, globalarray, basename, rank, compressed, 1555 | fxm * ((dim>1)?fym:1) * ((dim>2)?fzm:1), dof, wrongendian, fieldmin, 1556 | fieldmax); 1557 | if (ierr) 1558 | { 1559 | fxm = VecRestoreArray (X, &globalarray); CHKERRQ (fxm); 1560 | return ierr; 1561 | } 1562 | } 1563 | else /* Getting distributed data into a single array */ 1564 | { 1565 | int i,j,k, xs,ys,zs; 1566 | PetscScalar *storage; 1567 | 1568 | /* Incrementally read in data to storage, store it in globalarray */ 1569 | /* First loop through the stored CPUs */ 1570 | DPRINTF ("Loading data into a single array on 1 CPU\n",0); 1571 | for (pz=0, zs=0; pz<((dim>2)?fpz:1); pz++, zs+=fzm) 1572 | for (py=0, ys=0; py<((dim>1)?fpy:1); py++, ys+=fym) 1573 | for (px=0, xs=0; px<fpx; px++, xs+=fxm, rank++) 1574 | { 1575 | int gridpoints, fdim, fnx,fny,fnz, fdof, fsw; 1576 | DAPeriodicType fwrap; 1577 | DAStencilType fst; 1578 | 1579 | if (ierr = IlluMultiParseXML 1580 | (comm, basename, rank, &compressed, &wrongendian, &fdim, 1581 | &fpx,&fpy,&fpz, &fnx,&fny,&fnz, 1582 | PETSC_NULL,PETSC_NULL,PETSC_NULL, 1583 | &fxm,&fym,&fzm, &fdof, &fsw, &fst, &fwrap, 1584 | &fieldnames, PETSC_NULL, &fieldmin, &fieldmax, 1585 | usermetacount, usermetanames, usermetadata)) 1586 | return 1; 1587 | gridpoints = fxm * ((dim>1)?fym:1) * ((dim>2)?fzm:1); 1588 | 1589 | /* This allocate/read/copy/free storage business is annoying. 1590 | Replacing it with "zero-copy" would involve changing the 1591 | IlluMultiParseData() API to allow loading into part of the 1592 | local array. But I'll leave that for future expansion, when 1593 | this will be able to do arbitrary m->n CPU loading as long as 1594 | the global sizes match. */ 1595 | storage = (PetscScalar *) malloc 1596 | (gridpoints * dof * sizeof (PetscScalar)); 1597 | ierr = IlluMultiParseData 1598 | (comm, storage, basename, rank, compressed, gridpoints, dof, 1599 | wrongendian, fieldmin, fieldmax); 1600 | 1601 | fxm *= dof; /* so fxm is the number of PetscScalars to copy */ 1602 | i=1; 1603 | /* Now distribute */ 1604 | for (k=0; k<((dim>2)?fzm:1); k++) 1605 | for (j=0; j<((dim>1)?fym:1); j++) 1606 | BLAScopy_ (&fxm, storage + (k*fym + j)*fxm /* *dof */, &i, 1607 | globalarray + (((zs+k)*ny + ys+j)*nx + xs)*dof, &i); 1608 | 1609 | free (storage); 1610 | fxm /= dof; 1611 | } 1612 | } 1613 | 1614 | DPRINTF ("Done.\n",0); 1615 | ierr = VecRestoreArray (X, &globalarray); CHKERRQ (ierr); 1616 | ierr = DARestoreGlobalVector (*theda, &X); CHKERRQ (ierr); 1617 | 1618 | return 0; 1619 | } 1620 | 1621 | 1622 | #undef __FUNCT__ 1623 | #define __FUNCT__ "IlluMultiSave" 1624 | 1625 | /*++++++++++++++++++++++++++++++++++++++ 1626 | This saves the vector X in multiple files, two per process. 1627 | 1628 | int IlluMultiSave it returns zero or an error code. 1629 | 1630 | MPI_Comm comm MPI communicator, if NULL it uses PETSC_COMM_WORLD. 1631 | 1632 | DA theda Distributed array object controlling data saved. 1633 | 1634 | Vec X Vector whose data are actually being saved. 1635 | 1636 | char *basename Base file name. 1637 | 1638 | int usermetacount Number of user metadata parameters. 1639 | 1640 | char **usermetanames User metadata parameter names. 1641 | 1642 | char **usermetadata User metadata parameter strings. 1643 | 1644 | int compressed Data compression: if zero then no compression (fastest), 1-9 1645 | then gzip compression level, 10-15 then gzip --best. If 16-31 then save 1646 | guint32s representing relative values between min and max for each field, 1647 | compressed according to this value minus 16. Likewise for 32-47 and 1648 | guint16s, and 48-63 for guint8s. Yes, these alternative formats lose 1649 | information and can't be used for accurate checkpointing, but they should 1650 | retain enough data for visualization (except perhaps for the guint8s, which 1651 | are possibly acceptable for vectors but certainly not contours). 1652 | ++++++++++++++++++++++++++++++++++++++*/ 1653 | 1654 | int IlluMultiSave 1655 | (MPI_Comm comm, DA theda, Vec X, char *basename, PetscScalar wx,PetscScalar wy, 1656 | PetscScalar wz, field_plot_type *fieldtypes, int usermetacount, 1657 | char **usermetanames, char **usermetadata, int compressed) 1658 | { 1659 | int dim, nx,ny,nz, px,py,pz, dof, sw, xs,ys,zs, xm,ym,zm, rank, i, ierr; 1660 | DAPeriodicType wrap; 1661 | DAStencilType st; 1662 | FILE *outfile; 1663 | char filename[1000], **fieldnames; 1664 | PetscReal *fieldmin, *fieldmax; 1665 | PetscScalar *globalarray; 1666 | guint64 *array64; 1667 | guint32 *array32; 1668 | guint16 *array16; 1669 | guint8 *array8; 1670 | 1671 | if (!comm) 1672 | comm = PETSC_COMM_WORLD; 1673 | 1674 | /*+First a check to verify a supported value of 1675 | +latex+{\tt compressed}, 1676 | +html+ <tt>compressed</tt>, 1677 | but no fancy guint* compression for complex! 1678 | +*/ 1679 | #ifdef PETSC_USE_COMPLEX 1680 | if (compressed < 0 || compressed > COMPRESS_GZIP_MASK) 1681 | return 1; 1682 | #else 1683 | if (compressed < 0 || compressed > (COMPRESS_GZIP_MASK | COMPRESS_INT_MASK)) 1684 | return 1; 1685 | #endif 1686 | 1687 | /*+Then get the distributed array parameters and processor number, and store 1688 | all this data in the XML .meta file. +*/ 1689 | ierr = DAGetInfo (theda, &dim, &nx,&ny,&nz, &px,&py,&pz, &dof, &sw, &wrap, 1690 | &st); CHKERRQ (ierr); 1691 | ierr = DAGetCorners (theda, &xs,&ys,&zs, &xm,&ym,&zm); CHKERRQ (ierr); 1692 | ierr = MPI_Comm_rank (comm, &rank); CHKERRQ (ierr); 1693 | 1694 | if (!(fieldnames = (char **) malloc (dof * sizeof (char *)))) 1695 | return 1; 1696 | if (!(fieldmin = (PetscScalar *) malloc (2 * dof * sizeof (PetscScalar)))) 1697 | return 1; 1698 | fieldmax = fieldmin + dof; 1699 | for (i=0; i<dof; i++) 1700 | { 1701 | ierr = DAGetFieldName (theda, i, fieldnames + i); CHKERRQ (ierr); 1702 | ierr = VecStrideMin (X, i, PETSC_NULL, fieldmin + i); CHKERRQ (ierr); 1703 | ierr = VecStrideMax (X, i, PETSC_NULL, fieldmax + i); CHKERRQ (ierr); 1704 | } 1705 | 1706 | if (ierr = IlluMultiStoreXML 1707 | (comm, basename, rank, compressed, dim, px,py,pz, nx,ny,nz, wx,wy,wz, 1708 | xm,ym,zm, dof, sw, (int) st, (int) wrap, fieldnames,fieldtypes, 1709 | fieldmin,fieldmax, usermetacount, usermetanames, usermetadata)) 1710 | return ierr; 1711 | 1712 | /*+ Finally, the data just stream out to the data file or gzip pipe in one 1713 | big lump. +*/ 1714 | ierr = VecGetArray (X, &globalarray); CHKERRQ (ierr); 1715 | IlluMultiStoreData (comm, globalarray, basename, rank, compressed, 1716 | xm * ((dim>1)?ym:1) * ((dim>2)?zm:1), dof, 1717 | fieldmin, fieldmax); 1718 | ierr = VecRestoreArray (X, &globalarray); CHKERRQ (ierr); 1719 | 1720 | free (fieldmin); 1721 | free (fieldnames); 1722 | 1723 | return 0; 1724 | }