c2numpy.h
Go to the documentation of this file.
1 // Copyright 2016 Jim Pivarski
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef C2NUMPY
16 #define C2NUMPY
17 
#include <inttypes.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>

#include <sstream>
#include <string>
#include <vector>
25 
// Version string of this header.
// The pointer itself is const so that the variable has internal linkage: a
// mutable, externally linked global defined in a header is a duplicate
// symbol as soon as two translation units include it.
const char* const C2NUMPY_VERSION = "1.2";
27 
28 // http://docs.scipy.org/doc/numpy/user/basics.types.html
// Column types understood by the writer; the names follow Numpy's basic types.
// Values are spelled out explicitly: fixed-width string types are encoded
// arithmetically as C2NUMPY_STRING + width, so the numbering matters.
typedef enum {
    C2NUMPY_BOOL       = 0,   // Boolean (True or False) stored as a byte
    C2NUMPY_INT        = 1,   // Default integer type (same as C long; normally either int64 or int32)
    C2NUMPY_INTC       = 2,   // Identical to C int (normally int32 or int64)
    C2NUMPY_INTP       = 3,   // Integer used for indexing (same as C ssize_t; normally either int32 or int64)
    C2NUMPY_INT8       = 4,   // Byte (-128 to 127)
    C2NUMPY_INT16      = 5,   // Integer (-32768 to 32767)
    C2NUMPY_INT32      = 6,   // Integer (-2147483648 to 2147483647)
    C2NUMPY_INT64      = 7,   // Integer (-9223372036854775808 to 9223372036854775807)
    C2NUMPY_UINT8      = 8,   // Unsigned integer (0 to 255)
    C2NUMPY_UINT16     = 9,   // Unsigned integer (0 to 65535)
    C2NUMPY_UINT32     = 10,  // Unsigned integer (0 to 4294967295)
    C2NUMPY_UINT64     = 11,  // Unsigned integer (0 to 18446744073709551615)
    C2NUMPY_FLOAT      = 12,  // Shorthand for float64.
    C2NUMPY_FLOAT16    = 13,  // Half precision float: sign bit, 5 bits exponent, 10 bits mantissa
    C2NUMPY_FLOAT32    = 14,  // Single precision float: sign bit, 8 bits exponent, 23 bits mantissa
    C2NUMPY_FLOAT64    = 15,  // Double precision float: sign bit, 11 bits exponent, 52 bits mantissa
    C2NUMPY_COMPLEX    = 16,  // Shorthand for complex128.
    C2NUMPY_COMPLEX64  = 17,  // Complex number, represented by two 32-bit floats (real and imaginary components)
    C2NUMPY_COMPLEX128 = 18,  // Complex number, represented by two 64-bit floats (real and imaginary components)

    C2NUMPY_STRING     = 100, // strings are C2NUMPY_STRING + their fixed size (up to 155)
    C2NUMPY_END        = 255  // ensure that c2numpy_type is at least a byte
} c2numpy_type;
53 
54 // a Numpy writer object
55 typedef struct {
56  FILE *file; // output file handle
57  std::string outputFilePrefix; // output file name, not including the rotating number and .npy
58  int64_t sizeSeekPosition; // (internal) keep track of number of rows to modify before closing
59  int64_t sizeSeekSize; // (internal)
60 
61  int32_t numColumns; // number of columns in the record array
62  std::vector<std::string> columnNames; // column names
63  std::vector<c2numpy_type> columnTypes; // column types
64 
65  int32_t numRowsPerFile; // maximum number of rows per file
66  int32_t currentColumn; // current column number
67  int32_t currentRowInFile; // current row number in the current file
68  int32_t currentFileNumber; // current file number
70 
72  // FIXME: all of the "<" signs should be system-dependent (they mean little endian)
73  static const char *c2numpy_bool = "|b1";
74  static const char *c2numpy_int = "<i8";
75  static const char *c2numpy_intc = "<i4"; // FIXME: should be system-dependent
76  static const char *c2numpy_intp = "<i8"; // FIXME: should be system-dependent
77  static const char *c2numpy_int8 = "|i1";
78  static const char *c2numpy_int16 = "<i2";
79  static const char *c2numpy_int32 = "<i4";
80  static const char *c2numpy_int64 = "<i8";
81  static const char *c2numpy_uint8 = "|u1";
82  static const char *c2numpy_uint16 = "<u2";
83  static const char *c2numpy_uint32 = "<u4";
84  static const char *c2numpy_uint64 = "<u8";
85  static const char *c2numpy_float = "<f8";
86  static const char *c2numpy_float16 = "<f2";
87  static const char *c2numpy_float32 = "<f4";
88  static const char *c2numpy_float64 = "<f8";
89  static const char *c2numpy_complex = "<c16";
90  static const char *c2numpy_complex64 = "<c8";
91  static const char *c2numpy_complex128 = "<c16";
92 
93  static const char *c2numpy_str[155] = {"|S0", "|S1", "|S2", "|S3", "|S4", "|S5", "|S6", "|S7", "|S8", "|S9", "|S10", "|S11", "|S12", "|S13", "|S14", "|S15", "|S16", "|S17", "|S18", "|S19", "|S20", "|S21", "|S22", "|S23", "|S24", "|S25", "|S26", "|S27", "|S28", "|S29", "|S30", "|S31", "|S32", "|S33", "|S34", "|S35", "|S36", "|S37", "|S38", "|S39", "|S40", "|S41", "|S42", "|S43", "|S44", "|S45", "|S46", "|S47", "|S48", "|S49", "|S50", "|S51", "|S52", "|S53", "|S54", "|S55", "|S56", "|S57", "|S58", "|S59", "|S60", "|S61", "|S62", "|S63", "|S64", "|S65", "|S66", "|S67", "|S68", "|S69", "|S70", "|S71", "|S72", "|S73", "|S74", "|S75", "|S76", "|S77", "|S78", "|S79", "|S80", "|S81", "|S82", "|S83", "|S84", "|S85", "|S86", "|S87", "|S88", "|S89", "|S90", "|S91", "|S92", "|S93", "|S94", "|S95", "|S96", "|S97", "|S98", "|S99", "|S100", "|S101", "|S102", "|S103", "|S104", "|S105", "|S106", "|S107", "|S108", "|S109", "|S110", "|S111", "|S112", "|S113", "|S114", "|S115", "|S116", "|S117", "|S118", "|S119", "|S120", "|S121", "|S122", "|S123", "|S124", "|S125", "|S126", "|S127", "|S128", "|S129", "|S130", "|S131", "|S132", "|S133", "|S134", "|S135", "|S136", "|S137", "|S138", "|S139", "|S140", "|S141", "|S142", "|S143", "|S144", "|S145", "|S146", "|S147", "|S148", "|S149", "|S150", "|S151", "|S152", "|S153", "|S154"};
94 
95  switch (type) {
96  case C2NUMPY_BOOL:
97  return c2numpy_bool;
98  case C2NUMPY_INT:
99  return c2numpy_int;
100  case C2NUMPY_INTC:
101  return c2numpy_intc;
102  case C2NUMPY_INTP:
103  return c2numpy_intp;
104  case C2NUMPY_INT8:
105  return c2numpy_int8;
106  case C2NUMPY_INT16:
107  return c2numpy_int16;
108  case C2NUMPY_INT32:
109  return c2numpy_int32;
110  case C2NUMPY_INT64:
111  return c2numpy_int64;
112  case C2NUMPY_UINT8:
113  return c2numpy_uint8;
114  case C2NUMPY_UINT16:
115  return c2numpy_uint16;
116  case C2NUMPY_UINT32:
117  return c2numpy_uint32;
118  case C2NUMPY_UINT64:
119  return c2numpy_uint64;
120  case C2NUMPY_FLOAT:
121  return c2numpy_float;
122  case C2NUMPY_FLOAT16:
123  return c2numpy_float16;
124  case C2NUMPY_FLOAT32:
125  return c2numpy_float32;
126  case C2NUMPY_FLOAT64:
127  return c2numpy_float64;
128  case C2NUMPY_COMPLEX:
129  return c2numpy_complex;
130  case C2NUMPY_COMPLEX64:
131  return c2numpy_complex64;
132  case C2NUMPY_COMPLEX128:
133  return c2numpy_complex128;
134  default:
135  if (0 < type - C2NUMPY_STRING && type - C2NUMPY_STRING < 155)
136  return c2numpy_str[type - C2NUMPY_STRING];
137  }
138 
139  return NULL;
140 }
141 
142 int c2numpy_init(c2numpy_writer *writer, const std::string outputFilePrefix, int32_t numRowsPerFile) {
143  writer->file = NULL;
144  writer->outputFilePrefix = outputFilePrefix;
145  writer->sizeSeekPosition = 0;
146  writer->sizeSeekSize = 0;
147 
148  writer->numColumns = 0;
149 
150  writer->numRowsPerFile = numRowsPerFile;
151  writer->currentColumn = 0;
152  writer->currentRowInFile = 0;
153  writer->currentFileNumber = 0;
154 
155  return 0;
156 }
157 
159  writer->numColumns += 1;
160  writer->columnNames.push_back(name);
161  writer->columnTypes.push_back(type);
162  return 0;
163 }
164 
166  std::stringstream fileNameStream;
167  fileNameStream << writer->outputFilePrefix;
168  fileNameStream << writer->currentFileNumber;
169  fileNameStream << ".npy";
170  std::string fileName = fileNameStream.str();
171  writer->file = fopen(fileName.c_str(), "wb");
172 
173  std::stringstream headerStream;
174  headerStream << "{'descr': [";
175 
176  int column;
177  for (column = 0; column < writer->numColumns; ++column) {
178  headerStream << "('" << writer->columnNames[column] << "', '" << c2numpy_descr(writer->columnTypes[column]) << "')";
179  if (column < writer->numColumns - 1)
180  headerStream << ", ";
181  }
182 
183  headerStream << "], 'fortran_order': False, 'shape': (";
184 
185  writer->sizeSeekPosition = headerStream.str().size();
186 
187  headerStream << writer->numRowsPerFile;
188 
189  writer->sizeSeekSize = headerStream.str().size() - writer->sizeSeekPosition;
190 
191  headerStream << ",), }";
192 
193  int headerSize = headerStream.str().size();
194  char version = 1;
195 
196  if (headerSize > 65535) version = 2;
197  while ((6 + 2 + (version == 1 ? 2 : 4) + headerSize) % 16 != 0) {
198  headerSize += 1;
199  headerStream << " ";
200  if (headerSize > 65535) version = 2;
201  }
202 
203  fwrite("\x93NUMPY", 1, 6, writer->file);
204  if (version == 1) {
205  fwrite("\x01\x00", 1, 2, writer->file);
206  fwrite(&headerSize, 1, 2, writer->file);
207  writer->sizeSeekPosition += 6 + 2 + 2;
208  }
209  else {
210  fwrite("\x02\x00", 1, 2, writer->file);
211  fwrite(&headerSize, 1, 4, writer->file);
212  writer->sizeSeekPosition += 6 + 2 + 4;
213  }
214 
215  std::string header = headerStream.str();
216  fwrite(header.c_str(), 1, header.size(), writer->file);
217 
218  return 0;
219 }
220 
// Prologue of every item-writing function: expects a `writer` pointer in
// scope. If no output file is open it calls c2numpy_open, and makes the
// enclosing function return that call's status when it is nonzero.
#define C2NUMPY_CHECK_ITEM { \
    if (!writer->file) { \
        const int openStatus = c2numpy_open(writer); \
        if (openStatus) \
            return openStatus; \
    } \
}
228 
// Epilogue of every item-writing function: expects a `writer` pointer in
// scope and always ends the enclosing function with `return 0`.
// A column index of 0 means the item just written completed a row, so the row
// counter advances; once the file holds numRowsPerFile rows it is closed and
// the counters move on to the next file number.
#define C2NUMPY_INCREMENT_ITEM { \
    if (writer->currentColumn == 0) { \
        ++writer->currentRowInFile; \
        if (writer->numRowsPerFile == writer->currentRowInFile) { \
            fclose(writer->file); \
            writer->file = NULL; \
            ++writer->currentFileNumber; \
            writer->currentRowInFile = 0; \
        } \
    } \
    return 0; \
}
241 
242 int c2numpy_bool(c2numpy_writer *writer, int8_t data) { // "bool" is just a byte
244  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_BOOL) return -1;
245  fwrite(&data, sizeof(int8_t), 1, writer->file);
246  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
248 }
249 
250 int c2numpy_int(c2numpy_writer *writer, int64_t data) { // Numpy's default int is 64-bit
252  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_INT) return -1;
253  fwrite(&data, sizeof(int64_t), 1, writer->file);
254  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
256 }
257 
258 int c2numpy_intc(c2numpy_writer *writer, int data) { // the built-in C int
260  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_INTC) return -1;
261  fwrite(&data, sizeof(int), 1, writer->file);
262  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
264 }
265 
266 int c2numpy_intp(c2numpy_writer *writer, size_t data) { // intp is Numpy's way of saying size_t
268  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_INTP) return -1;
269  fwrite(&data, sizeof(size_t), 1, writer->file);
270  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
272 }
273 
274 int c2numpy_int8(c2numpy_writer *writer, int8_t data) {
276  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_INT8) return -1;
277  fwrite(&data, sizeof(int8_t), 1, writer->file);
278  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
280 }
281 
282 int c2numpy_int16(c2numpy_writer *writer, int16_t data) {
284  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_INT16) return -1;
285  fwrite(&data, sizeof(int16_t), 1, writer->file);
286  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
288 }
289 
290 int c2numpy_int32(c2numpy_writer *writer, int32_t data) {
292  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_INT32) return -1;
293  fwrite(&data, sizeof(int32_t), 1, writer->file);
294  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
296 }
297 
298 int c2numpy_int64(c2numpy_writer *writer, int64_t data) {
300  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_INT64) return -1;
301  fwrite(&data, sizeof(int64_t), 1, writer->file);
302  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
304 }
305 
306 int c2numpy_uint8(c2numpy_writer *writer, uint8_t data) {
308  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_UINT8) return -1;
309  fwrite(&data, sizeof(uint8_t), 1, writer->file);
310  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
312 }
313 
314 int c2numpy_uint16(c2numpy_writer *writer, uint16_t data) {
316  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_UINT16) return -1;
317  fwrite(&data, sizeof(uint16_t), 1, writer->file);
318  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
320 }
321 
322 int c2numpy_uint32(c2numpy_writer *writer, uint32_t data) {
324  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_UINT32) return -1;
325  fwrite(&data, sizeof(uint32_t), 1, writer->file);
326  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
328 }
329 
330 int c2numpy_uint64(c2numpy_writer *writer, uint64_t data) {
332  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_UINT64) return -1;
333  fwrite(&data, sizeof(uint64_t), 1, writer->file);
334  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
336 }
337 
338 int c2numpy_float(c2numpy_writer *writer, double data) { // Numpy's "float" is a double
340  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_FLOAT) return -1;
341  fwrite(&data, sizeof(double), 1, writer->file);
342  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
344 }
345 
346 // int c2numpy_float16(c2numpy_writer *writer, ??? data) { // how to do float16 in C?
347 // C2NUMPY_CHECK_ITEM
348 // if (writer->columnTypes[writer->currentColumn] != C2NUMPY_FLOAT16) return -1;
349 // fwrite(&data, sizeof(???), 1, writer->file);
350 // writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
351 // C2NUMPY_INCREMENT_ITEM
352 // }
353 
354 int c2numpy_float32(c2numpy_writer *writer, float data) {
356  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_FLOAT32) return -1;
357  fwrite(&data, sizeof(float), 1, writer->file);
358  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
360 }
361 
362 int c2numpy_float64(c2numpy_writer *writer, double data) {
364  if (writer->columnTypes[writer->currentColumn] != C2NUMPY_FLOAT64) return -1;
365  fwrite(&data, sizeof(double), 1, writer->file);
366  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
368 }
369 
370 // int c2numpy_complex(c2numpy_writer *writer, ??? data) { // how to do complex in C?
371 // C2NUMPY_CHECK_ITEM
372 // if (writer->columnTypes[writer->currentColumn] != C2NUMPY_COMPLEX) return -1;
373 // fwrite(&data, sizeof(???), 1, writer->file);
374 // writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
375 // C2NUMPY_INCREMENT_ITEM
376 // }
377 
378 // int c2numpy_complex64(c2numpy_writer *writer, ??? data) {
379 // C2NUMPY_CHECK_ITEM
380 // if (writer->columnTypes[writer->currentColumn] != C2NUMPY_COMPLEX64) return -1;
381 // fwrite(&data, sizeof(???), 1, writer->file);
382 // writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
383 // C2NUMPY_INCREMENT_ITEM
384 // }
385 
386 // int c2numpy_complex128(c2numpy_writer *writer, ??? data) {
387 // C2NUMPY_CHECK_ITEM
388 // if (writer->columnTypes[writer->currentColumn] != C2NUMPY_COMPLEX128) return -1;
389 // fwrite(&data, sizeof(???), 1, writer->file);
390 // writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
391 // C2NUMPY_INCREMENT_ITEM
392 // }
393 
394 int c2numpy_string(c2numpy_writer *writer, const char *data) {
396 
397  int stringlength = writer->columnTypes[writer->currentColumn] - C2NUMPY_STRING;
398  if (0 < stringlength && stringlength < 155)
399  fwrite(data, 1, stringlength, writer->file);
400  else
401  return -1;
402  writer->currentColumn = (writer->currentColumn + 1) % writer->numColumns;
403 
405 }
406 
408  if (writer->file != NULL) {
409  // we wrote fewer rows than we promised
410  if (writer->currentRowInFile < writer->numRowsPerFile) {
411  // so go back to the part of the header where that was written
412  fseek(writer->file, writer->sizeSeekPosition, SEEK_SET);
413  // overwrite it with spaces
414  int i;
415  for (i = 0; i < writer->sizeSeekSize; ++i)
416  fputc(' ', writer->file);
417  // now go back and write it again (it MUST be fewer or an equal number of digits)
418  fseek(writer->file, writer->sizeSeekPosition, SEEK_SET);
419  fprintf(writer->file, "%d", writer->currentRowInFile);
420  }
421  // now close it
422  fclose(writer->file);
423  }
424 
425  return 0;
426 }
427 
428 #endif // C2NUMPY
static QCString name
Definition: declinfo.cpp:673
int c2numpy_int32(c2numpy_writer *writer, int32_t data)
Definition: c2numpy.h:290
int c2numpy_init(c2numpy_writer *writer, const std::string outputFilePrefix, int32_t numRowsPerFile)
Definition: c2numpy.h:142
const char * C2NUMPY_VERSION
Definition: c2numpy.h:26
#define C2NUMPY_CHECK_ITEM
Definition: c2numpy.h:221
int c2numpy_int(c2numpy_writer *writer, int64_t data)
Definition: c2numpy.h:250
int32_t currentFileNumber
Definition: c2numpy.h:68
int c2numpy_int16(c2numpy_writer *writer, int16_t data)
Definition: c2numpy.h:282
#define C2NUMPY_INCREMENT_ITEM
Definition: c2numpy.h:229
std::string string
Definition: nybbler.cc:12
int c2numpy_bool(c2numpy_writer *writer, int8_t data)
Definition: c2numpy.h:242
std::vector< std::string > columnNames
Definition: c2numpy.h:62
int c2numpy_uint64(c2numpy_writer *writer, uint64_t data)
Definition: c2numpy.h:330
int c2numpy_int8(c2numpy_writer *writer, int8_t data)
Definition: c2numpy.h:274
int c2numpy_close(c2numpy_writer *writer)
Definition: c2numpy.h:407
int32_t numRowsPerFile
Definition: c2numpy.h:65
int c2numpy_uint16(c2numpy_writer *writer, uint16_t data)
Definition: c2numpy.h:314
int32_t currentColumn
Definition: c2numpy.h:66
std::vector< c2numpy_type > columnTypes
Definition: c2numpy.h:63
int c2numpy_addcolumn(c2numpy_writer *writer, const std::string name, c2numpy_type type)
Definition: c2numpy.h:158
fileName
Definition: dumpTree.py:9
int c2numpy_intp(c2numpy_writer *writer, size_t data)
Definition: c2numpy.h:266
int64_t sizeSeekSize
Definition: c2numpy.h:59
int c2numpy_open(c2numpy_writer *writer)
Definition: c2numpy.h:165
int c2numpy_uint32(c2numpy_writer *writer, uint32_t data)
Definition: c2numpy.h:322
int c2numpy_uint8(c2numpy_writer *writer, uint8_t data)
Definition: c2numpy.h:306
int c2numpy_int64(c2numpy_writer *writer, int64_t data)
Definition: c2numpy.h:298
int c2numpy_float64(c2numpy_writer *writer, double data)
Definition: c2numpy.h:362
const char * c2numpy_descr(c2numpy_type type)
Definition: c2numpy.h:71
int c2numpy_string(c2numpy_writer *writer, const char *data)
Definition: c2numpy.h:394
c2numpy_type
Definition: c2numpy.h:29
std::string outputFilePrefix
Definition: c2numpy.h:57
int64_t sizeSeekPosition
Definition: c2numpy.h:58
int c2numpy_float(c2numpy_writer *writer, double data)
Definition: c2numpy.h:338
int c2numpy_float32(c2numpy_writer *writer, float data)
Definition: c2numpy.h:354
int32_t currentRowInFile
Definition: c2numpy.h:67
std::vector< std::string > column
int c2numpy_intc(c2numpy_writer *writer, int data)
Definition: c2numpy.h:258
FILE * file
Definition: c2numpy.h:56
int32_t numColumns
Definition: c2numpy.h:61