FelixCompress.hh
Go to the documentation of this file.
1 // FelixCompress.hh
2 // Milo Vermeulen 2018
3 // Compression studies in software, primarily focussing on speed.
4 
5 #ifndef artdaq_dune_Overlays_FelixComp_hh
6 #define artdaq_dune_Overlays_FelixComp_hh
7 
8 // Option for previous value subtraction.
9 #define PREV
10 
11 #include <bitset> // testing
12 #include <cmath> // log
13 #include <iomanip>
14 #include <iostream>
15 #include <string>
16 #include <unordered_map>
17 #include <vector>
18 #include <algorithm>
19 #include <chrono>
20 
21 #include "artdaq-core/Data/Fragment.hh"
24 
25 namespace dune {
26 
27 // A Huffman tree is literally a bunch of nodes that are connected in a certain
28 // way and may or may not have values assigned to them.
29 struct HuffTree {
30  struct Node {
31  Node* left = NULL;
32  Node* right = NULL;
33  uint16_t value = 0;
34  size_t huffcode = 0;
35  uint8_t hufflength = 0;
36  unsigned frequency = 0;
37  bool hasParent = false;
38 
39  void operator=(const Node& other) {
40  if (this != &other) {
41  left = other.left;
42  right = other.right;
43  value = other.value;
44  huffcode = other.huffcode;
45  huffcode = other.hufflength;
46  frequency = other.frequency;
47  hasParent = other.hasParent;
48  }
49  }
50 
51  // Operator< overloading for ordering by frequency. If the frequency is the
52  // same, the values in the nodes are taken into account.
53  bool operator<(const Node& other) const {
54  return (frequency < other.frequency) ||
55  ((frequency == other.frequency) && (value < other.value));
56  }
57  };
58 
59  bool valuecomp(const Node& one, const Node& other) const {
60  return one.value < other.value;
61  }
62 
65  std::unordered_map<uint16_t, Node*> nodes;
66 
67  // Function to get a node from a value.
68  Node* operator()(const uint16_t value) { return nodes[value]; }
69 
70  // Function to print all leaves from bottom to top.
71  void print(Node* loc) {
72  if (loc->left == NULL) {
73  std::cout << "Node with value " << loc->value << ", frequency "
74  << loc->frequency << " and huffcode "
75  << std::bitset<32>(loc->huffcode) << " length "
76  << (unsigned)loc->hufflength << '\n';
77  } else {
78  print(loc->left);
79  print(loc->right);
80  }
81  return;
82  }
83  void print() { return print(root); }
84 
85  // Function to print a Huffman tree to a file.
87  std::ofstream ofile(filename);
88 
89  ofile << "Value\tFrequency\tCode\tLength\n";
90  std::vector<Node*> local_nlist;
91 
92  for(auto p : nodes) {
93  Node* n = p.second;
94  if(n->frequency == 0) continue;
95  local_nlist.push_back(n);
96  }
97 
98  std::sort(local_nlist.begin(), local_nlist.end());
99 
100  for(unsigned i = 0; i < local_nlist.size(); ++i) {
101  Node* n = local_nlist[i];
102  if(n->value > (1<<16)/2) {
103  ofile << (int)n->value-(1<<16) << '\t' << n->frequency << '\t' << n->huffcode << '\t' << (unsigned)n->hufflength << '\n';
104  } else {
105  if(n->value > 1000) continue; // Disregard start values.
106  ofile << n->value << '\t' << n->frequency << '\t' << n->huffcode << '\t' << (unsigned)n->hufflength << '\n';
107  }
108  }
109 
110  ofile.close();
111  }
112 
113  // Function to generate codes for a Huffman tree.
114  void generate_codes(Node* loc, size_t buff = 0, uint8_t len = 0) {
115  // Assign a string when a leaf is reached. Otherwise go one layer deeper.
116  if (loc->left == NULL) {
117  loc->huffcode = buff;
118  loc->hufflength = len;
119  nodes[loc->value] = loc;
120  } else {
121  // Move one node down.
122  generate_codes(loc->left, buff, len + 1);
123  generate_codes(loc->right, buff | (1 << len), len + 1);
124  }
125 
126  // Move one node up.
127  return;
128  }
129  void generate_codes() { return generate_codes(root); }
130 
131  // Function to create a tree from a buffer of nodes.
132  std::unordered_map<uint16_t, Node*> make_tree(std::vector<Node> nodevec) {
133  // Order vector to get the same results every time.
134  std::sort(nodevec.begin(), nodevec.end());
135  // Create a new buffer to hold the leaves and N-1 new nodes.
136  const unsigned totlen = 2 * nodevec.size() - 1;
137  Node* begin = &nodevec[0];
138  nodelist = new Node[totlen];
139  std::copy(begin, begin + nodevec.size(), nodelist);
140 
141  // Continue until the buffer is filled.
142  for (unsigned i = 0; i < totlen - nodevec.size(); ++i) {
143  // Find the lowest-frequency non-parented nodes.
144  Node* lowest = nodelist;
145  while (lowest->hasParent) {
146  ++lowest;
147  }
148  Node* sec_lowest = lowest + 1;
149  while (sec_lowest->hasParent) {
150  ++sec_lowest;
151  }
152  for (Node* n = nodelist; n != nodelist + nodevec.size() + i; ++n) {
153  // Disregard parented nodes.
154  if (n->hasParent) {
155  continue;
156  }
157  if (n->frequency < lowest->frequency) {
158  sec_lowest = lowest;
159  lowest = n;
160  } else if (n->frequency < sec_lowest->frequency && n != lowest) {
161  sec_lowest = n;
162  }
163  }
164 
165  // Make lowest value the left node in case of equal frequency.
166  if (lowest->frequency == sec_lowest->frequency &&
167  lowest->value > sec_lowest->value) {
168  Node* tmp = lowest;
169  lowest = sec_lowest;
170  sec_lowest = tmp;
171  }
172 
173  // Link the lowest frequency nodes to a new one in the buffer.
174  Node* newNode = &nodelist[nodevec.size() + i];
175  newNode->left = lowest;
176  newNode->right = sec_lowest;
177  newNode->frequency = lowest->frequency + sec_lowest->frequency;
178  lowest->hasParent = true;
179  sec_lowest->hasParent = true;
180  }
181 
182  // Assign root to the last unparented node and generate codes.
183  root = &nodelist[totlen - 1];
184  generate_codes();
185 
186  // Output the generated map.
187  return nodes;
188  }
189 
190  ~HuffTree() { delete[] nodelist; }
191 };
192 
193 struct MetaData {
194  uint32_t comp_method : 2, unique_values : 14, num_frames : 16;
195 };
196 
197 //========================
198 // FELIX compressor class
199 //========================
201  private:
202  // Input and output buffers.
203  const void* input;
204  const size_t input_length;
205  const size_t num_frames = input_length / sizeof(FelixFrame);
206 
207  // Frame access to data.
208  FelixFrame const* frame_(const size_t& frame_num = 0) const {
209  return static_cast<FelixFrame const*>(input) + frame_num;
210  }
211 
212  // Huffman tree storage for easy access.
214 
215  public:
216  FelixCompressor(const uint8_t* data, const size_t num_frames = 10000)
217  : input(data), input_length(num_frames * sizeof(dune::FelixFrame)) {}
219  : input(frag.dataBeginBytes()), input_length(frag.dataSizeBytes()) {}
220 
221  // Function to store metadata in a recognisable format.
222  void store_metadata(std::vector<char>& out) {
223  // Clear the vector as a first action to make sure there is a clean slate to
224  // work with.
225  out.clear();
226 
227  MetaData meta = {1, 0, (uint32_t)num_frames};
228  out.resize(sizeof(meta));
229  memcpy(&out[0], &meta, sizeof(meta));
230  }
231 
232  // Function to reduce headers and error fields upon error encounter.
233  void header_reduce(std::vector<char>& out) {
234  // Fields for storing which frames have faulty headers.
235  std::vector<uint8_t> bad_headers(num_frames/8 + 1);
236  // Check all error fields and increments of timestamp and CCC.
237  for (unsigned i = 1; i < num_frames; ++i) {
238  bool check_failed = false;
239 
240  // WIB header checks.
241  check_failed |= frame_()->sof() ^ frame_(i)->sof();
242  check_failed |= frame_()->version() ^ frame_(i)->version();
243  check_failed |= frame_()->fiber_no() ^ frame_(i)->fiber_no();
244  check_failed |= frame_()->crate_no() ^ frame_(i)->crate_no();
245  check_failed |= frame_()->slot_no() ^ frame_(i)->slot_no();
246  check_failed |= frame_()->mm() ^ frame_(i)->mm();
247  check_failed |= frame_()->oos() ^ frame_(i)->oos();
248  check_failed |= frame_()->wib_errors() ^ frame_(i)->wib_errors();
249  check_failed |= frame_()->z() ^ frame_(i)->z();
250 
251  check_failed |=
252  (uint64_t)(frame_()->timestamp() + 25 * i) ^ frame_(i)->timestamp();
253 
254  // COLDATA header checks.
255  for (unsigned j = 0; j < 4; ++j) {
256  check_failed |= frame_()->s1_error(j) ^ frame_(i)->s1_error(j);
257  check_failed |= frame_()->s2_error(j) ^ frame_(i)->s2_error(j);
258  check_failed |= frame_()->checksum_a(j) ^ frame_(i)->checksum_a(j);
259  check_failed |= frame_()->checksum_b(j) ^ frame_(i)->checksum_b(j);
260  check_failed |=
261  frame_()->error_register(j) ^ frame_(i)->error_register(j);
262  for (unsigned h = 0; h < 8; ++h) {
263  check_failed |= frame_()->hdr(j, h) ^ frame_(i)->hdr(j, h);
264  }
265 
266  check_failed |=
267  (uint16_t)(frame_()->coldata_convert_count(j) + 25 * i) ^
268  frame_(i)->coldata_convert_count(j);
269  }
270 
271  // Set a bit if anything failed.
272  if (check_failed) {
273  bad_headers[i/8] |= 1 << (i%8);
274 
275  // std::cout << "Check failed at the " << i << "th frame.\n";
276 
277  // std::cout << "First frame:\n";
278  // frame_(i - 1)->print();
279  // std::cout << "Second frame:\n";
280  // frame_(i)->print();
281 
282  // exit(1);
283  }
284  } // Loop over all frames.
285 
286  // Record faulty frame field.
287  size_t tail = out.size();
288  out.resize(tail + num_frames/8+1);
289  memcpy(&out[tail], &bad_headers[0], bad_headers.size());
290 
291  // Record first headers.
292  tail = out.size();
293  out.resize(tail + sizeof(WIBHeader) + 4*sizeof(ColdataHeader));
294 
295  memcpy(&out[tail], frame_()->wib_header(), sizeof(WIBHeader));
296  memcpy(&out[tail + sizeof(WIBHeader)], frame_()->coldata_header(0),
297  sizeof(ColdataHeader));
298  memcpy(&out[tail + sizeof(WIBHeader) + sizeof(ColdataHeader)], frame_()->coldata_header(1),
299  sizeof(ColdataHeader));
300  memcpy(&out[tail + sizeof(WIBHeader) + 2*sizeof(ColdataHeader)], frame_()->coldata_header(2),
301  sizeof(ColdataHeader));
302  memcpy(&out[tail + sizeof(WIBHeader) + 3*sizeof(ColdataHeader)], frame_()->coldata_header(3),
303  sizeof(ColdataHeader));
304 
305  // Loop over frames again to record faulty headers.
306  for(unsigned i = 1; i < num_frames; ++i) {
307  bool check_failed = (bad_headers[i / 8] >> (7 - (i % 8))) & 1;
308  if(!check_failed) { continue; }
309 
310  tail = out.size();
311  out.resize(tail + sizeof(WIBHeader) + 4*sizeof(ColdataHeader));
312 
313  memcpy(&out[tail], frame_(i)->wib_header(), sizeof(WIBHeader));
314  for(unsigned j = 0; j < 4; ++j) {
315  memcpy(&out[tail + sizeof(WIBHeader)], frame_(i)->coldata_header(j),
316  sizeof(ColdataHeader));
317  }
318  }
319  }
320 
321  // Function to generate a Huffman table and tree.
322  void generate_Huff_tree(std::vector<char>& out) {
323  // Build a frequency table.
324  std::unordered_map<adc_t, uint32_t> freq_table;
325  for (unsigned vi = 0; vi < frame_()->num_ch_per_frame; ++vi) {
326  freq_table[frame_(0)->channel(vi)]++;
327  for (unsigned fri = 1; fri < num_frames; ++fri) {
328 #ifdef PREV
329  adc_t curr_val = frame_(fri)->channel(vi) - frame_(fri - 1)->channel(vi);
330 #else
331  adc_t curr_val = frame_(fri)->channel(vi);
332 #endif
333  freq_table[curr_val]++;
334  }
335  }
336 
337  // Save the number of unique values to the metadata.
338  MetaData* meta = reinterpret_cast<MetaData*>(&out[0]);
339  meta->unique_values = freq_table.size();
340 
341  // Save the frequency table to the outgoing buffer so that the decompressor
342  // can make a Huffman tree of its own.
343  size_t tail = out.size();
344  out.resize(tail + freq_table.size() * (sizeof(adc_t) + 4));
345  for (auto p : freq_table) {
346  memcpy(&out[tail], &p.first, sizeof(p.first));
347  memcpy(&out[tail + sizeof(p.first)], &p.second, sizeof(p.second));
348  tail += sizeof(p.first) + sizeof(p.second);
349  }
350 
351  // Insert the frequency table into Huffman tree nodes and calculate the
352  // information entropy according to Shannon.
353  std::vector<HuffTree::Node> nodes;
354  double entropy = 0;
355  const unsigned num_vals = num_frames * frame_()->num_ch_per_frame;
356  for (auto p : freq_table) {
357  HuffTree::Node curr_node;
358  curr_node.value = p.first;
359  curr_node.frequency = p.second;
360  nodes.push_back(curr_node);
361 
362  entropy += (double)p.second / num_vals * log((double)num_vals / p.second);
363  }
364  // std::cout << "ADC value entropy: " << entropy << " bits.\n";
365 
366  // Connect the nodes in the tree according to Huffman's method.
367  hufftree.make_tree(nodes);
368  }
369 
370  // Function to write ADC values using the Huffman table.
371  void ADC_compress(std::vector<char>& out) {
372  // Resize the output buffer appropriately for the worst case.
373  const size_t tail = out.size();
374  out.resize(tail + sizeof(adc_t) * num_frames * 256);
375  // Record the encoded ADC values into the buffer.
376  unsigned rec_bits = 0;
377  const char* dest = &out[0] + tail;
378 
379  for(unsigned j = 0; j < 256; ++j) {
380  adc_t curr_val = frame_(0)->channel(j);
381  *(unsigned long*)(dest + (rec_bits/8)) |= hufftree(curr_val)->huffcode << (rec_bits%8);
382  rec_bits += hufftree(curr_val)->hufflength;
383 
384  for (unsigned i = 1; i < num_frames; ++i) {
385 #ifdef PREV
386  adc_t curr_val = frame_(i)->channel(j) - frame_(i - 1)->channel(j);
387 #else
388  adc_t curr_val = frame_(i)->channel(j);
389 #endif
390 
391  // No code is longer than 64 bits.
392  *(uint64_t*)(dest + (rec_bits/8)) |= hufftree(curr_val)->huffcode << (rec_bits%8);
393  rec_bits += hufftree(curr_val)->hufflength;
394  }
395  }
396  // Resize output buffer to actual data length.
397  out.resize(tail + rec_bits / 8 + 1);
398  }
399 
400  // Function that calls all others relevant for compression.
401  void compress_copy(std::vector<char>& out) {
402  // auto comp_begin = std::chrono::high_resolution_clock::now();
403  store_metadata(out);
404  // auto meta_end = std::chrono::high_resolution_clock::now();
405  // std::cout << "Meta time taken: "
406  // << std::chrono::duration_cast<std::chrono::microseconds>(
407  // meta_end - comp_begin)
408  // .count()
409  // << " us.\n";
410  header_reduce(out);
411  // auto header_end = std::chrono::high_resolution_clock::now();
412  // std::cout << "Header time taken: "
413  // << std::chrono::duration_cast<std::chrono::microseconds>(
414  // header_end - meta_end)
415  // .count()
416  // << " us.\n";
417  generate_Huff_tree(out);
418  // auto huff_end = std::chrono::high_resolution_clock::now();
419  // std::cout << "Huffman time taken: "
420  // << std::chrono::duration_cast<std::chrono::microseconds>(
421  // huff_end - header_end)
422  // .count()
423  // << " us.\n";
424  ADC_compress(out);
425  // auto adc_end = std::chrono::high_resolution_clock::now();
426  // std::cout << "ADC time taken: "
427  // << std::chrono::duration_cast<std::chrono::microseconds>(
428  // adc_end - huff_end)
429  // .count()
430  // << " us.\n";
431  }
432 }; // FelixCompressor
433 
434 // Single function for compressing data from a fragment.
435 std::vector<char> FelixCompress(const dune::FelixFragment& frag) {
436  FelixCompressor compressor(frag);
437  std::vector<char> result;
438  compressor.compress_copy(result);
439 
440  return result;
441 }
442 
443 // Similarly, a function for decompressing data and returning an
444 // artdaq::Fragment.
445 artdaq::Fragment FelixDecompress(const std::vector<char>& buff) {
446  artdaq::Fragment result;
447  const char* src = buff.data();
448 
449  // Read metadata and apply to fragment.
450  const MetaData* meta = reinterpret_cast<MetaData const*>(src);
451  size_t num_frames = meta->num_frames;
452  uint16_t unique_values = meta->unique_values;
453  result.resizeBytes(num_frames * sizeof(FelixFrame));
454  src += sizeof(MetaData);
455 
456  // Handle for filling fragment data.
457  FelixFrame* frame = reinterpret_cast<FelixFrame*>(result.dataBeginBytes());
458 
459  // Access error frames field.
460  std::vector<uint8_t> bad_headers(num_frames/8+1);
461  memcpy(&bad_headers[0], src, num_frames/8+1);
462  src += num_frames / 8 + 1;
463 
464  // Access saved headers and generate headers.
465  unsigned sizeof_header_set = sizeof(WIBHeader) + 4 * sizeof(ColdataHeader);
466  const WIBHeader* whead = reinterpret_cast<WIBHeader const*>(src);
467  std::vector<const ColdataHeader*> chead(4);
468  for(unsigned i = 0; i < 4; ++i) {
469  chead[i] = reinterpret_cast<ColdataHeader const*>(
470  src + sizeof(WIBHeader) + i * sizeof(ColdataHeader));
471  }
472  size_t bad_header_counter = 0;
473  for (unsigned i = 0; i < num_frames; ++i) {
474  const WIBHeader* curr_whead;
475  std::vector<const ColdataHeader*> curr_chead(4);
476  // See if the current headers were bad.
477  bool bad_header = (bad_headers[i / 8] >> (7 - (1 % 8))) & 1;
478  if(bad_header) {
479  ++bad_header_counter;
480  // Set current headers to be the bad one.
481  std::cout << "BAD HEADER\n";
482  curr_whead = reinterpret_cast<WIBHeader const*>(
483  src + bad_header_counter * sizeof_header_set);
484  for (unsigned j = 0; j < 4; ++j) {
485  curr_chead[j] = reinterpret_cast<ColdataHeader const*>(
486  src + bad_header_counter * sizeof_header_set +
487  sizeof(WIBHeader) + j * sizeof(ColdataHeader));
488  }
489  } else {
490  // Set current headers to be the first.
491  curr_whead = whead;
492  for(unsigned j = 0; j < 4; ++j) {
493  curr_chead[j] = chead[j];
494  }
495  }
496  // Assign header fields.
497  (frame + i)->set_sof(curr_whead->sof);
498  (frame + i)->set_version(curr_whead->version);
499  (frame + i)->set_fiber_no(curr_whead->fiber_no);
500  (frame + i)->set_crate_no(curr_whead->crate_no);
501  (frame + i)->set_slot_no(curr_whead->slot_no);
502  (frame + i)->set_mm(curr_whead->mm);
503  (frame + i)->set_oos(curr_whead->oos);
504  (frame + i)->set_wib_errors(curr_whead->wib_errors);
505  (frame + i)->set_timestamp(curr_whead->timestamp() + i * 25);
506  (frame + i)->set_wib_counter(curr_whead->wib_counter());
507  (frame + i)->set_z(curr_whead->z);
508  for(unsigned j = 0; j < 4; ++j) {
509  (frame + i)->set_s1_error(j, curr_chead[j]->s1_error);
510  (frame + i)->set_s2_error(j, curr_chead[j]->s2_error);
511  (frame + i)->set_coldata_convert_count(
512  j, curr_chead[j]->coldata_convert_count + i * 25);
513  (frame + i)->set_error_register(j, curr_chead[j]->error_register);
514  }
515  }
516  src += (bad_header_counter+1) * sizeof_header_set;
517 
518  // Read frequency table and generate a Huffman tree.
519  std::unordered_map<uint16_t, unsigned> freq_table;
520  for (unsigned i = 0; i < unique_values; ++i) {
521  const uint16_t* v = reinterpret_cast<uint16_t const*>(src);
522  const uint32_t* f =
523  reinterpret_cast<uint32_t const*>(src + sizeof(uint16_t));
524  freq_table[*v] = *f;
525  src += sizeof(uint16_t) + sizeof(uint32_t);
526  }
527  std::vector<HuffTree::Node> nodes;
528  for (auto p : freq_table) {
529  HuffTree::Node curr_node;
530  curr_node.value = p.first;
531  curr_node.frequency = p.second;
532  nodes.push_back(curr_node);
533  }
534  HuffTree hufftree;
535  hufftree.make_tree(nodes);
536 
537  // Read ADC values from the buffer.
538  size_t bits_read = 0;
539  for (unsigned i = 0; i < num_frames * 256; ++i) {
540  // Pointer to walk through the tree.
541  HuffTree::Node* pnode = hufftree.root;
542  while (pnode->left != NULL) {
543  if (*src >> (bits_read % 8) & 1) { // Next bit is 1.
544  pnode = pnode->right;
545  } else { // Next bit is 0.
546  pnode = pnode->left;
547  }
548  // Increment bits read and possibly the source pointer.
549  ++bits_read;
550  if (bits_read % 8 == 0) {
551  ++src; // Next byte reached.
552  }
553  }
554  // Value reached.
555  adc_t found_val = pnode->value;
556 #ifdef PREV
557  if (i % num_frames != 0) {
558  found_val += (frame + i % num_frames - 1)->channel(i / num_frames);
559  }
560 #endif
561  (frame + i % num_frames)->set_channel(i / num_frames, found_val);
562  }
563 
564  return result;
565 }
566 
567 } // namespace dune
568 
569 #endif /* artdaq_dune_Overlays_FelixComp_hh */
void generate_Huff_tree(std::vector< char > &out)
uint16_t adc_t
Definition: FelixFormat.hh:18
static QCString result
void header_reduce(std::vector< char > &out)
void store_metadata(std::vector< char > &out)
uint16_t wib_counter() const
Definition: FelixFormat.hh:38
uint32_t unique_values
std::string string
Definition: nybbler.cc:12
artdaq::Fragment FelixDecompress(const std::vector< char > &buff)
bool valuecomp(const Node &one, const Node &other) const
void operator=(const Node &other)
std::unordered_map< uint16_t, Node * > nodes
FelixFrame const * frame_(const size_t &frame_num=0) const
std::unordered_map< uint16_t, Node * > make_tree(std::vector< Node > nodevec)
std::vector< char > FelixCompress(const dune::FelixFragment &frag)
uint8_t channel
Definition: CRTFragment.hh:201
string filename
Definition: train.py:213
FelixCompressor(const dune::FelixFragment &frag)
void print(Node *loc)
const size_t input_length
void printFile(std::string &filename)
Node * operator()(const uint16_t value)
static int input(void)
Definition: code.cpp:15695
std::void_t< T > n
void compress_copy(std::vector< char > &out)
uint64_t timestamp() const
Definition: FelixFormat.hh:31
p
Definition: test.py:223
string tmp
Definition: languages.py:63
FelixCompressor(const uint8_t *data, const size_t num_frames=10000)
uint32_t num_frames
void generate_codes(Node *loc, size_t buff=0, uint8_t len=0)
T copy(T const &v)
bool operator<(const Node &other) const
decltype(auto) constexpr begin(T &&obj)
ADL-aware version of std::begin.
Definition: StdUtils.h:72
void ADC_compress(std::vector< char > &out)