Source code for pymchelper.writers.sparse

import logging

import numpy as np

logger = logging.getLogger(__name__)


class SparseWriter:
    """
    Supports writing sparse matrix format

    Only the elements of a page whose absolute value exceeds a threshold are
    stored, together with their indices and the shape of the original array,
    in a NumPy ``.npz`` archive.
    """

    def __init__(self, filename, options):
        """
        :param filename: output path as a string; ``.npz`` is appended
                         when the name does not already end with it
        :param options: object providing a ``threshold`` attribute
        """
        self.filename = filename
        if not self.filename.endswith(".npz"):
            self.filename += ".npz"
        self.threshold = options.threshold
        logger.info("Sparse threshold %g", self.threshold)

    def write(self, estimator):
        """
        Save the above-threshold elements of the estimator's single page.

        The archive written to ``self.filename`` holds three arrays:
        ``data`` (1-D array of selected values), ``indices`` (one row of
        coordinates per selected value) and ``shape`` (shape of the
        original array).

        :param estimator: object with a ``pages`` sequence; each page
                          exposes a numpy array as ``data``
        :return: ``False`` when the estimator has more than one page
                 (nothing is written), ``0`` after a successful write.
                 NOTE: ``False == 0`` in Python, so callers that need to
                 tell the two outcomes apart must test with ``is False``.
        """
        if len(estimator.pages) > 1:
            logger.error("Conversion of data with multiple pages not supported yet")
            return False

        data = estimator.pages[0].data

        all_items = data.size
        logger.info("Number of all items: %d", all_items)

        # boolean mask, same shape as the data, selecting the values whose
        # absolute value is greater than the threshold
        # np.abs creates a temporary array as large as the data itself
        thres_cut = np.abs(data) > self.threshold

        passed_items = int(np.sum(thres_cut))
        logger.info("Number of items passing threshold: %d", passed_items)

        # an empty array has no meaningful rate; report NaN explicitly instead
        # of letting numpy divide by zero and emit a RuntimeWarning
        rate = passed_items / all_items if all_items else float("nan")
        logger.info("Sparse matrix compression rate: %g", rate)

        # np.argwhere gives a 2-D integer array of shape (passed_items, data.ndim):
        # each row holds the coordinates of one selected element
        indices = np.argwhere(thres_cut)

        # boolean-mask indexing flattens the selected values into a 1-D array
        filtered_data = data[thres_cut]

        # save file to NPZ file format
        np.savez(file=self.filename, data=filtered_data, indices=indices, shape=data.shape)
        return 0