-
Notifications
You must be signed in to change notification settings - Fork 521
Extend test coverage for sparse matrix block representations and related improvements #2406
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
c3fa0e5
7326de4
263d945
a3527c9
02dbad2
2660b5b
483465b
5c67157
5708be6
ef2e3f3
e4cdd00
4aa4802
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am a bit horrified about how many bugs you found in this file :) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -193,6 +193,20 @@ public void compact(int r) { | |
| //do nothing everything preallocated | ||
| } | ||
|
|
||
| @Override | ||
| public void compact() { | ||
| int pos = 0; | ||
| for(int i=0; i< _values.length; i++) { | ||
| if(_values[i] != 0){ | ||
| _values[pos] = _values[i]; | ||
| _rindexes[pos] = _rindexes[i]; | ||
| _cindexes[pos] = _cindexes[i]; | ||
| pos++; | ||
| } | ||
| } | ||
| _size = pos; | ||
| } | ||
|
|
||
| @Override | ||
| public int numRows() { | ||
| return _rlen; | ||
|
|
@@ -221,12 +235,12 @@ public boolean checkValidity(int rlen, int clen, long nnz, boolean strict) { | |
| } | ||
|
|
||
| //2. correct array lengths | ||
| if(_size != nnz && _cindexes.length < nnz && _rindexes.length < nnz && _values.length < nnz) { | ||
| if(_size != nnz || _cindexes.length < nnz || _rindexes.length < nnz || _values.length < nnz) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This line is duplicated across the sparse blocks, maybe some of these validity checks can be unified? |
||
| throw new RuntimeException("Incorrect array lengths."); | ||
| } | ||
|
|
||
| //3.1. sort order of row indices | ||
| for( int i=1; i<=nnz; i++ ) { | ||
| for( int i=1; i<nnz; i++ ) { | ||
| if(_rindexes[i] < _rindexes[i-1]) | ||
| throw new RuntimeException("Wrong sorted order of row indices"); | ||
| } | ||
|
|
@@ -235,26 +249,24 @@ public boolean checkValidity(int rlen, int clen, long nnz, boolean strict) { | |
| for( int i=0; i<rlen; i++ ) { | ||
| int apos = pos(i); | ||
| int alen = size(i); | ||
| for(int k=apos+i; k<apos+alen; k++) | ||
| if( _cindexes[k+1] >= _cindexes[k] ) | ||
| for(int k=apos+1; k<apos+alen; k++) | ||
| if(_cindexes[k-1] > _cindexes[k]) | ||
| throw new RuntimeException("Wrong sparse row ordering: " | ||
| + k + " "+_cindexes[k-1]+" "+_cindexes[k]); | ||
| for( int k=apos; k<apos+alen; k++ ) | ||
| if(_values[k] == 0) | ||
| throw new RuntimeException("Wrong sparse row: zero at " | ||
| + k + " at col index " + _cindexes[k]); | ||
| } | ||
|
|
||
| //4. non-existing zero values | ||
| for( int i=0; i<_size; i++ ) { | ||
| if( _values[i] == 0) | ||
| throw new RuntimeException("The values array should not contain zeros." | ||
| + " The " + i + "th value is "+_values[i]); | ||
| if(_cindexes[i] < 0 || _rindexes[i] < 0) | ||
| throw new RuntimeException("Invalid index at pos=" + i); | ||
| } | ||
|
|
||
| //5. a capacity that is no larger than nnz times the resize factor | ||
| int capacity = _values.length; | ||
| if( capacity > nnz*RESIZE_FACTOR1 ) { | ||
| if( capacity > INIT_CAPACITY && capacity > nnz*RESIZE_FACTOR1 ) { | ||
| throw new RuntimeException("Capacity is larger than the nnz times a resize factor." | ||
| + " Current size: "+capacity+ ", while Expected size:"+nnz*RESIZE_FACTOR1); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -64,15 +64,16 @@ public SparseBlockCSC(int rlen, int clen) { | |
| _size = 0; | ||
| } | ||
|
|
||
| public SparseBlockCSC(int clen, int capacity, int size) { | ||
| public SparseBlockCSC(int rlen, int clen, int capacity) { | ||
| _rlen = rlen; | ||
| _ptr = new int[clen + 1]; //ix0=0 | ||
| _indexes = new int[capacity]; | ||
| _values = new double[capacity]; | ||
| _size = size; | ||
| _size = 0; | ||
| } | ||
|
|
||
| public SparseBlockCSC(int[] rowPtr, int[] rowInd, double[] values, int nnz) { | ||
| _ptr = rowPtr; | ||
| public SparseBlockCSC(int[] colPtr, int[] rowInd, double[] values, int nnz) { | ||
| _ptr = colPtr; | ||
| _indexes = rowInd; | ||
| _values = values; | ||
| _size = nnz; | ||
|
|
@@ -94,8 +95,9 @@ public SparseBlockCSC(SparseBlock sblock) { | |
|
|
||
| private void initialize(SparseBlock sblock) { | ||
|
|
||
| if(_size > Integer.MAX_VALUE) | ||
| throw new RuntimeException("SparseBlockCSC supports nnz<=Integer.MAX_VALUE but got " + _size); | ||
| long size = sblock.size(); | ||
| if(size > Integer.MAX_VALUE) | ||
| throw new RuntimeException("SparseBlockCSC supports nnz<=Integer.MAX_VALUE but got " + size); | ||
|
|
||
| //special case SparseBlockCSC | ||
| if(sblock instanceof SparseBlockCSC) { | ||
|
|
@@ -223,27 +225,6 @@ public SparseBlockCSC(int cols, int[] rowInd, int[] colInd, double[] values) { | |
|
|
||
| } | ||
|
|
||
| public SparseBlockCSC(int cols, int nnz, int[] rowInd) { | ||
|
|
||
| _clenInferred = cols; | ||
| _ptr = new int[cols + 1]; | ||
| _indexes = Arrays.copyOf(rowInd, nnz); | ||
| _values = new double[nnz]; | ||
| Arrays.fill(_values, 1); | ||
| _size = nnz; | ||
|
|
||
| //single-pass construction of col pointers | ||
| //and copy of row indexes if necessary | ||
| for(int i = 0, pos = 0; i < cols; i++) { | ||
| if(rowInd[i] >= 0) { | ||
| if(cols > nnz) | ||
| _indexes[pos] = rowInd[i]; | ||
| pos++; | ||
| } | ||
| _ptr[i + 1] = pos; | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Initializes the CSC sparse block from an ordered input stream of ultra-sparse ijv triples. | ||
| * | ||
|
|
@@ -288,7 +269,6 @@ public void initSparse(int clen, int nnz, DataInput in) throws IOException { | |
| // Allocate space if necessary | ||
| if(_values.length < nnz) { | ||
| resize(newCapacity(nnz)); | ||
| System.out.println("hallo"); | ||
| } | ||
| // Read sparse columns, append and update pointers | ||
| _ptr[0] = 0; | ||
|
|
@@ -377,12 +357,31 @@ public void compact(int r) { | |
| //do nothing everything preallocated | ||
| } | ||
|
|
||
| @Override | ||
| public void compact() { | ||
| int pos = 0; | ||
| for(int i=0; i<numCols(); i++) { | ||
| int apos = posCol(i); | ||
| int alen = sizeCol(i); | ||
| _ptr[i] = pos; | ||
| for(int j=apos; j<apos+alen; j++) { | ||
| if(_values[j] != 0){ | ||
| _values[pos] = _values[j]; | ||
| _indexes[pos] = _indexes[j]; | ||
| pos++; | ||
| } | ||
| } | ||
| } | ||
| _ptr[numCols()] = pos; | ||
| _size = pos; | ||
| } | ||
|
|
||
| @Override | ||
| public int numRows() { | ||
| if(_rlen > -1) | ||
| return _rlen; | ||
| else { | ||
| int rlen = Arrays.stream(_indexes).max().getAsInt(); | ||
| int rlen = Arrays.stream(_indexes).max().getAsInt()+1; | ||
| _rlen = rlen; | ||
| return rlen; | ||
| } | ||
|
|
@@ -550,12 +549,12 @@ public boolean checkValidity(int rlen, int clen, long nnz, boolean strict) { | |
| } | ||
|
|
||
| //2. correct array lengths | ||
| if(_size != nnz && _ptr.length < clen + 1 && _values.length < nnz && _indexes.length < nnz) { | ||
| if(_size != nnz || _ptr.length < clen + 1 || _values.length < nnz || _indexes.length < nnz) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here is what i mentioned as duplicated, maybe reuse. |
||
| throw new RuntimeException("Incorrect array lengths."); | ||
| } | ||
|
|
||
| //3. non-decreasing row pointers | ||
| for(int i = 1; i < clen; i++) { | ||
| //3. non-decreasing col pointers | ||
| for(int i = 1; i <= clen; i++) { | ||
| if(_ptr[i - 1] > _ptr[i] && strict) | ||
| throw new RuntimeException( | ||
| "Column pointers are decreasing at column: " + i + ", with pointers " + _ptr[i - 1] + " > " + | ||
|
|
@@ -569,10 +568,9 @@ public boolean checkValidity(int rlen, int clen, long nnz, boolean strict) { | |
| for(int k = apos + 1; k < apos + alen; k++) | ||
| if(_indexes[k - 1] >= _indexes[k]) | ||
| throw new RuntimeException( | ||
| "Wrong sparse column ordering: " + k + " " + _indexes[k - 1] + " " + _indexes[k]); | ||
| for(int k = apos; k < apos + alen; k++) | ||
| if(_values[k] == 0) | ||
| throw new RuntimeException("Wrong sparse column: zero at " + k + " at row index " + _indexes[k]); | ||
| "Wrong sparse column ordering, at column=" + i + ", pos=" + k + " with row indexes " + | ||
| _indexes[k - 1] + ">=" + _indexes[k] | ||
| ); | ||
| } | ||
|
|
||
| //5. non-existing zero values | ||
|
|
@@ -581,11 +579,13 @@ public boolean checkValidity(int rlen, int clen, long nnz, boolean strict) { | |
| throw new RuntimeException( | ||
| "The values array should not contain zeros." + " The " + i + "th value is " + _values[i]); | ||
| } | ||
| if(_indexes[i] < 0) | ||
| throw new RuntimeException("Invalid index at pos=" + i); | ||
| } | ||
|
|
||
| //6. a capacity that is no larger than nnz times resize factor. | ||
| int capacity = _values.length; | ||
| if(capacity > nnz * RESIZE_FACTOR1) { | ||
| if(capacity > INIT_CAPACITY && capacity > nnz * RESIZE_FACTOR1) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The additional check seems redundant. Maybe I am missing something. I seem to remember misusing the INIT_CAPACITY somewhere else, and explicitly setting it high to avoid reallocation on appends to MCSR. |
||
| throw new RuntimeException( | ||
| "Capacity is larger than the nnz times a resize factor." + " Current size: " + capacity + | ||
| ", while Expected size:" + nnz * RESIZE_FACTOR1); | ||
|
|
@@ -938,7 +938,7 @@ public void deleteIndexRangeCol(int c, int rl, int ru) { | |
| int len = sizeCol(c); | ||
| int end = internPosFIndexGTECol(ru, c); | ||
| if(end < 0) //delete all remaining | ||
| end = start + len; | ||
| end = posCol(c) + len; | ||
|
|
||
| //overlapping array copy (shift rhs values left) | ||
| System.arraycopy(_indexes, end, _indexes, start, _size - end); | ||
|
|
@@ -1059,7 +1059,7 @@ public int posFIndexGTCol(int r, int c) { | |
| @Override | ||
| public String toString() { | ||
| StringBuilder sb = new StringBuilder(); | ||
| sb.append("SparseBlockCSR: clen="); | ||
| sb.append("SparseBlockCSC: clen="); | ||
| sb.append(numCols()); | ||
| sb.append(", nnz="); | ||
| sb.append(size()); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -350,7 +350,8 @@ public void allocate(int r, int ennz, int maxnnz) { | |
| public void compact(int r) { | ||
| //do nothing everything preallocated | ||
| } | ||
|
|
||
|
|
||
| @Override | ||
| public void compact() { | ||
| int pos = 0; | ||
| for(int i=0; i<numRows(); i++) { | ||
|
|
@@ -937,12 +938,12 @@ public boolean checkValidity(int rlen, int clen, long nnz, boolean strict) { | |
| } | ||
|
|
||
| //2. correct array lengths | ||
| if(_size != nnz && _ptr.length < rlen+1 && _values.length < nnz && _indexes.length < nnz ) { | ||
| if( _size != nnz || _ptr.length < rlen+1 || _values.length < nnz || _indexes.length < nnz ) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here, try the reuse. |
||
| throw new RuntimeException("Incorrect array lengths."); | ||
| } | ||
|
|
||
| //3. non-decreasing row pointers | ||
| for( int i=1; i<rlen; i++ ) { | ||
| for( int i=1; i<=rlen; i++ ) { | ||
| if(_ptr[i-1] > _ptr[i] && strict) | ||
| throw new RuntimeException("Row pointers are decreasing at row: "+i | ||
| + ", with pointers "+_ptr[i-1]+" > "+_ptr[i]); | ||
|
|
@@ -956,10 +957,6 @@ public boolean checkValidity(int rlen, int clen, long nnz, boolean strict) { | |
| if( _indexes[k-1] >= _indexes[k] ) | ||
| throw new RuntimeException("Wrong sparse row ordering: " | ||
| + k + " "+_indexes[k-1]+" "+_indexes[k]); | ||
| for( int k=apos; k<apos+alen; k++ ) | ||
| if( _values[k] == 0 ) | ||
| throw new RuntimeException("Wrong sparse row: zero at " | ||
| + k + " at col index " + _indexes[k]); | ||
| } | ||
|
|
||
| //5. non-existing zero values | ||
|
|
@@ -968,11 +965,13 @@ public boolean checkValidity(int rlen, int clen, long nnz, boolean strict) { | |
| throw new RuntimeException("The values array should not contain zeros." | ||
| + " The " + i + "th value is "+_values[i]); | ||
| } | ||
| if(_indexes[i] < 0) | ||
| throw new RuntimeException("Invalid index at pos=" + i); | ||
| } | ||
|
|
||
| //6. a capacity that is no larger than nnz times resize factor. | ||
| int capacity = _values.length; | ||
| if(capacity > nnz*RESIZE_FACTOR1 ) { | ||
| if(capacity > INIT_CAPACITY && capacity > nnz*RESIZE_FACTOR1 ) { | ||
| throw new RuntimeException("Capacity is larger than the nnz times a resize factor." | ||
| + " Current size: "+capacity+ ", while Expected size:"+nnz*RESIZE_FACTOR1); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not completely guaranteed if the underlying block is not compacted.
Additionally, while I really like the optimization to leverage the nnz in the comparing vector, the comparing vector also should have the nonzeros at the correct indexes. Perhaps an implementation leveraging materializing a temporary int[] with such offsets, would be faster?