001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.linear;
019    
020    import java.io.Serializable;
021    
022    import org.apache.commons.math.Field;
023    import org.apache.commons.math.FieldElement;
024    import org.apache.commons.math.MathRuntimeException;
025    import org.apache.commons.math.linear.MatrixVisitorException;
026    import org.apache.commons.math.exception.util.LocalizedFormats;
027    import org.apache.commons.math.util.FastMath;
028    
029    /**
030     * Cache-friendly implementation of FieldMatrix using a flat arrays to store
031     * square blocks of the matrix.
032     * <p>
033     * This implementation is specially designed to be cache-friendly. Square blocks are
034     * stored as small arrays and allow efficient traversal of data both in row major direction
035     * and columns major direction, one block at a time. This greatly increases performances
036     * for algorithms that use crossed directions loops like multiplication or transposition.
037     * </p>
038     * <p>
039     * The size of square blocks is a static parameter. It may be tuned according to the cache
040     * size of the target computer processor. As a rule of thumbs, it should be the largest
041     * value that allows three blocks to be simultaneously cached (this is necessary for example
042     * for matrix multiplication). The default value is to use 36x36 blocks.
043     * </p>
044     * <p>
045     * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
046     * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
047     * blocks are flattened in row major order in single dimension arrays which are therefore
048     * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
049     * organized in row major order.
050     * </p>
051     * <p>
052     * As an example, for a block size of 36x36, a 100x60 matrix would be stored in 6 blocks.
053     * Block 0 would be a Field[1296] array holding the upper left 36x36 square, block 1 would be
054     * a Field[1296] array holding the upper center 36x36 square, block 2 would be a Field[1008]
055     * array holding the upper right 36x28 rectangle, block 3 would be a Field[864] array holding
056     * the lower left 24x36 rectangle, block 4 would be a Field[864] array holding the lower center
057     * 24x36 rectangle and block 5 would be a Field[672] array holding the lower right 24x28
058     * rectangle.
059     * </p>
060     * <p>
061     * The layout complexity overhead versus simple mapping of matrices to java
062     * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
063     * to up to 3-fold improvements for matrices of moderate to large size.
064     * </p>
065     * @param <T> the type of the field elements
066     * @version $Revision: 1073158 $ $Date: 2011-02-21 22:46:52 +0100 (lun. 21 f??vr. 2011) $
067     * @since 2.0
068     */
069    public class BlockFieldMatrix<T extends FieldElement<T>> extends AbstractFieldMatrix<T> implements Serializable {
070    
071        /** Block size. */
072        public static final int BLOCK_SIZE = 36;
073    
074        /** Serializable version identifier */
075        private static final long serialVersionUID = -4602336630143123183L;
076    
077        /** Blocks of matrix entries. */
078        private final T blocks[][];
079    
080        /** Number of rows of the matrix. */
081        private final int rows;
082    
083        /** Number of columns of the matrix. */
084        private final int columns;
085    
086        /** Number of block rows of the matrix. */
087        private final int blockRows;
088    
089        /** Number of block columns of the matrix. */
090        private final int blockColumns;
091    
092        /**
093         * Create a new matrix with the supplied row and column dimensions.
094         *
095         * @param field field to which the elements belong
096         * @param rows  the number of rows in the new matrix
097         * @param columns  the number of columns in the new matrix
098         * @throws IllegalArgumentException if row or column dimension is not
099         *  positive
100         */
101        public BlockFieldMatrix(final Field<T> field, final int rows, final int columns)
102            throws IllegalArgumentException {
103    
104            super(field, rows, columns);
105            this.rows    = rows;
106            this.columns = columns;
107    
108            // number of blocks
109            blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
110            blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
111    
112            // allocate storage blocks, taking care of smaller ones at right and bottom
113            blocks = createBlocksLayout(field, rows, columns);
114    
115        }
116    
117        /**
118         * Create a new dense matrix copying entries from raw layout data.
119         * <p>The input array <em>must</em> already be in raw layout.</p>
120         * <p>Calling this constructor is equivalent to call:
121         * <pre>matrix = new BlockFieldMatrix<T>(getField(), rawData.length, rawData[0].length,
122         *                                   toBlocksLayout(rawData), false);</pre>
123         * </p>
124         * @param rawData data for new matrix, in raw layout
125         *
126         * @exception IllegalArgumentException if <code>blockData</code> shape is
127         * inconsistent with block layout
128         * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
129         */
130        public BlockFieldMatrix(final T[][] rawData)
131            throws IllegalArgumentException {
132            this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
133        }
134    
135        /**
136         * Create a new dense matrix copying entries from block layout data.
137         * <p>The input array <em>must</em> already be in blocks layout.</p>
138         * @param rows  the number of rows in the new matrix
139         * @param columns  the number of columns in the new matrix
140         * @param blockData data for new matrix
141         * @param copyArray if true, the input array will be copied, otherwise
142         * it will be referenced
143         *
144         * @exception IllegalArgumentException if <code>blockData</code> shape is
145         * inconsistent with block layout
146         * @see #createBlocksLayout(Field, int, int)
147         * @see #toBlocksLayout(FieldElement[][])
148         * @see #BlockFieldMatrix(FieldElement[][])
149         */
150        public BlockFieldMatrix(final int rows, final int columns,
151                                final T[][] blockData, final boolean copyArray)
152            throws IllegalArgumentException {
153    
154            super(extractField(blockData), rows, columns);
155            this.rows    = rows;
156            this.columns = columns;
157    
158            // number of blocks
159            blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
160            blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
161    
162            if (copyArray) {
163                // allocate storage blocks, taking care of smaller ones at right and bottom
164                blocks = buildArray(getField(), blockRows * blockColumns, -1);
165            } else {
166                // reference existing array
167                blocks = blockData;
168            }
169    
170            int index = 0;
171            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
172                final int iHeight = blockHeight(iBlock);
173                for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
174                    if (blockData[index].length != iHeight * blockWidth(jBlock)) {
175                        throw MathRuntimeException.createIllegalArgumentException(
176                                LocalizedFormats.WRONG_BLOCK_LENGTH,
177                                blockData[index].length, iHeight * blockWidth(jBlock));
178                    }
179                    if (copyArray) {
180                        blocks[index] = blockData[index].clone();
181                    }
182                }
183            }
184    
185        }
186    
187        /**
188         * Convert a data array from raw layout to blocks layout.
189         * <p>
190         * Raw layout is the straightforward layout where element at row i and
191         * column j is in array element <code>rawData[i][j]</code>. Blocks layout
192         * is the layout used in {@link BlockFieldMatrix} instances, where the matrix
193         * is split in square blocks (except at right and bottom side where blocks may
194         * be rectangular to fit matrix size) and each block is stored in a flattened
195         * one-dimensional array.
196         * </p>
197         * <p>
198         * This method creates an array in blocks layout from an input array in raw layout.
199         * It can be used to provide the array argument of the {@link
200         * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
201         * constructor.
202         * </p>
203         * @param <T> the type of the field elements
204         * @param rawData data array in raw layout
205         * @return a new data array containing the same entries but in blocks layout
206         * @exception IllegalArgumentException if <code>rawData</code> is not rectangular
207         *  (not all rows have the same length)
208         * @see #createBlocksLayout(Field, int, int)
209         * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
210         */
211        public static <T extends FieldElement<T>> T[][] toBlocksLayout(final T[][] rawData)
212            throws IllegalArgumentException {
213    
214            final int rows         = rawData.length;
215            final int columns      = rawData[0].length;
216            final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
217            final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
218    
219            // safety checks
220            for (int i = 0; i < rawData.length; ++i) {
221                final int length = rawData[i].length;
222                if (length != columns) {
223                    throw MathRuntimeException.createIllegalArgumentException(
224                            LocalizedFormats.DIFFERENT_ROWS_LENGTHS,
225                            columns, length);
226                }
227            }
228    
229            // convert array
230            final Field<T> field = extractField(rawData);
231            final T[][] blocks = buildArray(field, blockRows * blockColumns, -1);
232            int blockIndex = 0;
233            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
234                final int pStart  = iBlock * BLOCK_SIZE;
235                final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, rows);
236                final int iHeight = pEnd - pStart;
237                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
238                    final int qStart = jBlock * BLOCK_SIZE;
239                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
240                    final int jWidth = qEnd - qStart;
241    
242                    // allocate new block
243                    final T[] block = buildArray(field, iHeight * jWidth);
244                    blocks[blockIndex] = block;
245    
246                    // copy data
247                    int index = 0;
248                    for (int p = pStart; p < pEnd; ++p) {
249                        System.arraycopy(rawData[p], qStart, block, index, jWidth);
250                        index += jWidth;
251                    }
252    
253                    ++blockIndex;
254    
255                }
256            }
257    
258            return blocks;
259    
260        }
261    
262        /**
263         * Create a data array in blocks layout.
264         * <p>
265         * This method can be used to create the array argument of the {@link
266         * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
267         * constructor.
268         * </p>
269         * @param <T> the type of the field elements
270         * @param field field to which the elements belong
271         * @param rows  the number of rows in the new matrix
272         * @param columns  the number of columns in the new matrix
273         * @return a new data array in blocks layout
274         * @see #toBlocksLayout(FieldElement[][])
275         * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
276         */
277        public static <T extends FieldElement<T>> T[][] createBlocksLayout(final Field<T> field,
278                                                                           final int rows, final int columns) {
279    
280            final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
281            final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
282    
283            final T[][] blocks = buildArray(field, blockRows * blockColumns, -1);
284            int blockIndex = 0;
285            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
286                final int pStart  = iBlock * BLOCK_SIZE;
287                final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, rows);
288                final int iHeight = pEnd - pStart;
289                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
290                    final int qStart = jBlock * BLOCK_SIZE;
291                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
292                    final int jWidth = qEnd - qStart;
293                    blocks[blockIndex] = buildArray(field, iHeight * jWidth);
294                    ++blockIndex;
295                }
296            }
297    
298            return blocks;
299    
300        }
301    
302        /** {@inheritDoc} */
303        @Override
304        public FieldMatrix<T> createMatrix(final int rowDimension, final int columnDimension)
305            throws IllegalArgumentException {
306            return new BlockFieldMatrix<T>(getField(), rowDimension, columnDimension);
307        }
308    
309        /** {@inheritDoc} */
310        @Override
311        public FieldMatrix<T> copy() {
312    
313            // create an empty matrix
314            BlockFieldMatrix<T> copied = new BlockFieldMatrix<T>(getField(), rows, columns);
315    
316            // copy the blocks
317            for (int i = 0; i < blocks.length; ++i) {
318                System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
319            }
320    
321            return copied;
322    
323        }
324    
325        /** {@inheritDoc} */
326        @Override
327        public FieldMatrix<T> add(final FieldMatrix<T> m)
328            throws IllegalArgumentException {
329            try {
330                return add((BlockFieldMatrix<T>) m);
331            } catch (ClassCastException cce) {
332    
333                // safety check
334                checkAdditionCompatible(m);
335    
336                final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
337    
338                // perform addition block-wise, to ensure good cache behavior
339                int blockIndex = 0;
340                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
341                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
342    
343                        // perform addition on the current block
344                        final T[] outBlock = out.blocks[blockIndex];
345                        final T[] tBlock   = blocks[blockIndex];
346                        final int      pStart   = iBlock * BLOCK_SIZE;
347                        final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, rows);
348                        final int      qStart   = jBlock * BLOCK_SIZE;
349                        final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, columns);
350                        int k = 0;
351                        for (int p = pStart; p < pEnd; ++p) {
352                            for (int q = qStart; q < qEnd; ++q) {
353                                outBlock[k] = tBlock[k].add(m.getEntry(p, q));
354                                ++k;
355                            }
356                        }
357    
358                        // go to next block
359                        ++blockIndex;
360    
361                    }
362                }
363    
364                return out;
365    
366            }
367        }
368    
369        /**
370         * Compute the sum of this and <code>m</code>.
371         *
372         * @param m    matrix to be added
373         * @return     this + m
374         * @throws  IllegalArgumentException if m is not the same size as this
375         */
376        public BlockFieldMatrix<T> add(final BlockFieldMatrix<T> m)
377            throws IllegalArgumentException {
378    
379            // safety check
380            checkAdditionCompatible(m);
381    
382            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
383    
384            // perform addition block-wise, to ensure good cache behavior
385            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
386                final T[] outBlock = out.blocks[blockIndex];
387                final T[] tBlock   = blocks[blockIndex];
388                final T[] mBlock   = m.blocks[blockIndex];
389                for (int k = 0; k < outBlock.length; ++k) {
390                    outBlock[k] = tBlock[k].add(mBlock[k]);
391                }
392            }
393    
394            return out;
395    
396        }
397    
398        /** {@inheritDoc} */
399        @Override
400        public FieldMatrix<T> subtract(final FieldMatrix<T> m)
401            throws IllegalArgumentException {
402            try {
403                return subtract((BlockFieldMatrix<T>) m);
404            } catch (ClassCastException cce) {
405    
406                // safety check
407                checkSubtractionCompatible(m);
408    
409                final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
410    
411                // perform subtraction block-wise, to ensure good cache behavior
412                int blockIndex = 0;
413                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
414                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
415    
416                        // perform subtraction on the current block
417                        final T[] outBlock = out.blocks[blockIndex];
418                        final T[] tBlock   = blocks[blockIndex];
419                        final int      pStart   = iBlock * BLOCK_SIZE;
420                        final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, rows);
421                        final int      qStart   = jBlock * BLOCK_SIZE;
422                        final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, columns);
423                        int k = 0;
424                        for (int p = pStart; p < pEnd; ++p) {
425                            for (int q = qStart; q < qEnd; ++q) {
426                                outBlock[k] = tBlock[k].subtract(m.getEntry(p, q));
427                                ++k;
428                            }
429                        }
430    
431                        // go to next block
432                        ++blockIndex;
433    
434                    }
435                }
436    
437                return out;
438    
439            }
440        }
441    
442        /**
443         * Compute this minus <code>m</code>.
444         *
445         * @param m    matrix to be subtracted
446         * @return     this - m
447         * @throws  IllegalArgumentException if m is not the same size as this
448         */
449        public BlockFieldMatrix<T> subtract(final BlockFieldMatrix<T> m)
450            throws IllegalArgumentException {
451    
452            // safety check
453            checkSubtractionCompatible(m);
454    
455            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
456    
457            // perform subtraction block-wise, to ensure good cache behavior
458            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
459                final T[] outBlock = out.blocks[blockIndex];
460                final T[] tBlock   = blocks[blockIndex];
461                final T[] mBlock   = m.blocks[blockIndex];
462                for (int k = 0; k < outBlock.length; ++k) {
463                    outBlock[k] = tBlock[k].subtract(mBlock[k]);
464                }
465            }
466    
467            return out;
468    
469        }
470    
471        /** {@inheritDoc} */
472        @Override
473        public FieldMatrix<T> scalarAdd(final T d)
474            throws IllegalArgumentException {
475    
476            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
477    
478            // perform subtraction block-wise, to ensure good cache behavior
479            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
480                final T[] outBlock = out.blocks[blockIndex];
481                final T[] tBlock   = blocks[blockIndex];
482                for (int k = 0; k < outBlock.length; ++k) {
483                    outBlock[k] = tBlock[k].add(d);
484                }
485            }
486    
487            return out;
488    
489        }
490    
491        /** {@inheritDoc} */
492        @Override
493        public FieldMatrix<T> scalarMultiply(final T d)
494            throws IllegalArgumentException {
495    
496            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
497    
498            // perform subtraction block-wise, to ensure good cache behavior
499            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
500                final T[] outBlock = out.blocks[blockIndex];
501                final T[] tBlock   = blocks[blockIndex];
502                for (int k = 0; k < outBlock.length; ++k) {
503                    outBlock[k] = tBlock[k].multiply(d);
504                }
505            }
506    
507            return out;
508    
509        }
510    
511        /** {@inheritDoc} */
512        @Override
513        public FieldMatrix<T> multiply(final FieldMatrix<T> m)
514            throws IllegalArgumentException {
515            try {
516                return multiply((BlockFieldMatrix<T>) m);
517            } catch (ClassCastException cce) {
518    
519                // safety check
520                checkMultiplicationCompatible(m);
521    
522                final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.getColumnDimension());
523                final T zero = getField().getZero();
524    
525                // perform multiplication block-wise, to ensure good cache behavior
526                int blockIndex = 0;
527                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
528    
529                    final int pStart = iBlock * BLOCK_SIZE;
530                    final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
531    
532                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
533    
534                        final int qStart = jBlock * BLOCK_SIZE;
535                        final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, m.getColumnDimension());
536    
537                        // select current block
538                        final T[] outBlock = out.blocks[blockIndex];
539    
540                        // perform multiplication on current block
541                        for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
542                            final int kWidth      = blockWidth(kBlock);
543                            final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
544                            final int rStart      = kBlock * BLOCK_SIZE;
545                            int k = 0;
546                            for (int p = pStart; p < pEnd; ++p) {
547                                final int lStart = (p - pStart) * kWidth;
548                                final int lEnd   = lStart + kWidth;
549                                for (int q = qStart; q < qEnd; ++q) {
550                                    T sum = zero;
551                                    int r = rStart;
552                                    for (int l = lStart; l < lEnd; ++l) {
553                                        sum = sum.add(tBlock[l].multiply(m.getEntry(r, q)));
554                                        ++r;
555                                    }
556                                    outBlock[k] = outBlock[k].add(sum);
557                                    ++k;
558                                }
559                            }
560                        }
561    
562                        // go to next block
563                        ++blockIndex;
564    
565                    }
566                }
567    
568                return out;
569    
570            }
571        }
572    
573        /**
574         * Returns the result of postmultiplying this by m.
575         *
576         * @param m    matrix to postmultiply by
577         * @return     this * m
578         * @throws     IllegalArgumentException
579         *             if columnDimension(this) != rowDimension(m)
580         */
581        public BlockFieldMatrix<T> multiply(BlockFieldMatrix<T> m) throws IllegalArgumentException {
582    
583            // safety check
584            checkMultiplicationCompatible(m);
585    
586            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.columns);
587            final T zero = getField().getZero();
588    
589            // perform multiplication block-wise, to ensure good cache behavior
590            int blockIndex = 0;
591            for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
592    
593                final int pStart = iBlock * BLOCK_SIZE;
594                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
595    
596                for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
597                    final int jWidth = out.blockWidth(jBlock);
598                    final int jWidth2 = jWidth  + jWidth;
599                    final int jWidth3 = jWidth2 + jWidth;
600                    final int jWidth4 = jWidth3 + jWidth;
601    
602                    // select current block
603                    final T[] outBlock = out.blocks[blockIndex];
604    
605                    // perform multiplication on current block
606                    for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
607                        final int kWidth = blockWidth(kBlock);
608                        final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
609                        final T[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
610                        int k = 0;
611                        for (int p = pStart; p < pEnd; ++p) {
612                            final int lStart = (p - pStart) * kWidth;
613                            final int lEnd   = lStart + kWidth;
614                            for (int nStart = 0; nStart < jWidth; ++nStart) {
615                                T sum = zero;
616                                int l = lStart;
617                                int n = nStart;
618                                while (l < lEnd - 3) {
619                                    sum = sum.
620                                          add(tBlock[l].multiply(mBlock[n])).
621                                          add(tBlock[l + 1].multiply(mBlock[n + jWidth])).
622                                          add(tBlock[l + 2].multiply(mBlock[n + jWidth2])).
623                                          add(tBlock[l + 3].multiply(mBlock[n + jWidth3]));
624                                    l += 4;
625                                    n += jWidth4;
626                                }
627                                while (l < lEnd) {
628                                    sum = sum.add(tBlock[l++].multiply(mBlock[n]));
629                                    n += jWidth;
630                                }
631                                outBlock[k] = outBlock[k].add(sum);
632                                ++k;
633                            }
634                        }
635                    }
636    
637                    // go to next block
638                    ++blockIndex;
639    
640                }
641            }
642    
643            return out;
644    
645        }
646    
647        /** {@inheritDoc} */
648        @Override
649        public T[][] getData() {
650    
651            final T[][] data = buildArray(getField(), getRowDimension(), getColumnDimension());
652            final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
653    
654            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
655                final int pStart = iBlock * BLOCK_SIZE;
656                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
657                int regularPos   = 0;
658                int lastPos      = 0;
659                for (int p = pStart; p < pEnd; ++p) {
660                    final T[] dataP = data[p];
661                    int blockIndex = iBlock * blockColumns;
662                    int dataPos    = 0;
663                    for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
664                        System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
665                        dataPos += BLOCK_SIZE;
666                    }
667                    System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
668                    regularPos += BLOCK_SIZE;
669                    lastPos    += lastColumns;
670                }
671            }
672    
673            return data;
674    
675        }
676    
677        /** {@inheritDoc} */
678        @Override
679        public FieldMatrix<T> getSubMatrix(final int startRow, final int endRow,
680                                       final int startColumn, final int endColumn)
681            throws MatrixIndexException {
682    
683            // safety checks
684            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
685    
686            // create the output matrix
687            final BlockFieldMatrix<T> out =
688                new BlockFieldMatrix<T>(getField(), endRow - startRow + 1, endColumn - startColumn + 1);
689    
690            // compute blocks shifts
691            final int blockStartRow    = startRow    / BLOCK_SIZE;
692            final int rowsShift        = startRow    % BLOCK_SIZE;
693            final int blockStartColumn = startColumn / BLOCK_SIZE;
694            final int columnsShift     = startColumn % BLOCK_SIZE;
695    
696            // perform extraction block-wise, to ensure good cache behavior
697            int pBlock = blockStartRow;
698            for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
699                final int iHeight = out.blockHeight(iBlock);
700                int qBlock = blockStartColumn;
701                for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
702                    final int jWidth = out.blockWidth(jBlock);
703    
704                    // handle one block of the output matrix
705                    final int      outIndex = iBlock * out.blockColumns + jBlock;
706                    final T[] outBlock = out.blocks[outIndex];
707                    final int      index    = pBlock * blockColumns + qBlock;
708                    final int      width    = blockWidth(qBlock);
709    
710                    final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
711                    final int widthExcess  = jWidth + columnsShift - BLOCK_SIZE;
712                    if (heightExcess > 0) {
713                        // the submatrix block spans on two blocks rows from the original matrix
714                        if (widthExcess > 0) {
715                            // the submatrix block spans on two blocks columns from the original matrix
716                            final int width2 = blockWidth(qBlock + 1);
717                            copyBlockPart(blocks[index], width,
718                                          rowsShift, BLOCK_SIZE,
719                                          columnsShift, BLOCK_SIZE,
720                                          outBlock, jWidth, 0, 0);
721                            copyBlockPart(blocks[index + 1], width2,
722                                          rowsShift, BLOCK_SIZE,
723                                          0, widthExcess,
724                                          outBlock, jWidth, 0, jWidth - widthExcess);
725                            copyBlockPart(blocks[index + blockColumns], width,
726                                          0, heightExcess,
727                                          columnsShift, BLOCK_SIZE,
728                                          outBlock, jWidth, iHeight - heightExcess, 0);
729                            copyBlockPart(blocks[index + blockColumns + 1], width2,
730                                          0, heightExcess,
731                                          0, widthExcess,
732                                          outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
733                        } else {
734                            // the submatrix block spans on one block column from the original matrix
735                            copyBlockPart(blocks[index], width,
736                                          rowsShift, BLOCK_SIZE,
737                                          columnsShift, jWidth + columnsShift,
738                                          outBlock, jWidth, 0, 0);
739                            copyBlockPart(blocks[index + blockColumns], width,
740                                          0, heightExcess,
741                                          columnsShift, jWidth + columnsShift,
742                                          outBlock, jWidth, iHeight - heightExcess, 0);
743                        }
744                    } else {
745                        // the submatrix block spans on one block row from the original matrix
746                        if (widthExcess > 0) {
747                            // the submatrix block spans on two blocks columns from the original matrix
748                            final int width2 = blockWidth(qBlock + 1);
749                            copyBlockPart(blocks[index], width,
750                                          rowsShift, iHeight + rowsShift,
751                                          columnsShift, BLOCK_SIZE,
752                                          outBlock, jWidth, 0, 0);
753                            copyBlockPart(blocks[index + 1], width2,
754                                          rowsShift, iHeight + rowsShift,
755                                          0, widthExcess,
756                                          outBlock, jWidth, 0, jWidth - widthExcess);
757                        } else {
758                            // the submatrix block spans on one block column from the original matrix
759                            copyBlockPart(blocks[index], width,
760                                          rowsShift, iHeight + rowsShift,
761                                          columnsShift, jWidth + columnsShift,
762                                          outBlock, jWidth, 0, 0);
763                        }
764                   }
765    
766                    ++qBlock;
767                }
768    
769                ++pBlock;
770    
771            }
772    
773            return out;
774    
775        }
776    
777        /**
778         * Copy a part of a block into another one
779         * <p>This method can be called only when the specified part fits in both
780         * blocks, no verification is done here.</p>
781         * @param srcBlock source block
782         * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
783         * @param srcStartRow start row in the source block
784         * @param srcEndRow end row (exclusive) in the source block
785         * @param srcStartColumn start column in the source block
786         * @param srcEndColumn end column (exclusive) in the source block
787         * @param dstBlock destination block
788         * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
789         * @param dstStartRow start row in the destination block
790         * @param dstStartColumn start column in the destination block
791         */
792        private void copyBlockPart(final T[] srcBlock, final int srcWidth,
793                                   final int srcStartRow, final int srcEndRow,
794                                   final int srcStartColumn, final int srcEndColumn,
795                                   final T[] dstBlock, final int dstWidth,
796                                   final int dstStartRow, final int dstStartColumn) {
797            final int length = srcEndColumn - srcStartColumn;
798            int srcPos = srcStartRow * srcWidth + srcStartColumn;
799            int dstPos = dstStartRow * dstWidth + dstStartColumn;
800            for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
801                System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
802                srcPos += srcWidth;
803                dstPos += dstWidth;
804            }
805        }
806    
807        /** {@inheritDoc} */
808        @Override
809        public void setSubMatrix(final T[][] subMatrix, final int row, final int column)
810            throws MatrixIndexException {
811    
812            // safety checks
813            final int refLength = subMatrix[0].length;
814            if (refLength < 1) {
815                throw MathRuntimeException.createIllegalArgumentException(LocalizedFormats.AT_LEAST_ONE_COLUMN);
816            }
817            final int endRow    = row + subMatrix.length - 1;
818            final int endColumn = column + refLength - 1;
819            checkSubMatrixIndex(row, endRow, column, endColumn);
820            for (final T[] subRow : subMatrix) {
821                if (subRow.length != refLength) {
822                    throw MathRuntimeException.createIllegalArgumentException(
823                            LocalizedFormats.DIFFERENT_ROWS_LENGTHS,
824                            refLength, subRow.length);
825                }
826            }
827    
828            // compute blocks bounds
829            final int blockStartRow    = row / BLOCK_SIZE;
830            final int blockEndRow      = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
831            final int blockStartColumn = column / BLOCK_SIZE;
832            final int blockEndColumn   = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
833    
834            // perform copy block-wise, to ensure good cache behavior
835            for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
836                final int iHeight  = blockHeight(iBlock);
837                final int firstRow = iBlock * BLOCK_SIZE;
838                final int iStart   = FastMath.max(row,    firstRow);
839                final int iEnd     = FastMath.min(endRow + 1, firstRow + iHeight);
840    
841                for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
842                    final int jWidth      = blockWidth(jBlock);
843                    final int firstColumn = jBlock * BLOCK_SIZE;
844                    final int jStart      = FastMath.max(column,    firstColumn);
845                    final int jEnd        = FastMath.min(endColumn + 1, firstColumn + jWidth);
846                    final int jLength     = jEnd - jStart;
847    
848                    // handle one block, row by row
849                    final T[] block = blocks[iBlock * blockColumns + jBlock];
850                    for (int i = iStart; i < iEnd; ++i) {
851                        System.arraycopy(subMatrix[i - row], jStart - column,
852                                         block, (i - firstRow) * jWidth + (jStart - firstColumn),
853                                         jLength);
854                    }
855    
856                }
857            }
858        }
859    
860        /** {@inheritDoc} */
861        @Override
862        public FieldMatrix<T> getRowMatrix(final int row)
863            throws MatrixIndexException {
864    
865            checkRowIndex(row);
866            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), 1, columns);
867    
868            // perform copy block-wise, to ensure good cache behavior
869            final int iBlock  = row / BLOCK_SIZE;
870            final int iRow    = row - iBlock * BLOCK_SIZE;
871            int outBlockIndex = 0;
872            int outIndex      = 0;
873            T[] outBlock = out.blocks[outBlockIndex];
874            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
875                final int jWidth     = blockWidth(jBlock);
876                final T[] block = blocks[iBlock * blockColumns + jBlock];
877                final int available  = outBlock.length - outIndex;
878                if (jWidth > available) {
879                    System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
880                    outBlock = out.blocks[++outBlockIndex];
881                    System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
882                    outIndex = jWidth - available;
883                } else {
884                    System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
885                    outIndex += jWidth;
886                }
887            }
888    
889            return out;
890    
891        }
892    
893        /** {@inheritDoc} */
894        @Override
895        public void setRowMatrix(final int row, final FieldMatrix<T> matrix)
896            throws MatrixIndexException, InvalidMatrixException {
897            try {
898                setRowMatrix(row, (BlockFieldMatrix<T>) matrix);
899            } catch (ClassCastException cce) {
900                super.setRowMatrix(row, matrix);
901            }
902        }
903    
904        /**
905         * Sets the entries in row number <code>row</code>
906         * as a row matrix.  Row indices start at 0.
907         *
908         * @param row the row to be set
909         * @param matrix row matrix (must have one row and the same number of columns
910         * as the instance)
911         * @throws MatrixIndexException if the specified row index is invalid
912         * @throws InvalidMatrixException if the matrix dimensions do not match one
913         * instance row
914         */
915        public void setRowMatrix(final int row, final BlockFieldMatrix<T> matrix)
916            throws MatrixIndexException, InvalidMatrixException {
917    
918            checkRowIndex(row);
919            final int nCols = getColumnDimension();
920            if ((matrix.getRowDimension() != 1) ||
921                (matrix.getColumnDimension() != nCols)) {
922                throw new InvalidMatrixException(
923                        LocalizedFormats.DIMENSIONS_MISMATCH_2x2,
924                        matrix.getRowDimension(), matrix.getColumnDimension(),
925                        1, nCols);
926            }
927    
928            // perform copy block-wise, to ensure good cache behavior
929            final int iBlock = row / BLOCK_SIZE;
930            final int iRow   = row - iBlock * BLOCK_SIZE;
931            int mBlockIndex  = 0;
932            int mIndex       = 0;
933            T[] mBlock  = matrix.blocks[mBlockIndex];
934            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
935                final int jWidth     = blockWidth(jBlock);
936                final T[] block = blocks[iBlock * blockColumns + jBlock];
937                final int available  = mBlock.length - mIndex;
938                if (jWidth > available) {
939                    System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
940                    mBlock = matrix.blocks[++mBlockIndex];
941                    System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
942                    mIndex = jWidth - available;
943                } else {
944                    System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
945                    mIndex += jWidth;
946               }
947            }
948    
949        }
950    
951        /** {@inheritDoc} */
952        @Override
953        public FieldMatrix<T> getColumnMatrix(final int column)
954            throws MatrixIndexException {
955    
956            checkColumnIndex(column);
957            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, 1);
958    
959            // perform copy block-wise, to ensure good cache behavior
960            final int jBlock  = column / BLOCK_SIZE;
961            final int jColumn = column - jBlock * BLOCK_SIZE;
962            final int jWidth  = blockWidth(jBlock);
963            int outBlockIndex = 0;
964            int outIndex      = 0;
965            T[] outBlock = out.blocks[outBlockIndex];
966            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
967                final int iHeight = blockHeight(iBlock);
968                final T[] block = blocks[iBlock * blockColumns + jBlock];
969                for (int i = 0; i < iHeight; ++i) {
970                    if (outIndex >= outBlock.length) {
971                        outBlock = out.blocks[++outBlockIndex];
972                        outIndex = 0;
973                    }
974                    outBlock[outIndex++] = block[i * jWidth + jColumn];
975                }
976            }
977    
978            return out;
979    
980        }
981    
982        /** {@inheritDoc} */
983        @Override
984        public void setColumnMatrix(final int column, final FieldMatrix<T> matrix)
985            throws MatrixIndexException, InvalidMatrixException {
986            try {
987                setColumnMatrix(column, (BlockFieldMatrix<T>) matrix);
988            } catch (ClassCastException cce) {
989                super.setColumnMatrix(column, matrix);
990            }
991        }
992    
993        /**
994         * Sets the entries in column number <code>column</code>
995         * as a column matrix.  Column indices start at 0.
996         *
997         * @param column the column to be set
998         * @param matrix column matrix (must have one column and the same number of rows
999         * as the instance)
1000         * @throws MatrixIndexException if the specified column index is invalid
1001         * @throws InvalidMatrixException if the matrix dimensions do not match one
1002         * instance column
1003         */
1004        void setColumnMatrix(final int column, final BlockFieldMatrix<T> matrix)
1005            throws MatrixIndexException, InvalidMatrixException {
1006    
1007            checkColumnIndex(column);
1008            final int nRows = getRowDimension();
1009            if ((matrix.getRowDimension() != nRows) ||
1010                (matrix.getColumnDimension() != 1)) {
1011                throw new InvalidMatrixException(
1012                        LocalizedFormats.DIMENSIONS_MISMATCH_2x2,
1013                        matrix.getRowDimension(), matrix.getColumnDimension(),
1014                        nRows, 1);
1015            }
1016    
1017            // perform copy block-wise, to ensure good cache behavior
1018            final int jBlock  = column / BLOCK_SIZE;
1019            final int jColumn = column - jBlock * BLOCK_SIZE;
1020            final int jWidth  = blockWidth(jBlock);
1021            int mBlockIndex = 0;
1022            int mIndex      = 0;
1023            T[] mBlock = matrix.blocks[mBlockIndex];
1024            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1025                final int iHeight = blockHeight(iBlock);
1026                final T[] block = blocks[iBlock * blockColumns + jBlock];
1027                for (int i = 0; i < iHeight; ++i) {
1028                    if (mIndex >= mBlock.length) {
1029                        mBlock = matrix.blocks[++mBlockIndex];
1030                        mIndex = 0;
1031                    }
1032                    block[i * jWidth + jColumn] = mBlock[mIndex++];
1033                }
1034            }
1035    
1036        }
1037    
1038        /** {@inheritDoc} */
1039        @Override
1040        public FieldVector<T> getRowVector(final int row)
1041            throws MatrixIndexException {
1042    
1043            checkRowIndex(row);
1044            final T[] outData = buildArray(getField(), columns);
1045    
1046            // perform copy block-wise, to ensure good cache behavior
1047            final int iBlock  = row / BLOCK_SIZE;
1048            final int iRow    = row - iBlock * BLOCK_SIZE;
1049            int outIndex      = 0;
1050            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1051                final int jWidth     = blockWidth(jBlock);
1052                final T[] block = blocks[iBlock * blockColumns + jBlock];
1053                System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1054                outIndex += jWidth;
1055            }
1056    
1057            return new ArrayFieldVector<T>(outData, false);
1058    
1059        }
1060    
1061        /** {@inheritDoc} */
1062        @Override
1063        public void setRowVector(final int row, final FieldVector<T> vector)
1064            throws MatrixIndexException, InvalidMatrixException {
1065            try {
1066                setRow(row, ((ArrayFieldVector<T>) vector).getDataRef());
1067            } catch (ClassCastException cce) {
1068                super.setRowVector(row, vector);
1069            }
1070        }
1071    
1072        /** {@inheritDoc} */
1073        @Override
1074        public FieldVector<T> getColumnVector(final int column)
1075            throws MatrixIndexException {
1076    
1077            checkColumnIndex(column);
1078            final T[] outData = buildArray(getField(), rows);
1079    
1080            // perform copy block-wise, to ensure good cache behavior
1081            final int jBlock  = column / BLOCK_SIZE;
1082            final int jColumn = column - jBlock * BLOCK_SIZE;
1083            final int jWidth  = blockWidth(jBlock);
1084            int outIndex      = 0;
1085            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1086                final int iHeight = blockHeight(iBlock);
1087                final T[] block = blocks[iBlock * blockColumns + jBlock];
1088                for (int i = 0; i < iHeight; ++i) {
1089                    outData[outIndex++] = block[i * jWidth + jColumn];
1090                }
1091            }
1092    
1093            return new ArrayFieldVector<T>(outData, false);
1094    
1095        }
1096    
1097        /** {@inheritDoc} */
1098        @Override
1099        public void setColumnVector(final int column, final FieldVector<T> vector)
1100            throws MatrixIndexException, InvalidMatrixException {
1101            try {
1102                setColumn(column, ((ArrayFieldVector<T>) vector).getDataRef());
1103            } catch (ClassCastException cce) {
1104                super.setColumnVector(column, vector);
1105            }
1106        }
1107    
1108        /** {@inheritDoc} */
1109        @Override
1110        public T[] getRow(final int row)
1111            throws MatrixIndexException {
1112    
1113            checkRowIndex(row);
1114            final T[] out = buildArray(getField(), columns);
1115    
1116            // perform copy block-wise, to ensure good cache behavior
1117            final int iBlock  = row / BLOCK_SIZE;
1118            final int iRow    = row - iBlock * BLOCK_SIZE;
1119            int outIndex      = 0;
1120            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1121                final int jWidth     = blockWidth(jBlock);
1122                final T[] block = blocks[iBlock * blockColumns + jBlock];
1123                System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1124                outIndex += jWidth;
1125            }
1126    
1127            return out;
1128    
1129        }
1130    
1131        /** {@inheritDoc} */
1132        @Override
1133        public void setRow(final int row, final T[] array)
1134            throws MatrixIndexException, InvalidMatrixException {
1135    
1136            checkRowIndex(row);
1137            final int nCols = getColumnDimension();
1138            if (array.length != nCols) {
1139                throw new InvalidMatrixException(
1140                        LocalizedFormats.DIMENSIONS_MISMATCH_2x2,
1141                        1, array.length, 1, nCols);
1142            }
1143    
1144            // perform copy block-wise, to ensure good cache behavior
1145            final int iBlock  = row / BLOCK_SIZE;
1146            final int iRow    = row - iBlock * BLOCK_SIZE;
1147            int outIndex      = 0;
1148            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1149                final int jWidth     = blockWidth(jBlock);
1150                final T[] block = blocks[iBlock * blockColumns + jBlock];
1151                System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1152                outIndex += jWidth;
1153            }
1154    
1155        }
1156    
1157        /** {@inheritDoc} */
1158        @Override
1159        public T[] getColumn(final int column)
1160            throws MatrixIndexException {
1161    
1162            checkColumnIndex(column);
1163            final T[] out = buildArray(getField(), rows);
1164    
1165            // perform copy block-wise, to ensure good cache behavior
1166            final int jBlock  = column / BLOCK_SIZE;
1167            final int jColumn = column - jBlock * BLOCK_SIZE;
1168            final int jWidth  = blockWidth(jBlock);
1169            int outIndex      = 0;
1170            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1171                final int iHeight = blockHeight(iBlock);
1172                final T[] block = blocks[iBlock * blockColumns + jBlock];
1173                for (int i = 0; i < iHeight; ++i) {
1174                    out[outIndex++] = block[i * jWidth + jColumn];
1175                }
1176            }
1177    
1178            return out;
1179    
1180        }
1181    
1182        /** {@inheritDoc} */
1183        @Override
1184        public void setColumn(final int column, final T[] array)
1185            throws MatrixIndexException, InvalidMatrixException {
1186    
1187            checkColumnIndex(column);
1188            final int nRows = getRowDimension();
1189            if (array.length != nRows) {
1190                throw new InvalidMatrixException(
1191                        LocalizedFormats.DIMENSIONS_MISMATCH_2x2,
1192                        array.length, 1, nRows, 1);
1193            }
1194    
1195            // perform copy block-wise, to ensure good cache behavior
1196            final int jBlock  = column / BLOCK_SIZE;
1197            final int jColumn = column - jBlock * BLOCK_SIZE;
1198            final int jWidth  = blockWidth(jBlock);
1199            int outIndex      = 0;
1200            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1201                final int iHeight = blockHeight(iBlock);
1202                final T[] block = blocks[iBlock * blockColumns + jBlock];
1203                for (int i = 0; i < iHeight; ++i) {
1204                    block[i * jWidth + jColumn] = array[outIndex++];
1205                }
1206            }
1207    
1208        }
1209    
1210        /** {@inheritDoc} */
1211        @Override
1212        public T getEntry(final int row, final int column)
1213            throws MatrixIndexException {
1214            try {
1215                final int iBlock = row    / BLOCK_SIZE;
1216                final int jBlock = column / BLOCK_SIZE;
1217                final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1218                                   (column - jBlock * BLOCK_SIZE);
1219                return blocks[iBlock * blockColumns + jBlock][k];
1220            } catch (ArrayIndexOutOfBoundsException e) {
1221                throw new MatrixIndexException(
1222                        LocalizedFormats.NO_SUCH_MATRIX_ENTRY,
1223                        row, column, getRowDimension(), getColumnDimension());
1224            }
1225        }
1226    
1227        /** {@inheritDoc} */
1228        @Override
1229        public void setEntry(final int row, final int column, final T value)
1230            throws MatrixIndexException {
1231            try {
1232                final int iBlock = row    / BLOCK_SIZE;
1233                final int jBlock = column / BLOCK_SIZE;
1234                final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1235                                   (column - jBlock * BLOCK_SIZE);
1236                blocks[iBlock * blockColumns + jBlock][k] = value;
1237            } catch (ArrayIndexOutOfBoundsException e) {
1238                throw new MatrixIndexException(
1239                        LocalizedFormats.NO_SUCH_MATRIX_ENTRY,
1240                        row, column, getRowDimension(), getColumnDimension());
1241            }
1242        }
1243    
1244        /** {@inheritDoc} */
1245        @Override
1246        public void addToEntry(final int row, final int column, final T increment)
1247            throws MatrixIndexException {
1248            try {
1249                final int iBlock = row    / BLOCK_SIZE;
1250                final int jBlock = column / BLOCK_SIZE;
1251                final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1252                                   (column - jBlock * BLOCK_SIZE);
1253                final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1254                blockIJ[k] = blockIJ[k].add(increment);
1255            } catch (ArrayIndexOutOfBoundsException e) {
1256                throw new MatrixIndexException(
1257                        LocalizedFormats.NO_SUCH_MATRIX_ENTRY,
1258                        row, column, getRowDimension(), getColumnDimension());
1259            }
1260        }
1261    
1262        /** {@inheritDoc} */
1263        @Override
1264        public void multiplyEntry(final int row, final int column, final T factor)
1265            throws MatrixIndexException {
1266            try {
1267                final int iBlock = row    / BLOCK_SIZE;
1268                final int jBlock = column / BLOCK_SIZE;
1269                final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1270                                   (column - jBlock * BLOCK_SIZE);
1271                final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1272                blockIJ[k] = blockIJ[k].multiply(factor);
1273            } catch (ArrayIndexOutOfBoundsException e) {
1274                throw new MatrixIndexException(
1275                        LocalizedFormats.NO_SUCH_MATRIX_ENTRY,
1276                        row, column, getRowDimension(), getColumnDimension());
1277            }
1278        }
1279    
1280        /** {@inheritDoc} */
1281        @Override
1282        public FieldMatrix<T> transpose() {
1283    
1284            final int nRows = getRowDimension();
1285            final int nCols = getColumnDimension();
1286            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), nCols, nRows);
1287    
1288            // perform transpose block-wise, to ensure good cache behavior
1289            int blockIndex = 0;
1290            for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1291                for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1292    
1293                    // transpose current block
1294                    final T[] outBlock = out.blocks[blockIndex];
1295                    final T[] tBlock   = blocks[jBlock * blockColumns + iBlock];
1296                    final int      pStart   = iBlock * BLOCK_SIZE;
1297                    final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, columns);
1298                    final int      qStart   = jBlock * BLOCK_SIZE;
1299                    final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, rows);
1300                    int k = 0;
1301                    for (int p = pStart; p < pEnd; ++p) {
1302                        final int lInc = pEnd - pStart;
1303                        int l = p - pStart;
1304                        for (int q = qStart; q < qEnd; ++q) {
1305                            outBlock[k] = tBlock[l];
1306                            ++k;
1307                            l+= lInc;
1308                        }
1309                    }
1310    
1311                    // go to next block
1312                    ++blockIndex;
1313    
1314                }
1315            }
1316    
1317            return out;
1318    
1319        }
1320    
1321        /** {@inheritDoc} */
1322        @Override
1323        public int getRowDimension() {
1324            return rows;
1325        }
1326    
1327        /** {@inheritDoc} */
1328        @Override
1329        public int getColumnDimension() {
1330            return columns;
1331        }
1332    
1333        /** {@inheritDoc} */
1334        @Override
1335        public T[] operate(final T[] v)
1336            throws IllegalArgumentException {
1337    
1338            if (v.length != columns) {
1339                throw MathRuntimeException.createIllegalArgumentException(
1340                        LocalizedFormats.VECTOR_LENGTH_MISMATCH,
1341                        v.length, columns);
1342            }
1343            final T[] out = buildArray(getField(), rows);
1344            final T zero = getField().getZero();
1345    
1346            // perform multiplication block-wise, to ensure good cache behavior
1347            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1348                final int pStart = iBlock * BLOCK_SIZE;
1349                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1350                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1351                    final T[] block  = blocks[iBlock * blockColumns + jBlock];
1352                    final int      qStart = jBlock * BLOCK_SIZE;
1353                    final int      qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1354                    int k = 0;
1355                    for (int p = pStart; p < pEnd; ++p) {
1356                        T sum = zero;
1357                        int q = qStart;
1358                        while (q < qEnd - 3) {
1359                            sum = sum.
1360                                  add(block[k].multiply(v[q])).
1361                                  add(block[k + 1].multiply(v[q + 1])).
1362                                  add(block[k + 2].multiply(v[q + 2])).
1363                                  add(block[k + 3].multiply(v[q + 3]));
1364                            k += 4;
1365                            q += 4;
1366                        }
1367                        while (q < qEnd) {
1368                            sum = sum.add(block[k++].multiply(v[q++]));
1369                        }
1370                        out[p] = out[p].add(sum);
1371                    }
1372                }
1373            }
1374    
1375            return out;
1376    
1377        }
1378    
1379        /** {@inheritDoc} */
1380        @Override
1381        public T[] preMultiply(final T[] v)
1382            throws IllegalArgumentException {
1383    
1384            if (v.length != rows) {
1385                throw MathRuntimeException.createIllegalArgumentException(
1386                        LocalizedFormats.VECTOR_LENGTH_MISMATCH,
1387                        v.length, rows);
1388            }
1389            final T[] out = buildArray(getField(), columns);
1390            final T zero = getField().getZero();
1391    
1392            // perform multiplication block-wise, to ensure good cache behavior
1393            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1394                final int jWidth  = blockWidth(jBlock);
1395                final int jWidth2 = jWidth  + jWidth;
1396                final int jWidth3 = jWidth2 + jWidth;
1397                final int jWidth4 = jWidth3 + jWidth;
1398                final int qStart = jBlock * BLOCK_SIZE;
1399                final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1400                for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1401                    final T[] block  = blocks[iBlock * blockColumns + jBlock];
1402                    final int      pStart = iBlock * BLOCK_SIZE;
1403                    final int      pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1404                    for (int q = qStart; q < qEnd; ++q) {
1405                        int k = q - qStart;
1406                        T sum = zero;
1407                        int p = pStart;
1408                        while (p < pEnd - 3) {
1409                            sum = sum.
1410                                  add(block[k].multiply(v[p])).
1411                                  add(block[k + jWidth].multiply(v[p + 1])).
1412                                  add(block[k + jWidth2].multiply(v[p + 2])).
1413                                  add(block[k + jWidth3].multiply(v[p + 3]));
1414                            k += jWidth4;
1415                            p += 4;
1416                        }
1417                        while (p < pEnd) {
1418                            sum = sum.add(block[k].multiply(v[p++]));
1419                            k += jWidth;
1420                        }
1421                        out[q] = out[q].add(sum);
1422                    }
1423                }
1424            }
1425    
1426            return out;
1427    
1428        }
1429    
1430        /** {@inheritDoc} */
1431        @Override
1432        public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor)
1433            throws MatrixVisitorException {
1434            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1435            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1436                final int pStart = iBlock * BLOCK_SIZE;
1437                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1438                for (int p = pStart; p < pEnd; ++p) {
1439                    for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1440                        final int jWidth = blockWidth(jBlock);
1441                        final int qStart = jBlock * BLOCK_SIZE;
1442                        final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1443                        final T[] block = blocks[iBlock * blockColumns + jBlock];
1444                        int k = (p - pStart) * jWidth;
1445                        for (int q = qStart; q < qEnd; ++q) {
1446                            block[k] = visitor.visit(p, q, block[k]);
1447                            ++k;
1448                        }
1449                    }
1450                 }
1451            }
1452            return visitor.end();
1453        }
1454    
1455        /** {@inheritDoc} */
1456        @Override
1457        public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor)
1458            throws MatrixVisitorException {
1459            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1460            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1461                final int pStart = iBlock * BLOCK_SIZE;
1462                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1463                for (int p = pStart; p < pEnd; ++p) {
1464                    for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1465                        final int jWidth = blockWidth(jBlock);
1466                        final int qStart = jBlock * BLOCK_SIZE;
1467                        final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1468                        final T[] block = blocks[iBlock * blockColumns + jBlock];
1469                        int k = (p - pStart) * jWidth;
1470                        for (int q = qStart; q < qEnd; ++q) {
1471                            visitor.visit(p, q, block[k]);
1472                            ++k;
1473                        }
1474                    }
1475                 }
1476            }
1477            return visitor.end();
1478        }
1479    
1480        /** {@inheritDoc} */
1481        @Override
1482        public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor,
1483                                     final int startRow, final int endRow,
1484                                     final int startColumn, final int endColumn)
1485            throws MatrixIndexException, MatrixVisitorException {
1486            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1487            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1488            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1489                final int p0     = iBlock * BLOCK_SIZE;
1490                final int pStart = FastMath.max(startRow, p0);
1491                final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1492                for (int p = pStart; p < pEnd; ++p) {
1493                    for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1494                        final int jWidth = blockWidth(jBlock);
1495                        final int q0     = jBlock * BLOCK_SIZE;
1496                        final int qStart = FastMath.max(startColumn, q0);
1497                        final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1498                        final T[] block = blocks[iBlock * blockColumns + jBlock];
1499                        int k = (p - p0) * jWidth + qStart - q0;
1500                        for (int q = qStart; q < qEnd; ++q) {
1501                            block[k] = visitor.visit(p, q, block[k]);
1502                            ++k;
1503                        }
1504                    }
1505                 }
1506            }
1507            return visitor.end();
1508        }
1509    
1510        /** {@inheritDoc} */
1511        @Override
1512        public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor,
1513                                     final int startRow, final int endRow,
1514                                     final int startColumn, final int endColumn)
1515            throws MatrixIndexException, MatrixVisitorException {
1516            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1517            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1518            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1519                final int p0     = iBlock * BLOCK_SIZE;
1520                final int pStart = FastMath.max(startRow, p0);
1521                final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1522                for (int p = pStart; p < pEnd; ++p) {
1523                    for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1524                        final int jWidth = blockWidth(jBlock);
1525                        final int q0     = jBlock * BLOCK_SIZE;
1526                        final int qStart = FastMath.max(startColumn, q0);
1527                        final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1528                        final T[] block = blocks[iBlock * blockColumns + jBlock];
1529                        int k = (p - p0) * jWidth + qStart - q0;
1530                        for (int q = qStart; q < qEnd; ++q) {
1531                            visitor.visit(p, q, block[k]);
1532                            ++k;
1533                        }
1534                    }
1535                 }
1536            }
1537            return visitor.end();
1538        }
1539    
1540        /** {@inheritDoc} */
1541        @Override
1542        public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor)
1543            throws MatrixVisitorException {
1544            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1545            int blockIndex = 0;
1546            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1547                final int pStart = iBlock * BLOCK_SIZE;
1548                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1549                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1550                    final int qStart = jBlock * BLOCK_SIZE;
1551                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1552                    final T[] block = blocks[blockIndex];
1553                    int k = 0;
1554                    for (int p = pStart; p < pEnd; ++p) {
1555                        for (int q = qStart; q < qEnd; ++q) {
1556                            block[k] = visitor.visit(p, q, block[k]);
1557                            ++k;
1558                        }
1559                    }
1560                    ++blockIndex;
1561                }
1562            }
1563            return visitor.end();
1564        }
1565    
1566        /** {@inheritDoc} */
1567        @Override
1568        public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor)
1569            throws MatrixVisitorException {
1570            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1571            int blockIndex = 0;
1572            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1573                final int pStart = iBlock * BLOCK_SIZE;
1574                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1575                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1576                    final int qStart = jBlock * BLOCK_SIZE;
1577                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1578                    final T[] block = blocks[blockIndex];
1579                    int k = 0;
1580                    for (int p = pStart; p < pEnd; ++p) {
1581                        for (int q = qStart; q < qEnd; ++q) {
1582                            visitor.visit(p, q, block[k]);
1583                            ++k;
1584                        }
1585                    }
1586                    ++blockIndex;
1587                }
1588            }
1589            return visitor.end();
1590        }
1591    
1592        /** {@inheritDoc} */
1593        @Override
1594        public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor,
1595                                           final int startRow, final int endRow,
1596                                           final int startColumn, final int endColumn)
1597            throws MatrixIndexException, MatrixVisitorException {
1598            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1599            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1600            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1601                final int p0     = iBlock * BLOCK_SIZE;
1602                final int pStart = FastMath.max(startRow, p0);
1603                final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1604                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1605                    final int jWidth = blockWidth(jBlock);
1606                    final int q0     = jBlock * BLOCK_SIZE;
1607                    final int qStart = FastMath.max(startColumn, q0);
1608                    final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1609                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1610                    for (int p = pStart; p < pEnd; ++p) {
1611                        int k = (p - p0) * jWidth + qStart - q0;
1612                        for (int q = qStart; q < qEnd; ++q) {
1613                            block[k] = visitor.visit(p, q, block[k]);
1614                            ++k;
1615                        }
1616                    }
1617                }
1618            }
1619            return visitor.end();
1620        }
1621    
1622        /** {@inheritDoc} */
1623        @Override
1624        public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor,
1625                                           final int startRow, final int endRow,
1626                                           final int startColumn, final int endColumn)
1627            throws MatrixIndexException, MatrixVisitorException {
1628            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1629            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1630            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1631                final int p0     = iBlock * BLOCK_SIZE;
1632                final int pStart = FastMath.max(startRow, p0);
1633                final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1634                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1635                    final int jWidth = blockWidth(jBlock);
1636                    final int q0     = jBlock * BLOCK_SIZE;
1637                    final int qStart = FastMath.max(startColumn, q0);
1638                    final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1639                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1640                    for (int p = pStart; p < pEnd; ++p) {
1641                        int k = (p - p0) * jWidth + qStart - q0;
1642                        for (int q = qStart; q < qEnd; ++q) {
1643                            visitor.visit(p, q, block[k]);
1644                            ++k;
1645                        }
1646                    }
1647                }
1648            }
1649            return visitor.end();
1650        }
1651    
1652        /**
1653         * Get the height of a block.
1654         * @param blockRow row index (in block sense) of the block
1655         * @return height (number of rows) of the block
1656         */
1657        private int blockHeight(final int blockRow) {
1658            return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1659        }
1660    
1661        /**
1662         * Get the width of a block.
1663         * @param blockColumn column index (in block sense) of the block
1664         * @return width (number of columns) of the block
1665         */
1666        private int blockWidth(final int blockColumn) {
1667            return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1668        }
1669    
1670    }