48 #include <m4ri/m4ri.h>
50 #include "m4ri_functions.h"
113 mzd_concat(C->
x, A->
x, B->
x);
137 mzd_stack(C->
x, A->
x, B->
x);
160 mzd_submatrix(S->
x, M->
x, lowr, lowc*M->
w, highr, highc*M->
w);
190 B->
nrows = highr - lowr;
191 B->
ncols = highc - lowc;
192 B->
x = mzd_init_window(A->
x, lowr, B->
w*lowc, highr, B->
w*highc);
205 mzd_free_window(A->
x);
246 #define mzed_sub mzed_add
258 #define _mzed_sub _mzed_add
424 return __mzd_read_bits(A->
x, row, A->
w*col, A->
w);
439 __mzd_xor_bits(A->
x, row, A->
w*col, A->
w, elem);
454 __mzd_clear_bits(A->
x, row, A->
w*col, A->
w);
455 __mzd_xor_bits(A->
x, row, A->
w*col, A->
w, elem);
472 return mzd_cmp(A->
x,B->
x);
484 return mzd_is_zero(A->
x);
516 assert(A->
x->offset == B->
x->offset);
517 assert(start_col < A->ncols);
519 const rci_t start = A->
x->offset + A->
w*start_col;
520 const wi_t startblock = start/m4ri_radix;
521 const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix));
522 const word bitmask_end = __M4RI_LEFT_BITMASK((A->
x->offset + A->
x->ncols) % m4ri_radix);
524 word *_a = A->
x->rows[ar];
525 const word *_b = B->
x->rows[br];
528 if (A->
x->width - startblock > 1) {
529 _a[startblock] ^= _b[startblock] & bitmask_begin;
530 for(j=startblock+1; j<A->
x->width-1; j++)
532 _a[j] ^= _b[j] & bitmask_end;
534 _a[startblock] ^= _b[startblock] & (bitmask_begin & bitmask_end);
550 assert(start_col < A->ncols);
552 const rci_t start = A->
x->offset + A->
w*start_col;
553 const wi_t startblock = start/m4ri_radix;
554 word *_a = A->
x->rows[r];
555 const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix));
556 const word bitmask_end = __M4RI_LEFT_BITMASK((A->
x->offset + A->
x->ncols) % m4ri_radix);
557 register word __a = _a[startblock]>>(start%m4ri_radix);
558 register word __t = 0;
562 switch( (start/2) % 32 ) {
563 case 0: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<0; __a >>= 2;
564 case 1: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<2; __a >>= 2;
565 case 2: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<4; __a >>= 2;
566 case 3: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<6; __a >>= 2;
567 case 4: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<8; __a >>= 2;
568 case 5: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<10; __a >>= 2;
569 case 6: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<12; __a >>= 2;
570 case 7: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<14; __a >>= 2;
571 case 8: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<16; __a >>= 2;
572 case 9: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<18; __a >>= 2;
573 case 10: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<20; __a >>= 2;
574 case 11: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<22; __a >>= 2;
575 case 12: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<24; __a >>= 2;
576 case 13: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<26; __a >>= 2;
577 case 14: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<28; __a >>= 2;
578 case 15: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<30; __a >>= 2;
579 case 16: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<32; __a >>= 2;
580 case 17: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<34; __a >>= 2;
581 case 18: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<36; __a >>= 2;
582 case 19: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<38; __a >>= 2;
583 case 20: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<40; __a >>= 2;
584 case 21: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<42; __a >>= 2;
585 case 22: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<44; __a >>= 2;
586 case 23: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<46; __a >>= 2;
587 case 24: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<48; __a >>= 2;
588 case 25: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<50; __a >>= 2;
589 case 26: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<52; __a >>= 2;
590 case 27: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<54; __a >>= 2;
591 case 28: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<56; __a >>= 2;
592 case 29: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<58; __a >>= 2;
593 case 30: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<60; __a >>= 2;
594 case 31: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<62;
break;
595 default: m4ri_die(
"impossible");
597 if(A->
x->width-startblock == 1) {
598 _a[startblock] &= ~(bitmask_begin & bitmask_end);
599 _a[startblock] ^= __t & bitmask_begin & bitmask_end;
602 _a[startblock] &= ~bitmask_begin;
603 _a[startblock] ^= __t & bitmask_begin;
606 for(j=startblock+1; j<A->
x->width -1; j++) {
607 __a = _a[j], __t = 0;
608 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<0; __a >>= 2;
609 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<2; __a >>= 2;
610 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<4; __a >>= 2;
611 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<6; __a >>= 2;
612 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<8; __a >>= 2;
613 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<10; __a >>= 2;
614 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<12; __a >>= 2;
615 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<14; __a >>= 2;
616 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<16; __a >>= 2;
617 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<18; __a >>= 2;
618 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<20; __a >>= 2;
619 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<22; __a >>= 2;
620 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<24; __a >>= 2;
621 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<26; __a >>= 2;
622 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<28; __a >>= 2;
623 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<30; __a >>= 2;
624 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<32; __a >>= 2;
625 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<34; __a >>= 2;
626 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<36; __a >>= 2;
627 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<38; __a >>= 2;
628 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<40; __a >>= 2;
629 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<42; __a >>= 2;
630 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<44; __a >>= 2;
631 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<46; __a >>= 2;
632 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<48; __a >>= 2;
633 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<50; __a >>= 2;
634 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<52; __a >>= 2;
635 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<54; __a >>= 2;
636 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<56; __a >>= 2;
637 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<58; __a >>= 2;
638 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<60; __a >>= 2;
639 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<62;
643 __t = _a[j] & ~bitmask_end;
644 switch((A->
x->offset+A->
x->ncols) % m4ri_radix) {
645 case 0: __t ^= ((word)X[(
int)((_a[j] & 0xC000000000000000ULL)>>62)])<<62;
646 case 62: __t ^= ((word)X[(
int)((_a[j] & 0x3000000000000000ULL)>>60)])<<60;
647 case 60: __t ^= ((word)X[(
int)((_a[j] & 0x0C00000000000000ULL)>>58)])<<58;
648 case 58: __t ^= ((word)X[(
int)((_a[j] & 0x0300000000000000ULL)>>56)])<<56;
649 case 56: __t ^= ((word)X[(
int)((_a[j] & 0x00C0000000000000ULL)>>54)])<<54;
650 case 54: __t ^= ((word)X[(
int)((_a[j] & 0x0030000000000000ULL)>>52)])<<52;
651 case 52: __t ^= ((word)X[(
int)((_a[j] & 0x000C000000000000ULL)>>50)])<<50;
652 case 50: __t ^= ((word)X[(
int)((_a[j] & 0x0003000000000000ULL)>>48)])<<48;
653 case 48: __t ^= ((word)X[(
int)((_a[j] & 0x0000C00000000000ULL)>>46)])<<46;
654 case 46: __t ^= ((word)X[(
int)((_a[j] & 0x0000300000000000ULL)>>44)])<<44;
655 case 44: __t ^= ((word)X[(
int)((_a[j] & 0x00000C0000000000ULL)>>42)])<<42;
656 case 42: __t ^= ((word)X[(
int)((_a[j] & 0x0000030000000000ULL)>>40)])<<40;
657 case 40: __t ^= ((word)X[(
int)((_a[j] & 0x000000C000000000ULL)>>38)])<<38;
658 case 38: __t ^= ((word)X[(
int)((_a[j] & 0x0000003000000000ULL)>>36)])<<36;
659 case 36: __t ^= ((word)X[(
int)((_a[j] & 0x0000000C00000000ULL)>>34)])<<34;
660 case 34: __t ^= ((word)X[(
int)((_a[j] & 0x0000000300000000ULL)>>32)])<<32;
661 case 32: __t ^= ((word)X[(
int)((_a[j] & 0x00000000C0000000ULL)>>30)])<<30;
662 case 30: __t ^= ((word)X[(
int)((_a[j] & 0x0000000030000000ULL)>>28)])<<28;
663 case 28: __t ^= ((word)X[(
int)((_a[j] & 0x000000000C000000ULL)>>26)])<<26;
664 case 26: __t ^= ((word)X[(
int)((_a[j] & 0x0000000003000000ULL)>>24)])<<24;
665 case 24: __t ^= ((word)X[(
int)((_a[j] & 0x0000000000C00000ULL)>>22)])<<22;
666 case 22: __t ^= ((word)X[(
int)((_a[j] & 0x0000000000300000ULL)>>20)])<<20;
667 case 20: __t ^= ((word)X[(
int)((_a[j] & 0x00000000000C0000ULL)>>18)])<<18;
668 case 18: __t ^= ((word)X[(
int)((_a[j] & 0x0000000000030000ULL)>>16)])<<16;
669 case 16: __t ^= ((word)X[(
int)((_a[j] & 0x000000000000C000ULL)>>14)])<<14;
670 case 14: __t ^= ((word)X[(
int)((_a[j] & 0x0000000000003000ULL)>>12)])<<12;
671 case 12: __t ^= ((word)X[(
int)((_a[j] & 0x0000000000000C00ULL)>>10)])<<10;
672 case 10: __t ^= ((word)X[(
int)((_a[j] & 0x0000000000000300ULL)>> 8)])<< 8;
673 case 8: __t ^= ((word)X[(
int)((_a[j] & 0x00000000000000C0ULL)>> 6)])<< 6;
674 case 6: __t ^= ((word)X[(
int)((_a[j] & 0x0000000000000030ULL)>> 4)])<< 4;
675 case 4: __t ^= ((word)X[(
int)((_a[j] & 0x000000000000000CULL)>> 2)])<< 2;
676 case 2: __t ^= ((word)X[(
int)((_a[j] & 0x0000000000000003ULL)>> 0)])<< 0;
680 }
else if(A->
w == 4) {
681 switch( (start/4)%16 ) {
682 case 0: __t ^= (X[((__a)& 0x000000000000000FULL)])<<0; __a >>= 4;
683 case 1: __t ^= (X[((__a)& 0x000000000000000FULL)])<<4; __a >>= 4;
684 case 2: __t ^= (X[((__a)& 0x000000000000000FULL)])<<8; __a >>= 4;
685 case 3: __t ^= (X[((__a)& 0x000000000000000FULL)])<<12; __a >>= 4;
686 case 4: __t ^= (X[((__a)& 0x000000000000000FULL)])<<16; __a >>= 4;
687 case 5: __t ^= (X[((__a)& 0x000000000000000FULL)])<<20; __a >>= 4;
688 case 6: __t ^= (X[((__a)& 0x000000000000000FULL)])<<24; __a >>= 4;
689 case 7: __t ^= (X[((__a)& 0x000000000000000FULL)])<<28; __a >>= 4;
690 case 8: __t ^= (X[((__a)& 0x000000000000000FULL)])<<32; __a >>= 4;
691 case 9: __t ^= (X[((__a)& 0x000000000000000FULL)])<<36; __a >>= 4;
692 case 10: __t ^= (X[((__a)& 0x000000000000000FULL)])<<40; __a >>= 4;
693 case 11: __t ^= (X[((__a)& 0x000000000000000FULL)])<<44; __a >>= 4;
694 case 12: __t ^= (X[((__a)& 0x000000000000000FULL)])<<48; __a >>= 4;
695 case 13: __t ^= (X[((__a)& 0x000000000000000FULL)])<<52; __a >>= 4;
696 case 14: __t ^= (X[((__a)& 0x000000000000000FULL)])<<56; __a >>= 4;
697 case 15: __t ^= (X[((__a)& 0x000000000000000FULL)])<<60;
break;
698 default: m4ri_die(
"impossible");
700 if(A->
x->width-startblock == 1) {
701 _a[startblock] &= ~(bitmask_begin & bitmask_end);
702 _a[startblock] ^= __t & bitmask_begin & bitmask_end;
705 _a[startblock] &= ~bitmask_begin;
706 _a[startblock] ^= __t & bitmask_begin;
709 for(j=startblock+1; j<A->
x->width -1; j++) {
710 __a = _a[j], __t = 0;
711 __t ^= (X[((__a)& 0x000000000000000FULL)])<<0; __a >>= 4;
712 __t ^= (X[((__a)& 0x000000000000000FULL)])<<4; __a >>= 4;
713 __t ^= (X[((__a)& 0x000000000000000FULL)])<<8; __a >>= 4;
714 __t ^= (X[((__a)& 0x000000000000000FULL)])<<12; __a >>= 4;
715 __t ^= (X[((__a)& 0x000000000000000FULL)])<<16; __a >>= 4;
716 __t ^= (X[((__a)& 0x000000000000000FULL)])<<20; __a >>= 4;
717 __t ^= (X[((__a)& 0x000000000000000FULL)])<<24; __a >>= 4;
718 __t ^= (X[((__a)& 0x000000000000000FULL)])<<28; __a >>= 4;
719 __t ^= (X[((__a)& 0x000000000000000FULL)])<<32; __a >>= 4;
720 __t ^= (X[((__a)& 0x000000000000000FULL)])<<36; __a >>= 4;
721 __t ^= (X[((__a)& 0x000000000000000FULL)])<<40; __a >>= 4;
722 __t ^= (X[((__a)& 0x000000000000000FULL)])<<44; __a >>= 4;
723 __t ^= (X[((__a)& 0x000000000000000FULL)])<<48; __a >>= 4;
724 __t ^= (X[((__a)& 0x000000000000000FULL)])<<52; __a >>= 4;
725 __t ^= (X[((__a)& 0x000000000000000FULL)])<<56; __a >>= 4;
726 __t ^= (X[((__a)& 0x000000000000000FULL)])<<60;
730 __t = _a[j] & ~bitmask_end;
731 switch( (A->
x->offset + A->
x->ncols) % m4ri_radix) {
732 case 0: __t ^= ((word)X[(
int)((_a[j] & 0xF000000000000000ULL)>>60)])<<60;
733 case 60: __t ^= ((word)X[(
int)((_a[j] & 0x0F00000000000000ULL)>>56)])<<56;
734 case 56: __t ^= ((word)X[(
int)((_a[j] & 0x00F0000000000000ULL)>>52)])<<52;
735 case 52: __t ^= ((word)X[(
int)((_a[j] & 0x000F000000000000ULL)>>48)])<<48;
736 case 48: __t ^= ((word)X[(
int)((_a[j] & 0x0000F00000000000ULL)>>44)])<<44;
737 case 44: __t ^= ((word)X[(
int)((_a[j] & 0x00000F0000000000ULL)>>40)])<<40;
738 case 40: __t ^= ((word)X[(
int)((_a[j] & 0x000000F000000000ULL)>>36)])<<36;
739 case 36: __t ^= ((word)X[(
int)((_a[j] & 0x0000000F00000000ULL)>>32)])<<32;
740 case 32: __t ^= ((word)X[(
int)((_a[j] & 0x00000000F0000000ULL)>>28)])<<28;
741 case 28: __t ^= ((word)X[(
int)((_a[j] & 0x000000000F000000ULL)>>24)])<<24;
742 case 24: __t ^= ((word)X[(
int)((_a[j] & 0x0000000000F00000ULL)>>20)])<<20;
743 case 20: __t ^= ((word)X[(
int)((_a[j] & 0x00000000000F0000ULL)>>16)])<<16;
744 case 16: __t ^= ((word)X[(
int)((_a[j] & 0x000000000000F000ULL)>>12)])<<12;
745 case 12: __t ^= ((word)X[(
int)((_a[j] & 0x0000000000000F00ULL)>> 8)])<< 8;
746 case 8: __t ^= ((word)X[(
int)((_a[j] & 0x00000000000000F0ULL)>> 4)])<< 4;
747 case 4: __t ^= ((word)X[(
int)((_a[j] & 0x000000000000000FULL)>> 0)])<< 0;
751 }
else if (A->
w == 8) {
753 register word __a0 = _a[startblock]>>(start%m4ri_radix);
755 register word __t0 = 0;
758 switch( (start/8) %8 ) {
759 case 0: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<0; __a0 >>= 8;
760 case 1: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<8; __a0 >>= 8;
761 case 2: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<16; __a0 >>= 8;
762 case 3: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<24; __a0 >>= 8;
763 case 4: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<32; __a0 >>= 8;
764 case 5: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<40; __a0 >>= 8;
765 case 6: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<48; __a0 >>= 8;
766 case 7: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<56;
break;
767 default: m4ri_die(
"impossible");
769 if(A->
x->width-startblock == 1) {
770 _a[startblock] &= ~(bitmask_begin & bitmask_end);
771 _a[startblock] ^= __t0 & bitmask_begin & bitmask_end;
774 _a[startblock] &= ~bitmask_begin;
775 _a[startblock] ^= __t0 & bitmask_begin;
778 for(j=startblock+1; j+2 < A->
x->width; j+=2) {
779 __a0 = _a[j], __t0 = 0;
780 __a1 = _a[j+1], __t1 = 0;
781 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<0; __a0 >>= 8;
782 __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<0; __a1 >>= 8;
783 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<8; __a0 >>= 8;
784 __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<8; __a1 >>= 8;
785 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<16; __a0 >>= 8;
786 __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<16; __a1 >>= 8;
787 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<24; __a0 >>= 8;
788 __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<24; __a1 >>= 8;
789 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<32; __a0 >>= 8;
790 __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<32; __a1 >>= 8;
791 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<40; __a0 >>= 8;
792 __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<40; __a1 >>= 8;
793 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<48; __a0 >>= 8;
794 __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<48; __a1 >>= 8;
795 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<56; __a0 >>= 8;
796 __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<56;
801 for(; j < A->
x->width-1; j++) {
802 __a0 = _a[j], __t0 = 0;
803 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<0; __a0 >>= 8;
804 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<8; __a0 >>= 8;
805 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<16; __a0 >>= 8;
806 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<24; __a0 >>= 8;
807 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<32; __a0 >>= 8;
808 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<40; __a0 >>= 8;
809 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<48; __a0 >>= 8;
810 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<56;
814 __t = _a[j] & ~bitmask_end;
815 switch( (A->
x->offset + A->
x->ncols) % m4ri_radix ) {
816 case 0: __t ^= ((word)X[(
int)((_a[j] & 0xFF00000000000000ULL)>>56)])<<56;
817 case 56: __t ^= ((word)X[(
int)((_a[j] & 0x00FF000000000000ULL)>>48)])<<48;
818 case 48: __t ^= ((word)X[(
int)((_a[j] & 0x0000FF0000000000ULL)>>40)])<<40;
819 case 40: __t ^= ((word)X[(
int)((_a[j] & 0x000000FF00000000ULL)>>32)])<<32;
820 case 32: __t ^= ((word)X[(
int)((_a[j] & 0x00000000FF000000ULL)>>24)])<<24;
821 case 24: __t ^= ((word)X[(
int)((_a[j] & 0x0000000000FF0000ULL)>>16)])<<16;
822 case 16: __t ^= ((word)X[(
int)((_a[j] & 0x000000000000FF00ULL)>> 8)])<< 8;
823 case 8: __t ^= ((word)X[(
int)((_a[j] & 0x00000000000000FFULL)>> 0)])<< 0;
827 }
else if (A->
w == 16) {
828 switch( (start/16) %4 ) {
829 case 0: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
830 case 1: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
831 case 2: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
832 case 3: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
break;
833 default: m4ri_die(
"impossible");
835 if(A->
x->width-startblock == 1) {
836 _a[startblock] &= ~(bitmask_begin & bitmask_end);
837 _a[startblock] ^= __t & bitmask_begin & bitmask_end;
840 _a[startblock] &= ~bitmask_begin;
841 _a[startblock] ^= __t & bitmask_begin;
844 for(j=startblock+1; j+4<A->
x->width; j+=4) {
845 __a = _a[j], __t = 0;
846 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
847 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
848 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
849 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
852 __a = _a[j+1], __t = 0;
853 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
854 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
855 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
856 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
860 __a = _a[j+2], __t = 0;
861 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
862 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
863 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
864 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
867 __a = _a[j+3], __t = 0;
868 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
869 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
870 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
871 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
874 for( ; j<A->
x->width-1; j++) {
875 __a = _a[j], __t = 0;
876 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
877 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
878 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
879 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
883 __t = _a[j] & ~bitmask_end;
884 switch( (A->
x->offset + A->
x->ncols) % m4ri_radix) {
885 case 0: __t ^= ((word)X[(
int)((_a[j] & 0xFFFF000000000000ULL)>>48)])<<48;
886 case 48: __t ^= ((word)X[(
int)((_a[j] & 0x0000FFFF00000000ULL)>>32)])<<32;
887 case 32: __t ^= ((word)X[(
int)((_a[j] & 0x00000000FFFF0000ULL)>>16)])<<16;
888 case 16: __t ^= ((word)X[(
int)((_a[j] & 0x000000000000FFFFULL)>> 0)])<< 0;
893 for(rci_t j=start_col; j<A->
ncols; j++) {
910 mzd_row_swap(M->
x, rowa, rowb);
928 mzd_copy_row(B->
x, i, A->
x, j);
942 for(rci_t i=0; i<M->
w; i++)
943 mzd_col_swap(M->
x,M->
w*cola+i, M->
w*colb+i);
960 mzd_col_swap_in_rows(A->
x, A->
w*cola+e, A->
w*colb+e, start_row, stop_row);
978 mzd_row_add(M->
x, sourcerow, destrow);
992 return mzd_first_zero_row(A->
x);
1007 mzd_row_clear_offset(M->
x, row, coloffset*M->
w);
1035 #endif //M4RIE_MATRIX_H