aboutsummaryrefslogtreecommitdiffstats
path: root/contrib
diff options
context:
space:
mode:
authorJohn Birrell <jb@FreeBSD.org>1998-03-07 21:27:49 +0000
committerJohn Birrell <jb@FreeBSD.org>1998-03-07 21:27:49 +0000
commitf2b7551c390f41a5b645b2426bb098674042a057 (patch)
tree581d16499c65d0809a2e133ffcfd8678469e1fc4 /contrib
parenta5c9a443bfc2aba56e776d9a22c09b1fed876253 (diff)
downloadsrc-f2b7551c390f41a5b645b2426bb098674042a057.tar.gz
src-f2b7551c390f41a5b645b2426bb098674042a057.zip
Import of the mpn asm source files for alpha from V2.0.2 which
appears to match the other libgmp files in the repository.
Notes
Notes: svn path=/vendor/libgmp/dist/; revision=34204
Diffstat (limited to 'contrib')
-rw-r--r--contrib/libgmp/mpn/alpha/add_n.s120
-rw-r--r--contrib/libgmp/mpn/alpha/addmul_1.s92
-rw-r--r--contrib/libgmp/mpn/alpha/gmp-mparam.h27
-rw-r--r--contrib/libgmp/mpn/alpha/lshift.s109
-rw-r--r--contrib/libgmp/mpn/alpha/mul_1.s85
-rw-r--r--contrib/libgmp/mpn/alpha/rshift.s107
-rw-r--r--contrib/libgmp/mpn/alpha/sub_n.s120
-rw-r--r--contrib/libgmp/mpn/alpha/submul_1.s92
-rw-r--r--contrib/libgmp/mpn/alpha/udiv_qrnnd.S151
9 files changed, 903 insertions, 0 deletions
diff --git a/contrib/libgmp/mpn/alpha/add_n.s b/contrib/libgmp/mpn/alpha/add_n.s
new file mode 100644
index 000000000000..426556e3986e
--- /dev/null
+++ b/contrib/libgmp/mpn/alpha/add_n.s
@@ -0,0 +1,120 @@
+ # Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
+ # store sum in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $16
+ # s1_ptr $17
+ # s2_ptr $18
+ # size $19
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __mpn_add_n
+ .ent __mpn_add_n
+__mpn_add_n:
+ .frame $30,0,$26,0
+
+ ldq $3,0($17)
+ ldq $4,0($18)
+
+ subq $19,1,$19
+ and $19,4-1,$2 # number of limbs in first loop
+ bis $31,$31,$0
+ beq $2,.L0 # if multiple of 4 limbs, skip first loop
+
+ subq $19,$2,$19
+
+.Loop0: subq $2,1,$2
+ ldq $5,8($17)
+ addq $4,$0,$4
+ ldq $6,8($18)
+ cmpult $4,$0,$1
+ addq $3,$4,$4
+ cmpult $4,$3,$0
+ stq $4,0($16)
+ or $0,$1,$0
+
+ addq $17,8,$17
+ addq $18,8,$18
+ bis $5,$5,$3
+ bis $6,$6,$4
+ addq $16,8,$16
+ bne $2,.Loop0
+
+.L0: beq $19,.Lend
+
+ .align 3
+.Loop: subq $19,4,$19
+
+ ldq $5,8($17)
+ addq $4,$0,$4
+ ldq $6,8($18)
+ cmpult $4,$0,$1
+ addq $3,$4,$4
+ cmpult $4,$3,$0
+ stq $4,0($16)
+ or $0,$1,$0
+
+ ldq $3,16($17)
+ addq $6,$0,$6
+ ldq $4,16($18)
+ cmpult $6,$0,$1
+ addq $5,$6,$6
+ cmpult $6,$5,$0
+ stq $6,8($16)
+ or $0,$1,$0
+
+ ldq $5,24($17)
+ addq $4,$0,$4
+ ldq $6,24($18)
+ cmpult $4,$0,$1
+ addq $3,$4,$4
+ cmpult $4,$3,$0
+ stq $4,16($16)
+ or $0,$1,$0
+
+ ldq $3,32($17)
+ addq $6,$0,$6
+ ldq $4,32($18)
+ cmpult $6,$0,$1
+ addq $5,$6,$6
+ cmpult $6,$5,$0
+ stq $6,24($16)
+ or $0,$1,$0
+
+ addq $17,32,$17
+ addq $18,32,$18
+ addq $16,32,$16
+ bne $19,.Loop
+
+.Lend: addq $4,$0,$4
+ cmpult $4,$0,$1
+ addq $3,$4,$4
+ cmpult $4,$3,$0
+ stq $4,0($16)
+ or $0,$1,$0
+ ret $31,($26),1
+
+ .end __mpn_add_n
diff --git a/contrib/libgmp/mpn/alpha/addmul_1.s b/contrib/libgmp/mpn/alpha/addmul_1.s
new file mode 100644
index 000000000000..048238ae9d66
--- /dev/null
+++ b/contrib/libgmp/mpn/alpha/addmul_1.s
@@ -0,0 +1,92 @@
+ # Alpha 21064 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+ # the result to a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr r16
+ # s1_ptr r17
+ # size r18
+ # s2_limb r19
+
+ # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __mpn_addmul_1
+ .ent __mpn_addmul_1 2
+__mpn_addmul_1:
+ .frame $30,0,$26
+
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ subq $18,1,$18 # size--
+ mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ umulh $2,$19,$0 # $0 = prod_high
+ beq $18,.Lend1 # jump if size was == 1
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ subq $18,1,$18 # size--
+ addq $5,$3,$3
+ cmpult $3,$5,$4
+ stq $3,0($16)
+ addq $16,8,$16 # res_ptr++
+ beq $18,.Lend2 # jump if size was == 2
+
+ .align 3
+.Loop: mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ subq $18,1,$18 # size--
+ umulh $2,$19,$4 # $4 = cy_limb
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ addq $5,$3,$3
+ cmpult $3,$5,$5
+ stq $3,0($16)
+ addq $16,8,$16 # res_ptr++
+ addq $5,$0,$0 # combine carries
+ bne $18,.Loop
+
+.Lend2: mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ umulh $2,$19,$4 # $4 = cy_limb
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ addq $5,$3,$3
+ cmpult $3,$5,$5
+ stq $3,0($16)
+ addq $5,$0,$0 # combine carries
+ addq $4,$0,$0 # cy_limb = prod_high + cy
+ ret $31,($26),1
+.Lend1: addq $5,$3,$3
+ cmpult $3,$5,$5
+ stq $3,0($16)
+ addq $0,$5,$0
+ ret $31,($26),1
+
+ .end __mpn_addmul_1
diff --git a/contrib/libgmp/mpn/alpha/gmp-mparam.h b/contrib/libgmp/mpn/alpha/gmp-mparam.h
new file mode 100644
index 000000000000..a3c66974dede
--- /dev/null
+++ b/contrib/libgmp/mpn/alpha/gmp-mparam.h
@@ -0,0 +1,27 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
diff --git a/contrib/libgmp/mpn/alpha/lshift.s b/contrib/libgmp/mpn/alpha/lshift.s
new file mode 100644
index 000000000000..13bd24a4271d
--- /dev/null
+++ b/contrib/libgmp/mpn/alpha/lshift.s
@@ -0,0 +1,109 @@
+ # Alpha 21064 __mpn_lshift --
+
+ # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr r16
+ # s1_ptr r17
+ # size r18
+ # cnt r19
+
+ # This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
+ # it would take 4 cycles/limb. It should be possible to get down to 3
+ # cycles/limb since both ldq and stq can be paired with the other used
+ # instructions. But there are many restrictions in the 21064 pipeline that
+ # makes it hard, if not impossible, to get down to 3 cycles/limb:
+
+ # 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
+ # 2. Only aligned instruction pairs can be paired.
+ # 3. The store buffer or silo might not be able to deal with the bandwidth.
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __mpn_lshift
+ .ent __mpn_lshift
+__mpn_lshift:
+ .frame $30,0,$26,0
+
+ s8addq $18,$17,$17 # make r17 point at end of s1
+ ldq $4,-8($17) # load first limb
+ subq $17,8,$17
+ subq $31,$19,$7
+ s8addq $18,$16,$16 # make r16 point at end of RES
+ subq $18,1,$18
+ and $18,4-1,$20 # number of limbs in first loop
+ srl $4,$7,$0 # compute function result
+
+ beq $20,.L0
+ subq $18,$20,$18
+
+ .align 3
+.Loop0:
+ ldq $3,-8($17)
+ subq $16,8,$16
+ subq $17,8,$17
+ subq $20,1,$20
+ sll $4,$19,$5
+ srl $3,$7,$6
+ bis $3,$3,$4
+ bis $5,$6,$8
+ stq $8,0($16)
+ bne $20,.Loop0
+
+.L0: beq $18,.Lend
+
+ .align 3
+.Loop: ldq $3,-8($17)
+ subq $16,32,$16
+ subq $18,4,$18
+ sll $4,$19,$5
+ srl $3,$7,$6
+
+ ldq $4,-16($17)
+ sll $3,$19,$1
+ bis $5,$6,$8
+ stq $8,24($16)
+ srl $4,$7,$2
+
+ ldq $3,-24($17)
+ sll $4,$19,$5
+ bis $1,$2,$8
+ stq $8,16($16)
+ srl $3,$7,$6
+
+ ldq $4,-32($17)
+ sll $3,$19,$1
+ bis $5,$6,$8
+ stq $8,8($16)
+ srl $4,$7,$2
+
+ subq $17,32,$17
+ bis $1,$2,$8
+ stq $8,0($16)
+
+ bgt $18,.Loop
+
+.Lend: sll $4,$19,$8
+ stq $8,-8($16)
+ ret $31,($26),1
+ .end __mpn_lshift
diff --git a/contrib/libgmp/mpn/alpha/mul_1.s b/contrib/libgmp/mpn/alpha/mul_1.s
new file mode 100644
index 000000000000..a1f5a94b9e69
--- /dev/null
+++ b/contrib/libgmp/mpn/alpha/mul_1.s
@@ -0,0 +1,85 @@
+ # Alpha 21064 __mpn_mul_1 -- Multiply a limb vector with a limb and store
+ # the result in a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr r16
+ # s1_ptr r17
+ # size r18
+ # s2_limb r19
+
+ # This code runs at 42 cycles/limb on the EV4 and 18 cycles/limb on the EV5.
+
+ # To improve performance for long multiplications, we would use
+ # 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
+ # these instructions without slowing down the general code: 1. We can
+ # only have two prefetches in operation at any time in the Alpha
+ # architecture. 2. There will seldom be any special alignment
+ # between RES_PTR and S1_PTR. Maybe we can simply divide the current
+ # loop into an inner and outer loop, having the inner loop handle
+ # exactly one prefetch block?
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __mpn_mul_1
+ .ent __mpn_mul_1 2
+__mpn_mul_1:
+ .frame $30,0,$26
+
+ ldq $2,0($17) # $2 = s1_limb
+ subq $18,1,$18 # size--
+ mulq $2,$19,$3 # $3 = prod_low
+ bic $31,$31,$4 # clear cy_limb
+ umulh $2,$19,$0 # $0 = prod_high
+ beq $18,Lend1 # jump if size was == 1
+ ldq $2,8($17) # $2 = s1_limb
+ subq $18,1,$18 # size--
+ stq $3,0($16)
+ beq $18,Lend2 # jump if size was == 2
+
+ .align 3
+Loop: mulq $2,$19,$3 # $3 = prod_low
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ subq $18,1,$18 # size--
+ umulh $2,$19,$4 # $4 = cy_limb
+ ldq $2,16($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ stq $3,8($16)
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ addq $16,8,$16 # res_ptr++
+ bne $18,Loop
+
+Lend2: mulq $2,$19,$3 # $3 = prod_low
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ umulh $2,$19,$4 # $4 = cy_limb
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ stq $3,8($16)
+ addq $4,$0,$0 # cy_limb = prod_high + cy
+ ret $31,($26),1
+Lend1: stq $3,0($16)
+ ret $31,($26),1
+
+ .end __mpn_mul_1
diff --git a/contrib/libgmp/mpn/alpha/rshift.s b/contrib/libgmp/mpn/alpha/rshift.s
new file mode 100644
index 000000000000..389054ab0e92
--- /dev/null
+++ b/contrib/libgmp/mpn/alpha/rshift.s
@@ -0,0 +1,107 @@
+ # Alpha 21064 __mpn_rshift --
+
+ # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr r16
+ # s1_ptr r17
+ # size r18
+ # cnt r19
+
+ # This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
+ # it would take 4 cycles/limb. It should be possible to get down to 3
+ # cycles/limb since both ldq and stq can be paired with the other used
+ # instructions. But there are many restrictions in the 21064 pipeline that
+ # makes it hard, if not impossible, to get down to 3 cycles/limb:
+
+ # 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
+ # 2. Only aligned instruction pairs can be paired.
+ # 3. The store buffer or silo might not be able to deal with the bandwidth.
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __mpn_rshift
+ .ent __mpn_rshift
+__mpn_rshift:
+ .frame $30,0,$26,0
+
+ ldq $4,0($17) # load first limb
+ addq $17,8,$17
+ subq $31,$19,$7
+ subq $18,1,$18
+ and $18,4-1,$20 # number of limbs in first loop
+ sll $4,$7,$0 # compute function result
+
+ beq $20,.L0
+ subq $18,$20,$18
+
+ .align 3
+.Loop0:
+ ldq $3,0($17)
+ addq $16,8,$16
+ addq $17,8,$17
+ subq $20,1,$20
+ srl $4,$19,$5
+ sll $3,$7,$6
+ bis $3,$3,$4
+ bis $5,$6,$8
+ stq $8,-8($16)
+ bne $20,.Loop0
+
+.L0: beq $18,.Lend
+
+ .align 3
+.Loop: ldq $3,0($17)
+ addq $16,32,$16
+ subq $18,4,$18
+ srl $4,$19,$5
+ sll $3,$7,$6
+
+ ldq $4,8($17)
+ srl $3,$19,$1
+ bis $5,$6,$8
+ stq $8,-32($16)
+ sll $4,$7,$2
+
+ ldq $3,16($17)
+ srl $4,$19,$5
+ bis $1,$2,$8
+ stq $8,-24($16)
+ sll $3,$7,$6
+
+ ldq $4,24($17)
+ srl $3,$19,$1
+ bis $5,$6,$8
+ stq $8,-16($16)
+ sll $4,$7,$2
+
+ addq $17,32,$17
+ bis $1,$2,$8
+ stq $8,-8($16)
+
+ bgt $18,.Loop
+
+.Lend: srl $4,$19,$8
+ stq $8,0($16)
+ ret $31,($26),1
+ .end __mpn_rshift
diff --git a/contrib/libgmp/mpn/alpha/sub_n.s b/contrib/libgmp/mpn/alpha/sub_n.s
new file mode 100644
index 000000000000..3c90c116973b
--- /dev/null
+++ b/contrib/libgmp/mpn/alpha/sub_n.s
@@ -0,0 +1,120 @@
+ # Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ # store difference in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $16
+ # s1_ptr $17
+ # s2_ptr $18
+ # size $19
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __mpn_sub_n
+ .ent __mpn_sub_n
+__mpn_sub_n:
+ .frame $30,0,$26,0
+
+ ldq $3,0($17)
+ ldq $4,0($18)
+
+ subq $19,1,$19
+ and $19,4-1,$2 # number of limbs in first loop
+ bis $31,$31,$0
+ beq $2,.L0 # if multiple of 4 limbs, skip first loop
+
+ subq $19,$2,$19
+
+.Loop0: subq $2,1,$2
+ ldq $5,8($17)
+ addq $4,$0,$4
+ ldq $6,8($18)
+ cmpult $4,$0,$1
+ subq $3,$4,$4
+ cmpult $3,$4,$0
+ stq $4,0($16)
+ or $0,$1,$0
+
+ addq $17,8,$17
+ addq $18,8,$18
+ bis $5,$5,$3
+ bis $6,$6,$4
+ addq $16,8,$16
+ bne $2,.Loop0
+
+.L0: beq $19,.Lend
+
+ .align 3
+.Loop: subq $19,4,$19
+
+ ldq $5,8($17)
+ addq $4,$0,$4
+ ldq $6,8($18)
+ cmpult $4,$0,$1
+ subq $3,$4,$4
+ cmpult $3,$4,$0
+ stq $4,0($16)
+ or $0,$1,$0
+
+ ldq $3,16($17)
+ addq $6,$0,$6
+ ldq $4,16($18)
+ cmpult $6,$0,$1
+ subq $5,$6,$6
+ cmpult $5,$6,$0
+ stq $6,8($16)
+ or $0,$1,$0
+
+ ldq $5,24($17)
+ addq $4,$0,$4
+ ldq $6,24($18)
+ cmpult $4,$0,$1
+ subq $3,$4,$4
+ cmpult $3,$4,$0
+ stq $4,16($16)
+ or $0,$1,$0
+
+ ldq $3,32($17)
+ addq $6,$0,$6
+ ldq $4,32($18)
+ cmpult $6,$0,$1
+ subq $5,$6,$6
+ cmpult $5,$6,$0
+ stq $6,24($16)
+ or $0,$1,$0
+
+ addq $17,32,$17
+ addq $18,32,$18
+ addq $16,32,$16
+ bne $19,.Loop
+
+.Lend: addq $4,$0,$4
+ cmpult $4,$0,$1
+ subq $3,$4,$4
+ cmpult $3,$4,$0
+ stq $4,0($16)
+ or $0,$1,$0
+ ret $31,($26),1
+
+ .end __mpn_sub_n
diff --git a/contrib/libgmp/mpn/alpha/submul_1.s b/contrib/libgmp/mpn/alpha/submul_1.s
new file mode 100644
index 000000000000..1ed0c6a8d9e8
--- /dev/null
+++ b/contrib/libgmp/mpn/alpha/submul_1.s
@@ -0,0 +1,92 @@
+ # Alpha 21064 __mpn_submul_1 -- Multiply a limb vector with a limb and
+ # subtract the result from a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr r16
+ # s1_ptr r17
+ # size r18
+ # s2_limb r19
+
+ # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __mpn_submul_1
+ .ent __mpn_submul_1 2
+__mpn_submul_1:
+ .frame $30,0,$26
+
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ subq $18,1,$18 # size--
+ mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ umulh $2,$19,$0 # $0 = prod_high
+ beq $18,.Lend1 # jump if size was == 1
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ subq $18,1,$18 # size--
+ subq $5,$3,$3
+ cmpult $5,$3,$4
+ stq $3,0($16)
+ addq $16,8,$16 # res_ptr++
+ beq $18,.Lend2 # jump if size was == 2
+
+ .align 3
+.Loop: mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ subq $18,1,$18 # size--
+ umulh $2,$19,$4 # $4 = cy_limb
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ subq $5,$3,$3
+ cmpult $5,$3,$5
+ stq $3,0($16)
+ addq $16,8,$16 # res_ptr++
+ addq $5,$0,$0 # combine carries
+ bne $18,.Loop
+
+.Lend2: mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ umulh $2,$19,$4 # $4 = cy_limb
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ subq $5,$3,$3
+ cmpult $5,$3,$5
+ stq $3,0($16)
+ addq $5,$0,$0 # combine carries
+ addq $4,$0,$0 # cy_limb = prod_high + cy
+ ret $31,($26),1
+.Lend1: subq $5,$3,$3
+ cmpult $5,$3,$5
+ stq $3,0($16)
+ addq $0,$5,$0
+ ret $31,($26),1
+
+ .end __mpn_submul_1
diff --git a/contrib/libgmp/mpn/alpha/udiv_qrnnd.S b/contrib/libgmp/mpn/alpha/udiv_qrnnd.S
new file mode 100644
index 000000000000..d3d2cee93d13
--- /dev/null
+++ b/contrib/libgmp/mpn/alpha/udiv_qrnnd.S
@@ -0,0 +1,151 @@
+ # Alpha 21064 __udiv_qrnnd
+
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __udiv_qrnnd
+ .ent __udiv_qrnnd
+__udiv_qrnnd:
+ .frame $30,0,$26,0
+ .prologue 0
+#define cnt $2
+#define tmp $3
+#define rem_ptr $16
+#define n1 $17
+#define n0 $18
+#define d $19
+#define qb $20
+
+ ldiq cnt,16
+ blt d,.Largedivisor
+
+.Loop1: cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ subq cnt,1,cnt
+ bgt cnt,.Loop1
+ stq n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+.Largedivisor:
+ and n0,1,$4
+
+ srl n0,1,n0
+ sll n1,63,tmp
+ or tmp,n0,n0
+ srl n1,1,n1
+
+ and d,1,$6
+ srl d,1,$5
+ addq $5,$6,$5
+
+.Loop2: cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ subq cnt,1,cnt
+ bgt cnt,.Loop2
+
+ addq n1,n1,n1
+ addq $4,n1,n1
+ bne $6,.LOdd
+ stq n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+.LOdd:
+ /* q' in n0. r' in n1 */
+ addq n1,n0,n1
+ cmpult n1,n0,tmp # tmp := carry from addq
+ beq tmp,.LLp6
+ addq n0,1,n0
+ subq n1,d,n1
+.LLp6: cmpult n1,d,tmp
+ bne tmp,.LLp7
+ addq n0,1,n0
+ subq n1,d,n1
+.LLp7:
+ stq n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+ .end __udiv_qrnnd